Merge pull request #12070 from rmuir/analysis_rest_tests
add integration tests for analysis plugins
commit 69cd933ea3
@@ -17,6 +17,8 @@
     <properties>
         <tests.jvms>1</tests.jvms>
         <es.logger.level>INFO</es.logger.level>
+        <tests.rest.suite>analysis_icu</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
     </properties>

     <dependencies>
@@ -0,0 +1,37 @@
+# Integration tests for ICU analysis components
+#
+"Tokenizer":
+    - do:
+        indices.analyze:
+          text: Foo Bar
+          tokenizer: icu_tokenizer
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: Foo }
+    - match: { tokens.1.token: Bar }
+---
+"Normalization filter":
+    - do:
+        indices.analyze:
+          filters: icu_normalizer
+          text: Foo Bar Ruß
+          tokenizer: keyword
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: foo bar russ }
+---
+"Normalization charfilter":
+    - do:
+        indices.analyze:
+          char_filters: icu_normalizer
+          text: Foo Bar Ruß
+          tokenizer: keyword
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: foo bar russ }
+---
+"Folding filter":
+    - do:
+        indices.analyze:
+          filters: icu_folding
+          text: Foo Bar résumé
+          tokenizer: keyword
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: foo bar resume }
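For reference, a minimal standalone sketch (not part of this change) of the HTTP request the "Tokenizer" test above drives. It assumes a local node at http://localhost:9200 with the analysis-icu plugin installed; the class name is made up for illustration.

// Illustrative only: the _analyze call behind the "Tokenizer" test above,
// using plain JDK classes. Assumes a local node with the ICU plugin.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

public class IcuAnalyzeRestSketch {
    public static void main(String[] args) throws Exception {
        String query = "tokenizer=icu_tokenizer&text=" + URLEncoder.encode("Foo Bar", "UTF-8");
        URL url = new URL("http://localhost:9200/_analyze?" + query);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            // The response body is JSON with a "tokens" array; the YAML test
            // asserts it holds exactly two tokens, "Foo" and "Bar".
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
            }
        }
        conn.disconnect();
    }
}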
@@ -22,20 +22,26 @@ package org.elasticsearch.indices.analysis;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.Transliterator;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.ICUFoldingFilter;
+import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
 import org.apache.lucene.analysis.icu.ICUTransformFilter;
 import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.analysis.ICUCollationKeyFilter;
+import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory;
 import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
 import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
+import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;

+import java.io.Reader;
+
 /**
  * Registers indices level analysis components so, if not explicitly configured, will be shared
  * among all indices.

@@ -106,5 +112,17 @@ public class IcuIndicesAnalysis extends AbstractComponent {
                 return new ICUTransformFilter(tokenStream, Transliterator.getInstance("Null", Transliterator.FORWARD));
             }
         }));
+
+        indicesAnalysisService.charFilterFactories().put("icu_normalizer", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
+            @Override
+            public String name() {
+                return "icu_normalizer";
+            }
+
+            @Override
+            public Reader create(Reader reader) {
+                return new ICUNormalizer2CharFilter(reader);
+            }
+        }));
     }
 }
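As an aside, a minimal sketch (not part of this change) of what the char filter registered above does to its input, assuming lucene-analyzers-icu is on the classpath. The class name is illustrative, and the expected output mirrors the "Normalization charfilter" REST test above.

// Illustrative only: feed a string through ICUNormalizer2CharFilter directly.
// Assumes lucene-analyzers-icu on the classpath; class name is made up.
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;

public class IcuNormalizerCharFilterSketch {
    public static void main(String[] args) throws Exception {
        // The single-argument constructor applies ICU normalization with case
        // folding by default, which the REST test above relies on.
        Reader normalized = new ICUNormalizer2CharFilter(new StringReader("Foo Bar Ruß"));
        StringBuilder out = new StringBuilder();
        int ch;
        while ((ch = normalized.read()) != -1) {
            out.append((char) ch);
        }
        // Expected per the "Normalization charfilter" test: foo bar russ
        System.out.println(out);
    }
}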
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisICURestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisICURestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
@@ -16,7 +16,8 @@
     <description>The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch.</description>

     <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_kuromoji</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
     </properties>

     <dependencies>
@@ -0,0 +1,39 @@
+# Integration tests for Kuromoji analysis components
+#
+"Tokenizer":
+    - do:
+        indices.analyze:
+          text: 関西国際空港
+          tokenizer: kuromoji_tokenizer
+    - length: { tokens: 4 }
+    - match: { tokens.0.token: 関西 }
+    - match: { tokens.1.token: 関西国際空港 }
+    - match: { tokens.2.token: 国際 }
+    - match: { tokens.3.token: 空港 }
+---
+"Baseform filter":
+    - do:
+        indices.analyze:
+          text: 飲み
+          tokenizer: kuromoji_tokenizer
+          filters: kuromoji_baseform
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: 飲む }
+---
+"Reading filter":
+    - do:
+        indices.analyze:
+          text: 寿司
+          tokenizer: kuromoji_tokenizer
+          filters: kuromoji_readingform
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: sushi }
+---
+"Stemming filter":
+    - do:
+        indices.analyze:
+          text: サーバー
+          tokenizer: kuromoji_tokenizer
+          filters: kuromoji_stemmer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: サーバ }
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisKuromojiRestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisKuromojiRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
@@ -15,7 +15,8 @@
     <description>Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch.</description>

     <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_smartcn</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
     </properties>

     <dependencies>
@@ -0,0 +1,28 @@
+# Integration tests for Smart Chinese analysis components
+#
+"Tokenizer":
+    - do:
+        indices.analyze:
+          text: 我购买了道具和服装。
+          tokenizer: smartcn_tokenizer
+    - length: { tokens: 7 }
+    - match: { tokens.0.token: 我 }
+    - match: { tokens.1.token: 购买 }
+    - match: { tokens.2.token: 了 }
+    - match: { tokens.3.token: 道具 }
+    - match: { tokens.4.token: 和 }
+    - match: { tokens.5.token: 服装 }
+    - match: { tokens.6.token: "," }
+---
+"Analyzer":
+    - do:
+        indices.analyze:
+          text: 我购买了道具和服装。
+          analyzer: smartcn
+    - length: { tokens: 6 }
+    - match: { tokens.0.token: 我 }
+    - match: { tokens.1.token: 购买 }
+    - match: { tokens.2.token: 了 }
+    - match: { tokens.3.token: 道具 }
+    - match: { tokens.4.token: 和 }
+    - match: { tokens.5.token: 服装 }
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisSmartChineseRestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisSmartChineseRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
@@ -15,7 +15,8 @@
    <description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>

    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_stempel</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>

    <dependencies>
@@ -0,0 +1,18 @@
+# Integration tests for Polish analysis components
+#
+"Stemmer":
+    - do:
+        indices.analyze:
+          text: studenci
+          tokenizer: keyword
+          filters: polish_stem
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: student }
+---
+"Analyzer":
+    - do:
+        indices.analyze:
+          text: studenta był
+          analyzer: polish
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: student }
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisPolishRestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisPolishRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
@@ -277,6 +277,7 @@
        <include>api/cluster.health.json</include>
        <!-- used in plugin REST tests -->
        <include>api/index.json</include>
+       <include>api/indices.analyze.json</include>
        <include>api/indices.refresh.json</include>
        <include>api/count.json</include>
      </includes>