Merge pull request #12070 from rmuir/analysis_rest_tests
add integration tests for analysis plugins
This commit is contained in:
commit
69cd933ea3
|
@ -17,6 +17,8 @@
|
|||
<properties>
|
||||
<tests.jvms>1</tests.jvms>
|
||||
<es.logger.level>INFO</es.logger.level>
|
||||
<tests.rest.suite>analysis_icu</tests.rest.suite>
|
||||
<tests.rest.load_packaged>false</tests.rest.load_packaged>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
# REST integration tests for the ICU analysis components.
# Every test issues a single indices.analyze call and checks the tokens it returns.
"Tokenizer":
    # Expect icu_tokenizer to yield one token per word.
    - do:
        indices.analyze:
          tokenizer:    icu_tokenizer
          text:         Foo Bar
    - length: { tokens: 2 }
    - match:  { tokens.0.token: Foo }
    - match:  { tokens.1.token: Bar }
---
"Normalization filter":
    # keyword tokenizer keeps the text as one token; the icu_normalizer
    # token filter is expected to lowercase it and expand "ß" to "ss".
    - do:
        indices.analyze:
          tokenizer:    keyword
          filters:      icu_normalizer
          text:         Foo Bar Ruß
    - length: { tokens: 1 }
    - match:  { tokens.0.token: foo bar russ }
---
"Normalization charfilter":
    # Same expectation as the token filter test, but with icu_normalizer
    # applied as a char filter.
    - do:
        indices.analyze:
          tokenizer:    keyword
          char_filters: icu_normalizer
          text:         Foo Bar Ruß
    - length: { tokens: 1 }
    - match:  { tokens.0.token: foo bar russ }
---
"Folding filter":
    # icu_folding is expected to lowercase the text and strip the accents.
    - do:
        indices.analyze:
          tokenizer:    keyword
          filters:      icu_folding
          text:         Foo Bar résumé
    - length: { tokens: 1 }
    - match:  { tokens.0.token: foo bar resume }
|
|
@ -22,20 +22,26 @@ package org.elasticsearch.indices.analysis;
|
|||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
|
||||
import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
|
||||
import org.apache.lucene.analysis.icu.ICUTransformFilter;
|
||||
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.analysis.ICUCollationKeyFilter;
|
||||
import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory;
|
||||
import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
|
||||
import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
|
||||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenizerFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* Registers indices level analysis components so, if not explicitly configured, will be shared
|
||||
* among all indices.
|
||||
|
@ -106,5 +112,17 @@ public class IcuIndicesAnalysis extends AbstractComponent {
|
|||
return new ICUTransformFilter(tokenStream, Transliterator.getInstance("Null", Transliterator.FORWARD));
|
||||
}
|
||||
}));
|
||||
|
||||
indicesAnalysisService.charFilterFactories().put("icu_normalizer", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
|
||||
@Override
|
||||
public String name() {
|
||||
return "icu_normalizer";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader create(Reader reader) {
|
||||
return new ICUNormalizer2CharFilter(reader);
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
|
||||
import org.elasticsearch.test.rest.RestTestCandidate;
|
||||
import org.elasticsearch.test.rest.parser.RestTestParseException;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class AnalysisICURestIT extends ElasticsearchRestTestCase {
|
||||
|
||||
public AnalysisICURestIT(@Name("yaml") RestTestCandidate testCandidate) {
|
||||
super(testCandidate);
|
||||
}
|
||||
|
||||
@ParametersFactory
|
||||
public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
|
||||
return ElasticsearchRestTestCase.createParameters(0, 1);
|
||||
}
|
||||
}
|
||||
|
|
@ -16,7 +16,8 @@
|
|||
<description>The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch.</description>
|
||||
|
||||
<properties>
|
||||
<!-- You can add any specific project property here -->
|
||||
<tests.rest.suite>analysis_kuromoji</tests.rest.suite>
|
||||
<tests.rest.load_packaged>false</tests.rest.load_packaged>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
# REST integration tests for the Kuromoji analysis components.
# Every test issues a single indices.analyze call and checks the tokens it returns.
"Tokenizer":
    # Expect four tokens: the full compound plus its three parts.
    - do:
        indices.analyze:
          tokenizer:    kuromoji_tokenizer
          text:         関西国際空港
    - length: { tokens: 4 }
    - match:  { tokens.0.token: 関西 }
    - match:  { tokens.1.token: 関西国際空港 }
    - match:  { tokens.2.token: 国際 }
    - match:  { tokens.3.token: 空港 }
---
"Baseform filter":
    # kuromoji_baseform is expected to turn the inflected form into 飲む.
    - do:
        indices.analyze:
          tokenizer:    kuromoji_tokenizer
          filters:      kuromoji_baseform
          text:         飲み
    - length: { tokens: 1 }
    - match:  { tokens.0.token: 飲む }
---
"Reading filter":
    # kuromoji_readingform is expected to emit the romanized reading.
    - do:
        indices.analyze:
          tokenizer:    kuromoji_tokenizer
          filters:      kuromoji_readingform
          text:         寿司
    - length: { tokens: 1 }
    - match:  { tokens.0.token: sushi }
---
"Stemming filter":
    # kuromoji_stemmer is expected to drop the trailing prolonged sound mark.
    - do:
        indices.analyze:
          tokenizer:    kuromoji_tokenizer
          filters:      kuromoji_stemmer
          text:         サーバー
    - length: { tokens: 1 }
    - match:  { tokens.0.token: サーバ }
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
|
||||
import org.elasticsearch.test.rest.RestTestCandidate;
|
||||
import org.elasticsearch.test.rest.parser.RestTestParseException;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class AnalysisKuromojiRestIT extends ElasticsearchRestTestCase {
|
||||
|
||||
public AnalysisKuromojiRestIT(@Name("yaml") RestTestCandidate testCandidate) {
|
||||
super(testCandidate);
|
||||
}
|
||||
|
||||
@ParametersFactory
|
||||
public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
|
||||
return ElasticsearchRestTestCase.createParameters(0, 1);
|
||||
}
|
||||
}
|
||||
|
|
@ -15,7 +15,8 @@
|
|||
<description>Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch.</description>
|
||||
|
||||
<properties>
|
||||
<!-- You can add any specific project property here -->
|
||||
<tests.rest.suite>analysis_smartcn</tests.rest.suite>
|
||||
<tests.rest.load_packaged>false</tests.rest.load_packaged>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
# REST integration tests for the Smart Chinese analysis components.
# Every test issues a single indices.analyze call and checks the tokens it returns.
"Tokenizer":
    # Six word tokens plus a seventh token "," are expected; the seventh is
    # presumably the tokenizer's form of the trailing 。 (confirm against the plugin).
    - do:
        indices.analyze:
          tokenizer:    smartcn_tokenizer
          text:         我购买了道具和服装。
    - length: { tokens: 7 }
    - match:  { tokens.0.token: 我 }
    - match:  { tokens.1.token: 购买 }
    - match:  { tokens.2.token: 了 }
    - match:  { tokens.3.token: 道具 }
    - match:  { tokens.4.token: 和 }
    - match:  { tokens.5.token: 服装 }
    - match:  { tokens.6.token: "," }
---
"Analyzer":
    # The smartcn analyzer is expected to return only the six word tokens.
    - do:
        indices.analyze:
          analyzer:     smartcn
          text:         我购买了道具和服装。
    - length: { tokens: 6 }
    - match:  { tokens.0.token: 我 }
    - match:  { tokens.1.token: 购买 }
    - match:  { tokens.2.token: 了 }
    - match:  { tokens.3.token: 道具 }
    - match:  { tokens.4.token: 和 }
    - match:  { tokens.5.token: 服装 }
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
|
||||
import org.elasticsearch.test.rest.RestTestCandidate;
|
||||
import org.elasticsearch.test.rest.parser.RestTestParseException;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class AnalysisSmartChineseRestIT extends ElasticsearchRestTestCase {
|
||||
|
||||
public AnalysisSmartChineseRestIT(@Name("yaml") RestTestCandidate testCandidate) {
|
||||
super(testCandidate);
|
||||
}
|
||||
|
||||
@ParametersFactory
|
||||
public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
|
||||
return ElasticsearchRestTestCase.createParameters(0, 1);
|
||||
}
|
||||
}
|
||||
|
|
@ -15,7 +15,8 @@
|
|||
<description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>
|
||||
|
||||
<properties>
|
||||
<!-- You can add any specific project property here -->
|
||||
<tests.rest.suite>analysis_stempel</tests.rest.suite>
|
||||
<tests.rest.load_packaged>false</tests.rest.load_packaged>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
# REST integration tests for the Polish (Stempel) analysis components.
# Every test issues a single indices.analyze call and checks the tokens it returns.
"Stemmer":
    # polish_stem is expected to reduce the plural form to its stem.
    - do:
        indices.analyze:
          tokenizer:    keyword
          filters:      polish_stem
          text:         studenci
    - length: { tokens: 1 }
    - match:  { tokens.0.token: student }
---
"Analyzer":
    # Two input words, one expected token: only the stemmed first word comes back.
    - do:
        indices.analyze:
          analyzer:     polish
          text:         studenta był
    - length: { tokens: 1 }
    - match:  { tokens.0.token: student }
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
|
||||
import org.elasticsearch.test.rest.RestTestCandidate;
|
||||
import org.elasticsearch.test.rest.parser.RestTestParseException;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class AnalysisPolishRestIT extends ElasticsearchRestTestCase {
|
||||
|
||||
public AnalysisPolishRestIT(@Name("yaml") RestTestCandidate testCandidate) {
|
||||
super(testCandidate);
|
||||
}
|
||||
|
||||
@ParametersFactory
|
||||
public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
|
||||
return ElasticsearchRestTestCase.createParameters(0, 1);
|
||||
}
|
||||
}
|
||||
|
|
@ -277,6 +277,7 @@
|
|||
<include>api/cluster.health.json</include>
|
||||
<!-- used in plugin REST tests -->
|
||||
<include>api/index.json</include>
|
||||
<include>api/indices.analyze.json</include>
|
||||
<include>api/indices.refresh.json</include>
|
||||
<include>api/count.json</include>
|
||||
</includes>
|
||||
|
|
Loading…
Reference in New Issue