Move more token filters to analysis-common module
The following token filters were moved: arabic_stem, brazilian_stem, czech_stem, dutch_stem, french_stem, german_stem and russian_stem. Relates to #23658
This commit is contained in:
parent 7e3cd6a019
commit 1146a35870
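With this change the stem filters named above are no longer registered by AnalysisModule in core; the analysis-common plugin supplies them through the AnalysisPlugin extension point, as the CommonAnalysisPlugin hunk below shows. A minimal sketch of that registration pattern follows (the class name ExampleAnalysisPlugin is illustrative, not part of this commit; only the filter names and constructor references come from the diff):

package org.elasticsearch.analysis.common;

import java.util.Map;
import java.util.TreeMap;

import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Illustrative sketch only: shows how a plugin exposes token filters via
// AnalysisPlugin#getTokenFilters, the mechanism this commit moves the stem
// filters onto. The real class is CommonAnalysisPlugin (see the hunks below).
public class ExampleAnalysisPlugin extends Plugin implements AnalysisPlugin {

    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
        // Register under the same names the core module used, so existing
        // index settings keep resolving the filters by name.
        filters.put("arabic_stem", ArabicStemTokenFilterFactory::new);
        filters.put("czech_stem", CzechStemTokenFilterFactory::new);
        return filters;
    }
}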
@@ -30,11 +30,9 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.ArabicAnalyzerProvider;
-import org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.ArmenianAnalyzerProvider;
 import org.elasticsearch.index.analysis.BasqueAnalyzerProvider;
 import org.elasticsearch.index.analysis.BrazilianAnalyzerProvider;
-import org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.BulgarianAnalyzerProvider;
 import org.elasticsearch.index.analysis.CatalanAnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
@@ -42,19 +40,15 @@ import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
 import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
 import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
 import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
-import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
 import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
-import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
 import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
 import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
 import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
-import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
 import org.elasticsearch.index.analysis.GermanAnalyzerProvider;
-import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
 import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
 import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
@@ -80,7 +74,6 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
 import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
-import org.elasticsearch.index.analysis.RussianStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
 import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
@@ -172,14 +165,6 @@ public final class AnalysisModule {
         tokenFilters.register("stop", StopTokenFilterFactory::new);
         tokenFilters.register("standard", StandardTokenFilterFactory::new);
         tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
-        tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
-        tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
-        tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);
-        tokenFilters.register("dutch_stem", DutchStemTokenFilterFactory::new);
-        tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
-        tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
-        tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
-
         tokenFilters.register("hunspell", requriesAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory
                 (indexSettings, name, settings, hunspellService)));
@@ -188,14 +188,6 @@ public class AnalysisModuleTests extends ESTestCase {
         assertThat(analyzer, instanceOf(CustomAnalyzer.class));
         CustomAnalyzer custom4 = (CustomAnalyzer) analyzer;
         assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
-
-        // // verify Czech stemmer
-        // analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
-        // assertThat(analyzer, instanceOf(CustomAnalyzer.class));
-        // CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
-        // assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
-        // assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
-        // assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
     }

     public void testWordListPath() throws Exception {
@@ -35,10 +35,6 @@
         "custom6":{
             "tokenizer":"standard",
             "position_increment_gap": 256
-        },
-        "czechAnalyzerWithStemmer":{
-            "tokenizer":"standard",
-            "filter":["standard", "lowercase", "stop", "czech_stem"]
         }
     }
 }
@@ -28,6 +28,3 @@ index :
     custom7 :
         type : standard
         version: 3.6
-    czechAnalyzerWithStemmer :
-        tokenizer : standard
-        filter : [standard, lowercase, stop, czech_stem]
@@ -17,17 +17,18 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

 public class ArabicStemTokenFilterFactory extends AbstractTokenFilterFactory {

-    public ArabicStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    ArabicStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
     }
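The hunk above shows only the package and constructor changes; the rest of the factory is outside the excerpt. For context, a sketch of the create method such a factory typically carries over unchanged by the move (assumed, not visible in this diff) simply wraps the incoming stream in Lucene's ArabicStemFilter, which the import list above already references:

    // Assumed body (not shown in the hunk): delegate to Lucene's ArabicStemFilter.
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new ArabicStemFilter(tokenStream);
    }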
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
@@ -26,12 +26,14 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;

 public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {

     private final CharArraySet exclusions;

-    public BrazilianStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    BrazilianStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
@@ -92,20 +92,26 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
         filters.put("apostrophe", ApostropheFilterFactory::new);
         filters.put("arabic_normalization", ArabicNormalizationFilterFactory::new);
+        filters.put("arabic_stem", ArabicStemTokenFilterFactory::new);
         filters.put("asciifolding", ASCIIFoldingTokenFilterFactory::new);
+        filters.put("brazilian_stem", BrazilianStemTokenFilterFactory::new);
         filters.put("cjk_bigram", CJKBigramFilterFactory::new);
         filters.put("cjk_width", CJKWidthFilterFactory::new);
         filters.put("classic", ClassicFilterFactory::new);
+        filters.put("czech_stem", CzechStemTokenFilterFactory::new);
         filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
         filters.put("decimal_digit", DecimalDigitFilterFactory::new);
         filters.put("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         filters.put("dictionary_decompounder", requriesAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
+        filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
         filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
         filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
         filters.put("elision", ElisionTokenFilterFactory::new);
         filters.put("fingerprint", FingerprintTokenFilterFactory::new);
         filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
+        filters.put("french_stem", FrenchStemTokenFilterFactory::new);
         filters.put("german_normalization", GermanNormalizationFilterFactory::new);
+        filters.put("german_stem", GermanStemTokenFilterFactory::new);
         filters.put("hindi_normalization", HindiNormalizationFilterFactory::new);
         filters.put("hyphenation_decompounder", requriesAnalysisSettings(HyphenationCompoundWordTokenFilterFactory::new));
         filters.put("indic_normalization", IndicNormalizationFilterFactory::new);
@@ -124,6 +130,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
         filters.put("porter_stem", PorterStemTokenFilterFactory::new);
         filters.put("reverse", ReverseTokenFilterFactory::new);
+        filters.put("russian_stem", RussianStemTokenFilterFactory::new);
         filters.put("scandinavian_folding", ScandinavianFoldingFilterFactory::new);
         filters.put("scandinavian_normalization", ScandinavianNormalizationFilterFactory::new);
         filters.put("serbian_normalization", SerbianNormalizationFilterFactory::new);
@@ -16,17 +16,18 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cz.CzechStemFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

 public class CzechStemTokenFilterFactory extends AbstractTokenFilterFactory {

-    public CzechStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    CzechStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
     }
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
@@ -26,13 +26,15 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
 import org.tartarus.snowball.ext.DutchStemmer;

 public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {

     private final CharArraySet exclusions;

-    public DutchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    DutchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
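The exclusion-aware factories follow a slightly different pattern. A sketch of what DutchStemTokenFilterFactory's create method plausibly looks like, given the imports above (SnowballFilter, DutchStemmer) and the parsed exclusions field; the use of SetKeywordMarkerFilter here is an assumption, borrowed from the GermanStem hunk further down rather than visible in this excerpt:

    // Assumed body: protect exclusion-listed terms from stemming, then apply the Snowball Dutch stemmer.
    // (Would additionally require: import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;)
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new SnowballFilter(new SetKeywordMarkerFilter(tokenStream, exclusions), new DutchStemmer());
    }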
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
@@ -26,13 +26,15 @@ import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;
 import org.tartarus.snowball.ext.FrenchStemmer;

 public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {

     private final CharArraySet exclusions;

-    public FrenchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    FrenchStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
@@ -26,12 +26,14 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.Analysis;

 public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {

     private final CharArraySet exclusions;

-    public GermanStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    GermanStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
@@ -17,13 +17,14 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

 public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -124,6 +124,10 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         filters.put("fingerprint", FingerprintTokenFilterFactory.class);
         filters.put("minhash", MinHashTokenFilterFactory.class);
         filters.put("scandinavianfolding", ScandinavianFoldingFilterFactory.class);
+        filters.put("arabicstem", ArabicStemTokenFilterFactory.class);
+        filters.put("brazilianstem", BrazilianStemTokenFilterFactory.class);
+        filters.put("czechstem", CzechStemTokenFilterFactory.class);
+        filters.put("germanstem", GermanStemTokenFilterFactory.class);
         return filters;
     }
@@ -1258,3 +1258,220 @@
           filter: [scandinavian_folding]
   - length: { tokens: 1 }
   - match: { tokens.0.token: raksmorgas }
+
+---
+"arabic_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_arabic_stem:
+                  type: arabic_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: الحسن
+          tokenizer: keyword
+          filter: [my_arabic_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: حسن }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: الحسن
+          tokenizer: keyword
+          filter: [arabic_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: حسن }
+
+---
+"brazilian_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_brazilian_stem:
+                  type: brazilian_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: Brasília
+          tokenizer: keyword
+          filter: [my_brazilian_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: brasil }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: Brasília
+          tokenizer: keyword
+          filter: [brazilian_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: brasil }
+
+---
+"czech_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_czech_stem:
+                  type: czech_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: angličtí
+          tokenizer: keyword
+          filter: [my_czech_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: anglick }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: angličtí
+          tokenizer: keyword
+          filter: [czech_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: anglick }
+
+---
+"dutch_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_dutch_stem:
+                  type: dutch_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: ophouden
+          tokenizer: keyword
+          filter: [my_dutch_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: ophoud }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: ophouden
+          tokenizer: keyword
+          filter: [dutch_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: ophoud }
+
+---
+"french_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_french_stem:
+                  type: french_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: chevaux
+          tokenizer: keyword
+          filter: [my_french_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: cheval }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: chevaux
+          tokenizer: keyword
+          filter: [french_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: cheval }
+
+---
+"german_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_german_stem:
+                  type: german_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: abschließen
+          tokenizer: keyword
+          filter: [my_german_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: abschliess }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: abschließen
+          tokenizer: keyword
+          filter: [german_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: abschliess }
+
+---
+"russian_stem":
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            analysis:
+              filter:
+                my_russian_stem:
+                  type: russian_stem
+  - do:
+      indices.analyze:
+        index: test
+        body:
+          text: журналы
+          tokenizer: keyword
+          filter: [my_russian_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: журнал }
+
+  # Test pre-configured token filter too:
+  - do:
+      indices.analyze:
+        body:
+          text: журналы
+          tokenizer: keyword
+          filter: [russian_stem]
+  - length: { tokens: 1 }
+  - match: { tokens.0.token: журнал }
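The REST tests above exercise each filter end-to-end through the analyze API. Since every factory here wraps a stock Lucene filter, the same expectation can be spot-checked at the Lucene level. A minimal, self-contained sketch for the czech_stem case (the class name and the use of KeywordTokenizer are illustrative, not from the commit; the expected output "anglick" is taken from the YAML test above):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Illustrative check mirroring the "czech_stem" REST test: a keyword-tokenized
// input run through Lucene's CzechStemFilter should come out stemmed.
public class CzechStemSpotCheck {
    public static void main(String[] args) throws IOException {
        KeywordTokenizer tokenizer = new KeywordTokenizer();
        tokenizer.setReader(new StringReader("angličtí"));
        try (TokenStream stream = new CzechStemFilter(tokenizer)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString()); // expected: anglick (per the YAML test above)
            }
            stream.end();
        }
    }
}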
@@ -23,12 +23,8 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.elasticsearch.common.collect.MapBuilder;
-import org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory;
-import org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
-import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
-import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
@@ -114,16 +110,16 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         // exposed in ES
         .put("apostrophe", MovedToAnalysisCommon.class)
         .put("arabicnormalization", MovedToAnalysisCommon.class)
-        .put("arabicstem", ArabicStemTokenFilterFactory.class)
+        .put("arabicstem", MovedToAnalysisCommon.class)
         .put("asciifolding", MovedToAnalysisCommon.class)
-        .put("brazilianstem", BrazilianStemTokenFilterFactory.class)
+        .put("brazilianstem", MovedToAnalysisCommon.class)
         .put("bulgarianstem", MovedToAnalysisCommon.class)
         .put("cjkbigram", MovedToAnalysisCommon.class)
         .put("cjkwidth", MovedToAnalysisCommon.class)
         .put("classic", MovedToAnalysisCommon.class)
         .put("commongrams", MovedToAnalysisCommon.class)
         .put("commongramsquery", MovedToAnalysisCommon.class)
-        .put("czechstem", CzechStemTokenFilterFactory.class)
+        .put("czechstem", MovedToAnalysisCommon.class)
         .put("decimaldigit", MovedToAnalysisCommon.class)
         .put("delimitedpayload", MovedToAnalysisCommon.class)
         .put("dictionarycompoundword", MovedToAnalysisCommon.class)
@@ -136,7 +132,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         .put("frenchminimalstem", MovedToAnalysisCommon.class)
         .put("galicianminimalstem", MovedToAnalysisCommon.class)
         .put("galicianstem", MovedToAnalysisCommon.class)
-        .put("germanstem", GermanStemTokenFilterFactory.class)
+        .put("germanstem", MovedToAnalysisCommon.class)
         .put("germanlightstem", MovedToAnalysisCommon.class)
         .put("germanminimalstem", MovedToAnalysisCommon.class)
         .put("germannormalization", MovedToAnalysisCommon.class)