Add a MultiTermAwareComponent marker interface to analysis factories. #19028

This is the same as what Lucene does for its analysis factories, and we have
tests that make sure that the elasticsearch factories are in sync with
Lucene's. This is a first step to move forward on #9978 and #18064.
This commit is contained in:
Adrien Grand 2016-06-22 18:31:39 +02:00
parent 6c8744ecb5
commit 7ba5bceebe
28 changed files with 690 additions and 207 deletions

View File

@ -29,7 +29,7 @@ import org.elasticsearch.index.IndexSettings;
/**
* Factory for ASCIIFoldingFilter.
*/
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public static ParseField PRESERVE_ORIGINAL = new ParseField("preserve_original");
public static boolean DEFAULT_PRESERVE_ORIGINAL = false;
@ -44,4 +44,9 @@ public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenStream create(TokenStream tokenStream) {
return new ASCIIFoldingFilter(tokenStream, preserveOriginal);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*
*/
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public ArabicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -37,4 +37,9 @@ public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory
public TokenStream create(TokenStream tokenStream) {
return new ArabicNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -25,7 +25,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory {
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public CJKWidthFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
@ -36,4 +36,9 @@ public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory {
return new CJKWidthFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -28,7 +28,7 @@ import org.elasticsearch.index.IndexSettings;
/**
* Factory for {@link DecimalDigitFilter}
*/
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory {
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public DecimalDigitFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,9 @@ public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory
public TokenStream create(TokenStream tokenStream) {
return new DecimalDigitFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -29,7 +29,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*
*/
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
private final CharArraySet articles;
@ -42,4 +42,9 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenStream create(TokenStream tokenStream) {
return new ElisionFilter(tokenStream, articles);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
* Factory for {@link GermanNormalizationFilter}
*/
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public GermanNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,8 @@ public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory
return new GermanNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
* Factory for {@link HindiNormalizationFilter}
*/
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public HindiNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,8 @@ public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory
return new HindiNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
* Factory for {@link IndicNormalizationFilter}
*/
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public IndicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,8 @@ public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory
return new IndicNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -37,7 +37,7 @@ import org.elasticsearch.index.IndexSettings;
* <li>turkish: {@link TurkishLowerCaseFilter}
* </ul>
*/
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory {
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
private final String lang;
@ -60,6 +60,11 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory {
throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -28,7 +28,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*
*/
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory {
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implements MultiTermAwareComponent {
public LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,9 @@ public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory {
public Tokenizer create() {
return new LowerCaseTokenizer();
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -30,7 +30,7 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MappingCharFilterFactory extends AbstractCharFilterFactory {
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
private final NormalizeCharMap normMap;
@ -114,4 +114,9 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory {
}
return new String(out, 0, writePos);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -0,0 +1,30 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
/** Elasticsearch counterpart of {@link org.apache.lucene.analysis.util.MultiTermAwareComponent}. */
public interface MultiTermAwareComponent {

    /**
     * Returns the analysis component to use to handle analysis of multi-term queries.
     * The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
     */
    Object getMultiTermComponent();
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*
*/
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public PersianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,8 @@ public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactor
return new PersianNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*
*/
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public SerbianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -37,4 +37,9 @@ public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactor
public TokenStream create(TokenStream tokenStream) {
return new SerbianNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,7 +27,7 @@ import org.elasticsearch.index.IndexSettings;
/**
* Factory for {@link SoraniNormalizationFilter}
*/
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public SoraniNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,4 +38,9 @@ public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory
return new SoraniNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -28,7 +28,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*
*/
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory {
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public UpperCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,6 +38,11 @@ public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenStream create(TokenStream tokenStream) {
return new UpperCaseFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -19,192 +19,8 @@
package org.elasticsearch.index.analysis;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* Alerts us if new analyzers are added to lucene, so we don't miss them.
* <p>
* If we don't want to expose one for a specific reason, just map it to Void
*/
public class AnalysisFactoryTests extends ESTestCase {
static final Map<String,Class<?>> KNOWN_TOKENIZERS = new HashMap<String,Class<?>>() {{
// deprecated ones, we dont care about these
put("arabicletter", Deprecated.class);
put("chinese", Deprecated.class);
put("cjk", Deprecated.class);
put("russianletter", Deprecated.class);
// exposed in ES
put("classic", ClassicTokenizerFactory.class);
put("edgengram", EdgeNGramTokenizerFactory.class);
put("keyword", KeywordTokenizerFactory.class);
put("letter", LetterTokenizerFactory.class);
put("lowercase", LowerCaseTokenizerFactory.class);
put("ngram", NGramTokenizerFactory.class);
put("pathhierarchy", PathHierarchyTokenizerFactory.class);
put("pattern", PatternTokenizerFactory.class);
put("standard", StandardTokenizerFactory.class);
put("thai", ThaiTokenizerFactory.class);
put("uax29urlemail", UAX29URLEmailTokenizerFactory.class);
put("whitespace", WhitespaceTokenizerFactory.class);
// this one "seems to mess up offsets". probably shouldn't be a tokenizer...
put("wikipedia", Void.class);
}};
public void testTokenizers() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
missing.removeAll(KNOWN_TOKENIZERS.keySet());
assertTrue("new tokenizers found, please update KNOWN_TOKENIZERS: " + missing.toString(), missing.isEmpty());
}
static final Map<String,Class<?>> KNOWN_TOKENFILTERS = new HashMap<String,Class<?>>() {{
// deprecated ones, we dont care about these
put("chinese", Deprecated.class);
put("collationkey", Deprecated.class);
put("position", Deprecated.class);
put("thaiword", Deprecated.class);
// exposed in ES
put("apostrophe", ApostropheFilterFactory.class);
put("arabicnormalization", ArabicNormalizationFilterFactory.class);
put("arabicstem", ArabicStemTokenFilterFactory.class);
put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
put("brazilianstem", BrazilianStemTokenFilterFactory.class);
put("bulgarianstem", StemmerTokenFilterFactory.class);
put("cjkbigram", CJKBigramFilterFactory.class);
put("cjkwidth", CJKWidthFilterFactory.class);
put("classic", ClassicFilterFactory.class);
put("commongrams", CommonGramsTokenFilterFactory.class);
put("commongramsquery", CommonGramsTokenFilterFactory.class);
put("czechstem", CzechStemTokenFilterFactory.class);
put("decimaldigit", DecimalDigitFilterFactory.class);
put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class);
put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class);
put("edgengram", EdgeNGramTokenFilterFactory.class);
put("elision", ElisionTokenFilterFactory.class);
put("englishminimalstem", StemmerTokenFilterFactory.class);
put("englishpossessive", StemmerTokenFilterFactory.class);
put("finnishlightstem", StemmerTokenFilterFactory.class);
put("frenchlightstem", StemmerTokenFilterFactory.class);
put("frenchminimalstem", StemmerTokenFilterFactory.class);
put("galicianminimalstem", StemmerTokenFilterFactory.class);
put("galicianstem", StemmerTokenFilterFactory.class);
put("germanstem", GermanStemTokenFilterFactory.class);
put("germanlightstem", StemmerTokenFilterFactory.class);
put("germanminimalstem", StemmerTokenFilterFactory.class);
put("germannormalization", GermanNormalizationFilterFactory.class);
put("greeklowercase", LowerCaseTokenFilterFactory.class);
put("greekstem", StemmerTokenFilterFactory.class);
put("hindinormalization", HindiNormalizationFilterFactory.class);
put("hindistem", StemmerTokenFilterFactory.class);
put("hungarianlightstem", StemmerTokenFilterFactory.class);
put("hunspellstem", HunspellTokenFilterFactory.class);
put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class);
put("indicnormalization", IndicNormalizationFilterFactory.class);
put("irishlowercase", LowerCaseTokenFilterFactory.class);
put("indonesianstem", StemmerTokenFilterFactory.class);
put("italianlightstem", StemmerTokenFilterFactory.class);
put("keepword", KeepWordFilterFactory.class);
put("keywordmarker", KeywordMarkerTokenFilterFactory.class);
put("kstem", KStemTokenFilterFactory.class);
put("latvianstem", StemmerTokenFilterFactory.class);
put("length", LengthTokenFilterFactory.class);
put("limittokencount", LimitTokenCountFilterFactory.class);
put("lowercase", LowerCaseTokenFilterFactory.class);
put("ngram", NGramTokenFilterFactory.class);
put("norwegianlightstem", StemmerTokenFilterFactory.class);
put("norwegianminimalstem", StemmerTokenFilterFactory.class);
put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class);
put("patternreplace", PatternReplaceTokenFilterFactory.class);
put("persiannormalization", PersianNormalizationFilterFactory.class);
put("porterstem", PorterStemTokenFilterFactory.class);
put("portuguesestem", StemmerTokenFilterFactory.class);
put("portugueselightstem", StemmerTokenFilterFactory.class);
put("portugueseminimalstem", StemmerTokenFilterFactory.class);
put("reversestring", ReverseTokenFilterFactory.class);
put("russianlightstem", StemmerTokenFilterFactory.class);
put("scandinavianfolding", ScandinavianFoldingFilterFactory.class);
put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class);
put("serbiannormalization", SerbianNormalizationFilterFactory.class);
put("shingle", ShingleTokenFilterFactory.class);
put("snowballporter", SnowballTokenFilterFactory.class);
put("soraninormalization", SoraniNormalizationFilterFactory.class);
put("soranistem", StemmerTokenFilterFactory.class);
put("spanishlightstem", StemmerTokenFilterFactory.class);
put("standard", StandardTokenFilterFactory.class);
put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
put("stop", StopTokenFilterFactory.class);
put("swedishlightstem", StemmerTokenFilterFactory.class);
put("synonym", SynonymTokenFilterFactory.class);
put("trim", TrimTokenFilterFactory.class);
put("truncate", TruncateTokenFilterFactory.class);
put("turkishlowercase", LowerCaseTokenFilterFactory.class);
put("type", KeepTypesFilterFactory.class);
put("uppercase", UpperCaseTokenFilterFactory.class);
put("worddelimiter", WordDelimiterTokenFilterFactory.class);
// TODO: these tokenfilters are not yet exposed: useful?
// suggest stop
put("suggeststop", Void.class);
// capitalizes tokens
put("capitalization", Void.class);
// like length filter (but codepoints)
put("codepointcount", Void.class);
// puts hyphenated words back together
put("hyphenatedwords", Void.class);
// repeats anything marked as keyword
put("keywordrepeat", Void.class);
// like limittokencount, but by offset
put("limittokenoffset", Void.class);
// like limittokencount, but by position
put("limittokenposition", Void.class);
// ???
put("numericpayload", Void.class);
// removes duplicates at the same position (this should be used by the existing factory)
put("removeduplicates", Void.class);
// ???
put("tokenoffsetpayload", Void.class);
// puts the type into the payload
put("typeaspayload", Void.class);
// fingerprint
put("fingerprint", Void.class);
// for tee-sinks
put("daterecognizer", Void.class);
}};
public void testTokenFilters() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenFilterFactory.availableTokenFilters());
missing.removeAll(KNOWN_TOKENFILTERS.keySet());
assertTrue("new tokenfilters found, please update KNOWN_TOKENFILTERS: " + missing.toString(), missing.isEmpty());
}
static final Map<String,Class<?>> KNOWN_CHARFILTERS = new HashMap<String,Class<?>>() {{
// exposed in ES
put("htmlstrip", HtmlStripCharFilterFactory.class);
put("mapping", MappingCharFilterFactory.class);
put("patternreplace", PatternReplaceCharFilterFactory.class);
// TODO: these charfilters are not yet exposed: useful?
// handling of zwnj for persian
put("persian", Void.class);
}};
public void testCharFilters() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.CharFilterFactory.availableCharFilters());
missing.removeAll(KNOWN_CHARFILTERS.keySet());
assertTrue("new charfilters found, please update KNOWN_CHARFILTERS: " + missing.toString(), missing.isEmpty());
}
import org.elasticsearch.AnalysisFactoryTestCase;
// Runs the shared factory-sync checks from AnalysisFactoryTestCase against the
// core analysis factories; no additional factories are registered here.
public class AnalysisFactoryTests extends AnalysisFactoryTestCase {
// tests are inherited
}

View File

@ -40,7 +40,7 @@ import org.elasticsearch.index.IndexSettings;
*
* @author kimchy (shay.banon)
*/
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
private final String unicodeSetFilter;
public IcuFoldingTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
@ -67,4 +67,9 @@ public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
return new ICUFoldingFilter(tokenStream);
}
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -34,7 +34,7 @@ import java.io.Reader;
* <p>The <tt>name</tt> can be used to provide the type of normalization to perform.</p>
* <p>The <tt>mode</tt> can be used to provide 'compose' or 'decompose'. Default is compose.</p>
*/
public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory {
public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
private final String name;
@ -55,4 +55,9 @@ public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory {
public Reader create(Reader reader) {
return new ICUNormalizer2CharFilter(reader, normalizer);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -32,7 +32,7 @@ import org.elasticsearch.index.IndexSettings;
*
*
*/
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory {
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
private final String name;
@ -45,4 +45,9 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
public TokenStream create(TokenStream tokenStream) {
return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, Normalizer2.getInstance(null, name, Normalizer2.Mode.COMPOSE));
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -29,7 +29,7 @@ import org.elasticsearch.index.IndexSettings;
/**
*/
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory {
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
private final String id;
private final int dir;
@ -47,4 +47,9 @@ public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenStream create(TokenStream tokenStream) {
return new ICUTransformFilter(tokenStream, transliterator);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.AnalysisFactoryTestCase;
import java.util.HashMap;
import java.util.Map;
/** Registers the ICU plugin's analysis factories on top of the shared factory-sync checks. */
public class AnalysisICUFactoryTests extends AnalysisFactoryTestCase {

    @Override
    protected Map<String, Class<?>> getTokenizers() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenizers());
        result.put("icu", IcuTokenizerFactory.class);
        return result;
    }

    @Override
    protected Map<String, Class<?>> getTokenFilters() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenFilters());
        result.put("icufolding", IcuFoldingTokenFilterFactory.class);
        result.put("icunormalizer2", IcuNormalizerTokenFilterFactory.class);
        result.put("icutransform", IcuTransformTokenFilterFactory.class);
        return result;
    }

    @Override
    protected Map<String, Class<?>> getCharFilters() {
        Map<String, Class<?>> result = new HashMap<>(super.getCharFilters());
        result.put("icunormalizer2", IcuNormalizerCharFilterFactory.class);
        return result;
    }
}

View File

@ -26,7 +26,7 @@ import org.elasticsearch.index.IndexSettings;
import java.io.Reader;
public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory {
public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
private final boolean normalizeKanji;
private final boolean normalizeKana;
@ -41,4 +41,9 @@ public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFa
public Reader create(Reader reader) {
return new JapaneseIterationMarkCharFilter(reader, normalizeKanji, normalizeKana);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.ja.JapaneseTokenizerFactory;
import org.elasticsearch.AnalysisFactoryTestCase;
import java.util.HashMap;
import java.util.Map;
/** Registers the Kuromoji plugin's analysis factories on top of the shared factory-sync checks. */
public class AnalysisKuromojiFactoryTests extends AnalysisFactoryTestCase {

    @Override
    protected Map<String, Class<?>> getTokenizers() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenizers());
        result.put("japanese", JapaneseTokenizerFactory.class);
        return result;
    }

    @Override
    protected Map<String, Class<?>> getTokenFilters() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenFilters());
        result.put("japanesebaseform", KuromojiBaseFormFilterFactory.class);
        result.put("japanesepartofspeechstop", KuromojiPartOfSpeechFilterFactory.class);
        result.put("japanesereadingform", KuromojiReadingFormFilterFactory.class);
        result.put("japanesekatakanastem", KuromojiKatakanaStemmerFactory.class);
        result.put("japanesenumber", KuromojiNumberFilterFactory.class);
        return result;
    }

    @Override
    protected Map<String, Class<?>> getCharFilters() {
        Map<String, Class<?>> result = new HashMap<>(super.getCharFilters());
        result.put("japaneseiterationmark", KuromojiIterationMarkCharFilterFactory.class);
        return result;
    }
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.AnalysisFactoryTestCase;
import java.util.HashMap;
import java.util.Map;
/** Registers the phonetic plugin's token filter factories on top of the shared factory-sync checks. */
public class AnalysisPhoneticFactoryTests extends AnalysisFactoryTestCase {

    @Override
    protected Map<String, Class<?>> getTokenFilters() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenFilters());
        // All phonetic algorithms are exposed through the same factory.
        for (String name : new String[] { "beidermorse", "doublemetaphone", "phonetic" }) {
            result.put(name, PhoneticTokenFilterFactory.class);
        }
        return result;
    }
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.AnalysisFactoryTestCase;
import java.util.HashMap;
import java.util.Map;
/** Registers the Smart Chinese plugin's tokenizer factory on top of the shared factory-sync checks. */
public class AnalysisSmartChineseFactoryTests extends AnalysisFactoryTestCase {

    @Override
    protected Map<String, Class<?>> getTokenizers() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenizers());
        result.put("hmmchinese", SmartChineseTokenizerTokenizerFactory.class);
        return result;
    }
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.AnalysisFactoryTestCase;
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
import java.util.HashMap;
import java.util.Map;
/** Registers the Stempel (Polish) plugin's token filter factory on top of the shared factory-sync checks. */
public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {

    @Override
    protected Map<String, Class<?>> getTokenFilters() {
        Map<String, Class<?>> result = new HashMap<>(super.getTokenFilters());
        result.put("stempelpolishstem", PolishStemTokenFilterFactory.class);
        return result;
    }
}

View File

@ -0,0 +1,325 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.index.analysis.ASCIIFoldingTokenFilterFactory;
import org.elasticsearch.index.analysis.ApostropheFilterFactory;
import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
import org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory;
import org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory;
import org.elasticsearch.index.analysis.CJKBigramFilterFactory;
import org.elasticsearch.index.analysis.CJKWidthFilterFactory;
import org.elasticsearch.index.analysis.ClassicFilterFactory;
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MappingCharFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory;
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.SnowballTokenFilterFactory;
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory;
import org.elasticsearch.index.analysis.StemmerTokenFilterFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
import org.elasticsearch.index.analysis.TrimTokenFilterFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* Alerts us if new analyzers are added to lucene, so we don't miss them.
* <p>
* If we don't want to expose one for a specific reason, just map it to Void.
* The deprecated ones can be mapped to Deprecated.class.
*/
public class AnalysisFactoryTestCase extends ESTestCase {
// Lucene SPI tokenizer names -> the Elasticsearch factory that exposes them.
// Void.class marks a Lucene tokenizer deliberately not exposed in ES.
static final Map<String,Class<?>> KNOWN_TOKENIZERS = new MapBuilder<String,Class<?>>()
// exposed in ES
.put("classic", ClassicTokenizerFactory.class)
.put("edgengram", EdgeNGramTokenizerFactory.class)
.put("keyword", KeywordTokenizerFactory.class)
.put("letter", LetterTokenizerFactory.class)
.put("lowercase", LowerCaseTokenizerFactory.class)
.put("ngram", NGramTokenizerFactory.class)
.put("pathhierarchy", PathHierarchyTokenizerFactory.class)
.put("pattern", PatternTokenizerFactory.class)
.put("standard", StandardTokenizerFactory.class)
.put("thai", ThaiTokenizerFactory.class)
.put("uax29urlemail", UAX29URLEmailTokenizerFactory.class)
.put("whitespace", WhitespaceTokenizerFactory.class)
// this one "seems to mess up offsets". probably shouldn't be a tokenizer...
.put("wikipedia", Void.class)
.immutableMap();
// Lucene SPI token filter names -> the ES factory that exposes them. Several
// Lucene filters map onto the single configurable StemmerTokenFilterFactory;
// Void.class marks filters deliberately not exposed.
static final Map<String,Class<?>> KNOWN_TOKENFILTERS = new MapBuilder<String,Class<?>>()
// exposed in ES
.put("apostrophe", ApostropheFilterFactory.class)
.put("arabicnormalization", ArabicNormalizationFilterFactory.class)
.put("arabicstem", ArabicStemTokenFilterFactory.class)
.put("asciifolding", ASCIIFoldingTokenFilterFactory.class)
.put("brazilianstem", BrazilianStemTokenFilterFactory.class)
.put("bulgarianstem", StemmerTokenFilterFactory.class)
.put("cjkbigram", CJKBigramFilterFactory.class)
.put("cjkwidth", CJKWidthFilterFactory.class)
.put("classic", ClassicFilterFactory.class)
.put("commongrams", CommonGramsTokenFilterFactory.class)
.put("commongramsquery", CommonGramsTokenFilterFactory.class)
.put("czechstem", CzechStemTokenFilterFactory.class)
.put("decimaldigit", DecimalDigitFilterFactory.class)
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class)
.put("edgengram", EdgeNGramTokenFilterFactory.class)
.put("elision", ElisionTokenFilterFactory.class)
.put("englishminimalstem", StemmerTokenFilterFactory.class)
.put("englishpossessive", StemmerTokenFilterFactory.class)
.put("finnishlightstem", StemmerTokenFilterFactory.class)
.put("frenchlightstem", StemmerTokenFilterFactory.class)
.put("frenchminimalstem", StemmerTokenFilterFactory.class)
.put("galicianminimalstem", StemmerTokenFilterFactory.class)
.put("galicianstem", StemmerTokenFilterFactory.class)
.put("germanstem", GermanStemTokenFilterFactory.class)
.put("germanlightstem", StemmerTokenFilterFactory.class)
.put("germanminimalstem", StemmerTokenFilterFactory.class)
.put("germannormalization", GermanNormalizationFilterFactory.class)
.put("greeklowercase", LowerCaseTokenFilterFactory.class)
.put("greekstem", StemmerTokenFilterFactory.class)
.put("hindinormalization", HindiNormalizationFilterFactory.class)
.put("hindistem", StemmerTokenFilterFactory.class)
.put("hungarianlightstem", StemmerTokenFilterFactory.class)
.put("hunspellstem", HunspellTokenFilterFactory.class)
.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class)
.put("indicnormalization", IndicNormalizationFilterFactory.class)
.put("irishlowercase", LowerCaseTokenFilterFactory.class)
.put("indonesianstem", StemmerTokenFilterFactory.class)
.put("italianlightstem", StemmerTokenFilterFactory.class)
.put("keepword", KeepWordFilterFactory.class)
.put("keywordmarker", KeywordMarkerTokenFilterFactory.class)
.put("kstem", KStemTokenFilterFactory.class)
.put("latvianstem", StemmerTokenFilterFactory.class)
.put("length", LengthTokenFilterFactory.class)
.put("limittokencount", LimitTokenCountFilterFactory.class)
.put("lowercase", LowerCaseTokenFilterFactory.class)
.put("ngram", NGramTokenFilterFactory.class)
.put("norwegianlightstem", StemmerTokenFilterFactory.class)
.put("norwegianminimalstem", StemmerTokenFilterFactory.class)
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
.put("persiannormalization", PersianNormalizationFilterFactory.class)
.put("porterstem", PorterStemTokenFilterFactory.class)
.put("portuguesestem", StemmerTokenFilterFactory.class)
.put("portugueselightstem", StemmerTokenFilterFactory.class)
.put("portugueseminimalstem", StemmerTokenFilterFactory.class)
.put("reversestring", ReverseTokenFilterFactory.class)
.put("russianlightstem", StemmerTokenFilterFactory.class)
.put("scandinavianfolding", ScandinavianFoldingFilterFactory.class)
.put("scandinaviannormalization", ScandinavianNormalizationFilterFactory.class)
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
.put("shingle", ShingleTokenFilterFactory.class)
.put("snowballporter", SnowballTokenFilterFactory.class)
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
.put("soranistem", StemmerTokenFilterFactory.class)
.put("spanishlightstem", StemmerTokenFilterFactory.class)
.put("standard", StandardTokenFilterFactory.class)
.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class)
.put("stop", StopTokenFilterFactory.class)
.put("swedishlightstem", StemmerTokenFilterFactory.class)
.put("synonym", SynonymTokenFilterFactory.class)
.put("trim", TrimTokenFilterFactory.class)
.put("truncate", TruncateTokenFilterFactory.class)
.put("turkishlowercase", LowerCaseTokenFilterFactory.class)
.put("type", KeepTypesFilterFactory.class)
.put("uppercase", UpperCaseTokenFilterFactory.class)
.put("worddelimiter", WordDelimiterTokenFilterFactory.class)
// TODO: these tokenfilters are not yet exposed: useful?
// suggest stop
.put("suggeststop", Void.class)
// capitalizes tokens
.put("capitalization", Void.class)
// like length filter (but codepoints)
.put("codepointcount", Void.class)
// puts hyphenated words back together
.put("hyphenatedwords", Void.class)
// repeats anything marked as keyword
.put("keywordrepeat", Void.class)
// like limittokencount, but by offset
.put("limittokenoffset", Void.class)
// like limittokencount, but by position
.put("limittokenposition", Void.class)
// ???
.put("numericpayload", Void.class)
// removes duplicates at the same position (this should be used by the existing factory)
.put("removeduplicates", Void.class)
// ???
.put("tokenoffsetpayload", Void.class)
// puts the type into the payload
.put("typeaspayload", Void.class)
// fingerprint
.put("fingerprint", Void.class)
// for tee-sinks
.put("daterecognizer", Void.class)
.immutableMap();
// Lucene SPI char filter names -> the ES factory that exposes them.
static final Map<String,Class<?>> KNOWN_CHARFILTERS = new MapBuilder<String,Class<?>>()
// exposed in ES
.put("htmlstrip", HtmlStripCharFilterFactory.class)
.put("mapping", MappingCharFilterFactory.class)
.put("patternreplace", PatternReplaceCharFilterFactory.class)
// TODO: these charfilters are not yet exposed: useful?
// handling of zwnj for persian
.put("persian", Void.class)
.immutableMap();
// Subclasses (plugin test cases) override these getters to add their
// plugin-provided factories on top of the baseline maps above.
protected Map<String, Class<?>> getTokenizers() {
return KNOWN_TOKENIZERS;
}
protected Map<String, Class<?>> getTokenFilters() {
return KNOWN_TOKENFILTERS;
}
protected Map<String, Class<?>> getCharFilters() {
return KNOWN_CHARFILTERS;
}
// Fails when Lucene's SPI advertises a tokenizer name not accounted for in
// getTokenizers() — i.e. a new Lucene tokenizer we haven't triaged yet.
public void testTokenizers() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
missing.removeAll(getTokenizers().keySet());
assertTrue("new tokenizers found, please update KNOWN_TOKENIZERS: " + missing.toString(), missing.isEmpty());
}
// Same check for char filters.
public void testCharFilters() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.CharFilterFactory.availableCharFilters());
missing.removeAll(getCharFilters().keySet());
assertTrue("new charfilters found, please update KNOWN_CHARFILTERS: " + missing.toString(), missing.isEmpty());
}
// Same check for token filters.
public void testTokenFilters() {
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenFilterFactory.availableTokenFilters());
missing.removeAll(getTokenFilters().keySet());
assertTrue("new tokenfilters found, please update KNOWN_TOKENFILTERS: " + missing.toString(), missing.isEmpty());
}
// Verifies the ES MultiTermAwareComponent marker stays in sync with Lucene:
// an ES factory must implement it exactly when the corresponding Lucene
// factory implements Lucene's MultiTermAwareComponent.
public void testMultiTermAware() {
// "expected": ES factory classes whose Lucene counterpart (looked up by SPI
// name) is multi-term aware.
Collection<Class<?>> expected = new HashSet<>();
for (Map.Entry<String, Class<?>> entry : getTokenizers().entrySet()) {
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
org.apache.lucene.analysis.util.TokenizerFactory.lookupClass(entry.getKey()))) {
expected.add(entry.getValue());
}
}
for (Map.Entry<String, Class<?>> entry : getTokenFilters().entrySet()) {
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
org.apache.lucene.analysis.util.TokenFilterFactory.lookupClass(entry.getKey()))) {
expected.add(entry.getValue());
}
}
for (Map.Entry<String, Class<?>> entry : getCharFilters().entrySet()) {
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(entry.getKey()))) {
expected.add(entry.getValue());
}
}
// Placeholders for "not exposed" / "deprecated" carry no ES factory.
expected.remove(Void.class);
expected.remove(Deprecated.class);
// "actual": ES factory classes that implement the ES marker interface.
Collection<Class<?>> actual = new HashSet<>();
for (Class<?> clazz : getTokenizers().values()) {
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
actual.add(clazz);
}
}
for (Class<?> clazz : getTokenFilters().values()) {
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
actual.add(clazz);
}
}
for (Class<?> clazz : getCharFilters().values()) {
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
actual.add(clazz);
}
}
// Compare both directions so the sets must match exactly.
Set<Class<?>> classesMissingMultiTermSupport = new HashSet<>(expected);
classesMissingMultiTermSupport.removeAll(actual);
assertTrue("Classes are missing multi-term support: " + classesMissingMultiTermSupport,
classesMissingMultiTermSupport.isEmpty());
Set<Class<?>> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
assertTrue("Classes should not have multi-term support: " + classesThatShouldNotHaveMultiTermSupport,
classesThatShouldNotHaveMultiTermSupport.isEmpty());
}
}