Move more token filters to analysis-common module
The following token filters were moved: common grams, limit token, pattern capture and pattern replace. Relates to #23658
This commit is contained in:
parent
d71feceb23
commit
6db708ef75
|
@ -266,7 +266,6 @@
|
|||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicyConfig.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLog.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisRegistry.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CommonGramsTokenFilterFactory.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CustomAnalyzerProvider.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]ShingleTokenFilterFactory.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]StemmerOverrideTokenFilterFactory.java" checks="LineLength" />
|
||||
|
@ -564,9 +563,7 @@
|
|||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineMergeIT.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineTests.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]fielddata[/\\]AbstractFieldDataTestCase.java" checks="LineLength" />
|
||||
|
|
|
@ -46,7 +46,6 @@ import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
|
|||
import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.ClassicFilterFactory;
|
||||
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
|
||||
|
@ -80,7 +79,6 @@ import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
|
|||
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
|
||||
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
|
||||
|
@ -88,8 +86,6 @@ import org.elasticsearch.index.analysis.NGramTokenizerFactory;
|
|||
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
|
||||
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
|
||||
|
@ -196,13 +192,9 @@ public final class AnalysisModule {
|
|||
tokenFilters.register("standard", StandardTokenFilterFactory::new);
|
||||
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
|
||||
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
|
||||
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
|
||||
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
|
||||
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
|
||||
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
|
||||
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
|
||||
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
|
||||
tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
|
||||
tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
|
||||
tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
|
||||
tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);
|
||||
|
|
|
@ -68,7 +68,6 @@ import org.apache.lucene.analysis.util.ElisionFilter;
|
|||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||
|
@ -115,6 +114,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
|
|||
filters.put("reverse", ReverseTokenFilterFactory::new);
|
||||
filters.put("elision", ElisionTokenFilterFactory::new);
|
||||
filters.put("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
|
||||
filters.put("limit", LimitTokenCountFilterFactory::new);
|
||||
filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
|
||||
filters.put("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
|
||||
filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
|
||||
return filters;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -26,6 +26,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
|
|||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.Analysis;
|
||||
|
||||
public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
|
@ -35,14 +37,17 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
|
||||
private final boolean queryMode;
|
||||
|
||||
public CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
|
||||
this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "query_mode", false, deprecationLogger);
|
||||
this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
|
||||
"ignore_case", false, deprecationLogger);
|
||||
this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
|
||||
"query_mode", false, deprecationLogger);
|
||||
this.words = Analysis.parseCommonWords(env, settings, null, ignoreCase);
|
||||
|
||||
if (this.words == null) {
|
||||
throw new IllegalArgumentException("missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
|
||||
throw new IllegalArgumentException(
|
||||
"missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
|
||||
}
|
||||
}
|
||||
|
|
@ -17,23 +17,24 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
|
||||
public class LimitTokenCountFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
public static final int DEFAULT_MAX_TOKEN_COUNT = 1;
|
||||
public static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
|
||||
static final int DEFAULT_MAX_TOKEN_COUNT = 1;
|
||||
static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
|
||||
|
||||
final int maxTokenCount;
|
||||
final boolean consumeAllTokens;
|
||||
private final int maxTokenCount;
|
||||
private final boolean consumeAllTokens;
|
||||
|
||||
public LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
this.maxTokenCount = settings.getAsInt("max_token_count", DEFAULT_MAX_TOKEN_COUNT);
|
||||
this.consumeAllTokens = settings.getAsBooleanLenientForPreEs6Indices(
|
|
@ -16,7 +16,7 @@
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.analysis;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter;
|
|||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -34,7 +35,7 @@ public class PatternCaptureGroupTokenFilterFactory extends AbstractTokenFilterFa
|
|||
private static final String PATTERNS_KEY = "patterns";
|
||||
private static final String PRESERVE_ORIG_KEY = "preserve_original";
|
||||
|
||||
public PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
String[] regexes = settings.getAsArray(PATTERNS_KEY, null, false);
|
||||
if (regexes == null) {
|
|
@ -35,7 +35,7 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i
|
|||
private final Pattern pattern;
|
||||
private final String replacement;
|
||||
|
||||
public PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
super(indexSettings, name);
|
||||
|
||||
String sPattern = settings.get("pattern");
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
|
||||
|
@ -25,6 +25,7 @@ import org.elasticsearch.common.regex.Regex;
|
|||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
|
@ -101,6 +101,11 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
|
|||
filters.put("reversestring", ReverseTokenFilterFactory.class);
|
||||
filters.put("elision", ElisionTokenFilterFactory.class);
|
||||
filters.put("truncate", TruncateTokenFilterFactory.class);
|
||||
filters.put("limittokencount", LimitTokenCountFilterFactory.class);
|
||||
filters.put("commongrams", CommonGramsTokenFilterFactory.class);
|
||||
filters.put("commongramsquery", CommonGramsTokenFilterFactory.class);
|
||||
filters.put("patternreplace", PatternReplaceTokenFilterFactory.class);
|
||||
filters.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class);
|
||||
return filters;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis.commongrams;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
@ -60,7 +60,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
{
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
|
||||
String source = "the quick brown is a fox Or noT";
|
||||
|
@ -77,7 +77,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
{
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
|
||||
String source = "the quick brown is a fox Or noT";
|
||||
|
@ -96,10 +96,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.putArray("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
|
||||
String source = "the quick brown is a fox or noT";
|
||||
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
|
||||
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
|
||||
"a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer();
|
||||
tokenizer.setReader(new StringReader(source));
|
||||
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||
|
@ -110,10 +111,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.putArray("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
|
||||
String source = "the quick brown is a fox or why noT";
|
||||
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "or", "why", "why_noT", "noT" };
|
||||
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "" +
|
||||
"a_fox", "fox", "or", "why", "why_noT", "noT" };
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer();
|
||||
tokenizer.setReader(new StringReader(source));
|
||||
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||
|
@ -123,10 +125,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.putArray("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
|
||||
String source = "the quick brown is a fox Or noT";
|
||||
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "Or", "noT" };
|
||||
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
|
||||
"a_fox", "fox", "Or", "noT" };
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer();
|
||||
tokenizer.setReader(new StringReader(source));
|
||||
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||
|
@ -134,25 +137,27 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testCommonGramsAnalysis() throws IOException {
|
||||
String json = "/org/elasticsearch/index/analysis/commongrams/commongrams.json";
|
||||
String json = "/org/elasticsearch/analysis/common/commongrams.json";
|
||||
Settings settings = Settings.builder()
|
||||
.loadFromStream(json, getClass().getResourceAsStream(json))
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
|
||||
.build();
|
||||
{
|
||||
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
|
||||
.indexAnalyzers;
|
||||
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
|
||||
String source = "the quick brown is a fox or not";
|
||||
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
|
||||
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
|
||||
"fox", "fox_or", "or", "not" };
|
||||
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
|
||||
}
|
||||
{
|
||||
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
|
||||
.indexAnalyzers;
|
||||
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
|
||||
String source = "the quick brown is a fox or not";
|
||||
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
|
||||
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
|
||||
"fox", "fox_or", "or", "not" };
|
||||
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
|
||||
}
|
||||
}
|
||||
|
@ -165,7 +170,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put("index.analysis.filter.common_grams_1.ignore_case", true)
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
|
||||
String source = "the quick brown is a fox or noT";
|
||||
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox_or", "or_noT" };
|
||||
|
@ -180,7 +185,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put("index.analysis.filter.common_grams_2.ignore_case", false)
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
|
||||
String source = "the quick brown is a fox or why noT";
|
||||
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
|
||||
|
@ -194,7 +199,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.putArray("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
|
||||
String source = "the quick brown is a fox or why noT";
|
||||
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
|
||||
|
@ -208,7 +213,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.putArray("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_4");
|
||||
String source = "the quick brown is a fox Or noT";
|
||||
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "Or", "noT" };
|
||||
|
@ -219,13 +224,13 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
}
|
||||
|
||||
public void testQueryModeCommonGramsAnalysis() throws IOException {
|
||||
String json = "/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json";
|
||||
String json = "/org/elasticsearch/analysis/common/commongrams_query_mode.json";
|
||||
Settings settings = Settings.builder()
|
||||
.loadFromStream(json, getClass().getResourceAsStream(json))
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
|
||||
.build();
|
||||
{
|
||||
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
|
||||
.indexAnalyzers;
|
||||
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
|
||||
String source = "the quick brown is a fox or not";
|
||||
|
@ -233,7 +238,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
|
||||
}
|
||||
{
|
||||
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
|
||||
.indexAnalyzers;
|
||||
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
|
||||
String source = "the quick brown is a fox or not";
|
||||
|
@ -251,4 +256,8 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
return home;
|
||||
}
|
||||
|
||||
private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
|
||||
return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
|
||||
}
|
||||
|
||||
}
|
|
@ -17,12 +17,14 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.test.ESTokenStreamTestCase;
|
||||
|
||||
|
@ -35,7 +37,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put("index.analysis.filter.limit_default.type", "limit")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
{
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_default");
|
||||
String source = "the quick brown fox";
|
||||
|
@ -62,7 +64,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
|
||||
String source = "the quick brown fox";
|
||||
String[] expected = new String[] { "the", "quick", "brown" };
|
||||
|
@ -77,7 +79,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put("index.analysis.filter.limit_1.consume_all_tokens", false)
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
|
||||
String source = "the quick brown fox";
|
||||
String[] expected = new String[] { "the", "quick", "brown" };
|
||||
|
@ -93,7 +95,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
||||
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
|
||||
String source = "the quick brown fox";
|
||||
String[] expected = new String[] { "the", "quick", "brown", "fox" };
|
||||
|
@ -103,4 +105,8 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
|
||||
return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
|
||||
}
|
||||
|
||||
}
|
|
@ -17,13 +17,15 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.test.ESTokenStreamTestCase;
|
||||
import org.elasticsearch.test.IndexSettingsModule;
|
||||
|
||||
|
@ -32,7 +34,7 @@ import static org.hamcrest.Matchers.containsString;
|
|||
|
||||
public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
|
||||
public void testPatternCaptureTokenFilter() throws Exception {
|
||||
String json = "/org/elasticsearch/index/analysis/pattern_capture.json";
|
||||
String json = "/org/elasticsearch/analysis/common/pattern_capture.json";
|
||||
Settings settings = Settings.builder()
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
|
||||
.loadFromStream(json, getClass().getResourceAsStream(json))
|
||||
|
@ -40,7 +42,7 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
|
|||
.build();
|
||||
|
||||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers;
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
|
||||
NamedAnalyzer analyzer1 = indexAnalyzers.get("single");
|
||||
|
||||
assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[]{"foobarbaz","foobar","foo"});
|
||||
|
@ -56,7 +58,8 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
|
|||
|
||||
public void testNoPatterns() {
|
||||
try {
|
||||
new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null, "pattern_capture", Settings.builder().put("pattern", "foobar").build());
|
||||
new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null,
|
||||
"pattern_capture", Settings.builder().put("pattern", "foobar").build());
|
||||
fail ("Expected IllegalArgumentException");
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertThat(e.getMessage(), containsString("required setting 'patterns' is missing"));
|
|
@ -568,3 +568,98 @@
|
|||
filter: [my_truncate]
|
||||
- length: { tokens: 1 }
|
||||
- match: { tokens.0.token: foo }
|
||||
|
||||
---
|
||||
"pattern_capture":
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
settings:
|
||||
analysis:
|
||||
filter:
|
||||
my_pattern_capture:
|
||||
type: pattern_capture
|
||||
preserve_original: false
|
||||
patterns: ["([^@]+)"]
|
||||
- do:
|
||||
indices.analyze:
|
||||
index: test
|
||||
body:
|
||||
text: foo@bar.baz
|
||||
tokenizer: keyword
|
||||
filter: [my_pattern_capture]
|
||||
- length: { tokens: 2 }
|
||||
- match: { tokens.0.token: foo }
|
||||
- match: { tokens.1.token: bar.baz }
|
||||
|
||||
---
|
||||
"pattern_replace":
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
settings:
|
||||
analysis:
|
||||
filter:
|
||||
my_pattern_replace:
|
||||
type: pattern_replace
|
||||
pattern: a
|
||||
replacement: b
|
||||
- do:
|
||||
indices.analyze:
|
||||
index: test
|
||||
body:
|
||||
text: a
|
||||
tokenizer: keyword
|
||||
filter: [my_pattern_replace]
|
||||
- length: { tokens: 1 }
|
||||
- match: { tokens.0.token: b }
|
||||
|
||||
---
|
||||
"limit_count":
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
settings:
|
||||
analysis:
|
||||
filter:
|
||||
my_limit:
|
||||
type: limit
|
||||
max_token_count: 2
|
||||
- do:
|
||||
indices.analyze:
|
||||
index: test
|
||||
body:
|
||||
text: a b c
|
||||
tokenizer: whitespace
|
||||
filter: [my_limit]
|
||||
- length: { tokens: 2 }
|
||||
- match: { tokens.0.token: a }
|
||||
- match: { tokens.1.token: b }
|
||||
|
||||
---
|
||||
"common_grams":
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
settings:
|
||||
analysis:
|
||||
filter:
|
||||
my_limit:
|
||||
type: common_grams
|
||||
common_words: [a]
|
||||
- do:
|
||||
indices.analyze:
|
||||
index: test
|
||||
body:
|
||||
text: a b c
|
||||
tokenizer: whitespace
|
||||
filter: [my_limit]
|
||||
- length: { tokens: 4 }
|
||||
- match: { tokens.0.token: a }
|
||||
- match: { tokens.1.token: a_b }
|
||||
- match: { tokens.2.token: b }
|
||||
- match: { tokens.3.token: c }
|
||||
|
|
|
@ -31,7 +31,6 @@ import org.elasticsearch.index.analysis.CJKBigramFilterFactory;
|
|||
import org.elasticsearch.index.analysis.CJKWidthFilterFactory;
|
||||
import org.elasticsearch.index.analysis.ClassicFilterFactory;
|
||||
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
|
||||
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
|
||||
|
@ -45,14 +44,11 @@ import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
|
|||
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
|
||||
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
|
||||
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
||||
|
@ -143,8 +139,8 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
|||
.put("cjkbigram", CJKBigramFilterFactory.class)
|
||||
.put("cjkwidth", CJKWidthFilterFactory.class)
|
||||
.put("classic", ClassicFilterFactory.class)
|
||||
.put("commongrams", CommonGramsTokenFilterFactory.class)
|
||||
.put("commongramsquery", CommonGramsTokenFilterFactory.class)
|
||||
.put("commongrams", MovedToAnalysisCommon.class)
|
||||
.put("commongramsquery", MovedToAnalysisCommon.class)
|
||||
.put("czechstem", CzechStemTokenFilterFactory.class)
|
||||
.put("decimaldigit", DecimalDigitFilterFactory.class)
|
||||
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
|
||||
|
@ -178,13 +174,13 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
|||
.put("kstem", MovedToAnalysisCommon.class)
|
||||
.put("latvianstem", MovedToAnalysisCommon.class)
|
||||
.put("length", MovedToAnalysisCommon.class)
|
||||
.put("limittokencount", LimitTokenCountFilterFactory.class)
|
||||
.put("limittokencount", MovedToAnalysisCommon.class)
|
||||
.put("lowercase", MovedToAnalysisCommon.class)
|
||||
.put("ngram", MovedToAnalysisCommon.class)
|
||||
.put("norwegianlightstem", MovedToAnalysisCommon.class)
|
||||
.put("norwegianminimalstem", MovedToAnalysisCommon.class)
|
||||
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
|
||||
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
|
||||
.put("patterncapturegroup", MovedToAnalysisCommon.class)
|
||||
.put("patternreplace", MovedToAnalysisCommon.class)
|
||||
.put("persiannormalization", PersianNormalizationFilterFactory.class)
|
||||
.put("porterstem", MovedToAnalysisCommon.class)
|
||||
.put("portuguesestem", MovedToAnalysisCommon.class)
|
||||
|
|
Loading…
Reference in New Issue