diff --git a/docs/reference/analysis/tokenfilters.asciidoc b/docs/reference/analysis/tokenfilters.asciidoc index ad72fb70ccb..14a67b2fc04 100644 --- a/docs/reference/analysis/tokenfilters.asciidoc +++ b/docs/reference/analysis/tokenfilters.asciidoc @@ -17,6 +17,8 @@ include::tokenfilters/length-tokenfilter.asciidoc[] include::tokenfilters/lowercase-tokenfilter.asciidoc[] +include::tokenfilters/uppercase-tokenfilter.asciidoc[] + include::tokenfilters/ngram-tokenfilter.asciidoc[] include::tokenfilters/edgengram-tokenfilter.asciidoc[] diff --git a/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc new file mode 100644 index 00000000000..639d1e91068 --- /dev/null +++ b/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc @@ -0,0 +1,5 @@ +[[analysis-uppercase-tokenfilter]] +=== Uppercase Token Filter + +A token filter of type `uppercase` that normalizes token text to upper +case. diff --git a/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java b/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java index fa5a8579c52..45b8e8f3d7a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java +++ b/src/main/java/org/elasticsearch/index/analysis/AnalysisModule.java @@ -426,6 +426,7 @@ public class AnalysisModule extends AbstractModule { tokenFiltersBindings.processTokenFilter("asciifolding", ASCIIFoldingTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("length", LengthTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("lowercase", LowerCaseTokenFilterFactory.class); + tokenFiltersBindings.processTokenFilter("uppercase", UpperCaseTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("porter_stem", PorterStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("kstem", KStemTokenFilterFactory.class); tokenFiltersBindings.processTokenFilter("standard", 
StandardTokenFilterFactory.class); diff --git a/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java new file mode 100644 index 00000000000..db94f906f93 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java @@ -0,0 +1,46 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.UpperCaseFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** + * Token filter factory whose {@link #create(TokenStream)} wraps the given stream in a Lucene {@link UpperCaseFilter} built with the inherited {@code version}; bound to the {@code uppercase} filter type in {@code AnalysisModule}. + */ +public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory { + + @Inject + public UpperCaseTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new UpperCaseFilter(version, tokenStream); + } +} + + diff --git a/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java b/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java index ce195a8aa59..57fd297ada3 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java +++ b/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.commongrams.CommonGramsFilter; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.core.UpperCaseFilter; import org.apache.lucene.analysis.cz.CzechStemFilter; import org.apache.lucene.analysis.de.GermanStemFilter; import org.apache.lucene.analysis.en.KStemFilter; @@ -117,6 +118,13 @@ public enum PreBuiltTokenFilters { } }, + UPPERCASE(CachingStrategy.LUCENE) { + @Override + public TokenStream create(TokenStream tokenStream, Version version) { + return new UpperCaseFilter(version.luceneVersion, tokenStream); + } + }, + KSTEM(CachingStrategy.ONE) { @Override public TokenStream 
create(TokenStream tokenStream, Version version) {