Moved keyword tokenizer to analysis-common module (#30642)
Relates to #23658
Parent: 363f1e84ca
Commit: 544822c78b
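The mechanism behind this move is `AnalysisPlugin.getTokenizers()`: a module contributes tokenizers by name, and `AnalysisModule` merges them into its registry via `extractAndRegister`, so core no longer has to ship the keyword tokenizer itself. A minimal sketch of that contract, assembled from the hunks below (the `TreeMap` and the trimmed class body are illustrative assumptions, not part of this diff):

    // Sketch: how analysis-common advertises the keyword tokenizer to core.
    // Names match the CommonAnalysisPlugin hunk below; the map type is assumed.
    public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new TreeMap<>();
            // ...pattern, uax_url_email, whitespace, etc. registered as before...
            tokenizers.put("keyword", KeywordTokenizerFactory::new); // the new registration
            return tokenizers;
        }
    }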
@@ -193,6 +193,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         tokenizers.put("pattern", PatternTokenizerFactory::new);
         tokenizers.put("uax_url_email", UAX29URLEmailTokenizerFactory::new);
         tokenizers.put("whitespace", WhitespaceTokenizerFactory::new);
+        tokenizers.put("keyword", KeywordTokenizerFactory::new);
         return tokenizers;
     }

@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
@@ -30,7 +30,7 @@ public class KeywordTokenizerFactory extends AbstractTokenizerFactory {

     private final int bufferSize;

-    public KeywordTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    KeywordTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         bufferSize = settings.getAsInt("buffer_size", 256);
     }

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.en.PorterStemFilterFactory;
 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory;
 import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
 import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
-import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
 import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
@@ -56,6 +55,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         tokenizers.put("pattern", PatternTokenizerFactory.class);
         tokenizers.put("uax29urlemail", UAX29URLEmailTokenizerFactory.class);
         tokenizers.put("whitespace", WhitespaceTokenizerFactory.class);
+        tokenizers.put("keyword", KeywordTokenizerFactory.class);
         return tokenizers;
     }

@@ -5,9 +5,22 @@
       indices.analyze:
         body:
           text: Foo Bar!
+          explain: true
           tokenizer: keyword
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: Foo Bar! }
+  - length: { detail.tokenizer.tokens: 1 }
+  - match: { detail.tokenizer.name: keyword }
+  - match: { detail.tokenizer.tokens.0.token: Foo Bar! }
+
+  - do:
+      indices.analyze:
+        body:
+          text: Foo Bar!
+          explain: true
+          tokenizer:
+            type: keyword
+  - length: { detail.tokenizer.tokens: 1 }
+  - match: { detail.tokenizer.name: _anonymous_tokenizer }
+  - match: { detail.tokenizer.tokens.0.token: Foo Bar! }

 ---
 "nGram":
@@ -97,3 +97,19 @@
   - length: { tokens: 2 }
   - match: { tokens.0.token: sha }
   - match: { tokens.1.token: hay }
+
+---
+"Custom normalizer in request":
+  - do:
+      indices.analyze:
+        body:
+          text: ABc
+          explain: true
+          filter: ["lowercase"]
+
+  - length: { detail.tokenizer.tokens: 1 }
+  - length: { detail.tokenfilters.0.tokens: 1 }
+  - match: { detail.tokenizer.name: keyword_for_normalizer }
+  - match: { detail.tokenizer.tokens.0.token: ABc }
+  - match: { detail.tokenfilters.0.name: lowercase }
+  - match: { detail.tokenfilters.0.tokens.0.token: abc }

@@ -16,9 +16,11 @@
         body:
           filter: [icu_normalizer]
           text: Foo Bar Ruß
-          tokenizer: keyword
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: foo bar russ }
+          tokenizer: standard
+  - length: { tokens: 3 }
+  - match: { tokens.0.token: foo }
+  - match: { tokens.1.token: bar }
+  - match: { tokens.2.token: russ }
 ---
 "Normalization charfilter":
   - do:
@@ -26,9 +28,11 @@
         body:
           char_filter: [icu_normalizer]
           text: Foo Bar Ruß
-          tokenizer: keyword
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: foo bar russ }
+          tokenizer: standard
+  - length: { tokens: 3 }
+  - match: { tokens.0.token: foo }
+  - match: { tokens.1.token: bar }
+  - match: { tokens.2.token: russ }
 ---
 "Folding filter":
   - do:
@@ -36,9 +40,11 @@
         body:
           filter: [icu_folding]
           text: Foo Bar résumé
-          tokenizer: keyword
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: foo bar resume }
+          tokenizer: standard
+  - length: { tokens: 3 }
+  - match: { tokens.0.token: foo }
+  - match: { tokens.1.token: bar }
+  - match: { tokens.2.token: resume }
 ---
 "Normalization with a UnicodeSet Filter":
   - do:
@@ -64,25 +70,34 @@
         index: test
         body:
           char_filter: ["charfilter_icu_normalizer"]
-          tokenizer: keyword
+          tokenizer: standard
           text: charfilter Föo Bâr Ruß
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: charfilter föo bâr ruß }
+  - length: { tokens: 4 }
+  - match: { tokens.0.token: charfilter }
+  - match: { tokens.1.token: föo }
+  - match: { tokens.2.token: bâr }
+  - match: { tokens.3.token: ruß }
   - do:
       indices.analyze:
         index: test
         body:
-          tokenizer: keyword
+          tokenizer: standard
           filter: ["tokenfilter_icu_normalizer"]
           text: tokenfilter Föo Bâr Ruß
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: tokenfilter föo Bâr ruß }
+  - length: { tokens: 4 }
+  - match: { tokens.0.token: tokenfilter }
+  - match: { tokens.1.token: föo }
+  - match: { tokens.2.token: Bâr }
+  - match: { tokens.3.token: ruß }
   - do:
       indices.analyze:
         index: test
         body:
-          tokenizer: keyword
+          tokenizer: standard
           filter: ["tokenfilter_icu_folding"]
           text: icufolding Föo Bâr Ruß
-  - length: { tokens: 1 }
-  - match: { tokens.0.token: icufolding foo bâr russ }
+  - length: { tokens: 4 }
+  - match: { tokens.0.token: icufolding }
+  - match: { tokens.1.token: foo }
+  - match: { tokens.2.token: bâr }
+  - match: { tokens.3.token: russ }

@@ -5,7 +5,7 @@
       indices.analyze:
         body:
           text: studenci
-          tokenizer: keyword
+          tokenizer: standard
           filter: [polish_stem]
   - length: { tokens: 1 }
   - match: { tokens.0.token: student }

@@ -75,19 +75,3 @@
   - match: { detail.tokenizer.tokens.2.token: buzz }
   - match: { detail.tokenfilters.0.name: "_anonymous_tokenfilter" }
   - match: { detail.tokenfilters.0.tokens.0.token: bar }
-
----
-"Custom normalizer in request":
-  - do:
-      indices.analyze:
-        body:
-          text: ABc
-          explain: true
-          filter: ["lowercase"]
-
-  - length: { detail.tokenizer.tokens: 1 }
-  - length: { detail.tokenfilters.0.tokens: 1 }
-  - match: { detail.tokenizer.name: keyword_for_normalizer }
-  - match: { detail.tokenizer.tokens.0.token: ABc }
-  - match: { detail.tokenfilters.0.name: lowercase }
-  - match: { detail.tokenfilters.0.tokens.0.token: abc }

@@ -548,6 +548,10 @@ public final class AnalysisRegistry implements Closeable {
                                      TokenizerFactory keywordTokenizerFactory,
                                      Map<String, TokenFilterFactory> tokenFilters,
                                      Map<String, CharFilterFactory> charFilters) {
+        if (keywordTokenizerFactory == null) {
+            throw new IllegalStateException("keyword tokenizer factory is null, normalizers require analysis-common module");
+        }
+
         if (normalizerFactory instanceof CustomNormalizerProvider) {
             ((CustomNormalizerProvider) normalizerFactory).build(keywordTokenizerFactory, charFilters, tokenFilters);
         }

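With the keyword tokenizer now supplied by a module, `AnalysisRegistry` can no longer assume it exists, hence the fail-fast check added above. A hypothetical sketch of what that buys (the registry and settings variables are assumptions for illustration; `expectThrows` is the usual test-framework helper):

    // Hypothetical: resolving a normalizer on a node without analysis-common
    // now fails with a clear message instead of a NullPointerException later.
    IllegalStateException e = expectThrows(IllegalStateException.class,
            () -> registryWithoutAnalysisCommon.build(indexSettingsWithNormalizer));
    assertEquals("keyword tokenizer factory is null, normalizers require analysis-common module",
            e.getMessage());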
@@ -56,7 +56,6 @@ import org.elasticsearch.index.analysis.IndonesianAnalyzerProvider;
 import org.elasticsearch.index.analysis.IrishAnalyzerProvider;
 import org.elasticsearch.index.analysis.ItalianAnalyzerProvider;
 import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
-import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
 import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
 import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
@@ -225,7 +224,6 @@ public final class AnalysisModule {
     private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
         NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = new NamedRegistry<>("tokenizer");
         tokenizers.register("standard", StandardTokenizerFactory::new);
-        tokenizers.register("keyword", KeywordTokenizerFactory::new);
         tokenizers.extractAndRegister(plugins, AnalysisPlugin::getTokenizers);
         return tokenizers;
     }

@@ -19,6 +19,7 @@
 package org.elasticsearch.action.admin.indices;

 import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
@@ -37,6 +38,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
+import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.indices.analysis.AnalysisModuleTests.AppendCharFilter;
@@ -107,6 +109,12 @@ public class TransportAnalyzeActionTests extends ESTestCase {
             return singletonMap("append", AppendCharFilterFactory::new);
         }

+        @Override
+        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
+            return singletonMap("keyword", (indexSettings, environment, name, settings) ->
+                    () -> new MockTokenizer(MockTokenizer.KEYWORD, false));
+        }
+
         @Override
         public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
             return singletonMap("mock", MockFactory::new);

@@ -37,10 +37,13 @@ import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.index.engine.VersionConflictEngineException;
 import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.MockKeywordPlugin;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -58,6 +61,12 @@ import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;

 public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Collections.singleton(MockKeywordPlugin.class);
+    }
+
     public void testNoSuchDoc() throws Exception {
         XContentBuilder mapping = jsonBuilder().startObject().startObject("type1")
                 .startObject("properties")
@@ -432,7 +432,7 @@ public class GatewayIndexStateIT extends ESIntegTestCase {
         logger.info("--> starting one node");
         internalCluster().startNode();
         prepareCreate("test").setSettings(Settings.builder()
-                .put("index.analysis.analyzer.test.tokenizer", "keyword")
+                .put("index.analysis.analyzer.test.tokenizer", "standard")
                 .put("index.number_of_shards", "1"))
                 .addMapping("type1", "{\n" +
                 "  \"type1\": {\n" +
@@ -20,6 +20,8 @@
 package org.elasticsearch.index.analysis;

 import org.apache.lucene.analysis.MockLowerCaseFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
@@ -71,7 +73,7 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
                 .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                 .build();
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-                () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
+                () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN));
         assertEquals("Custom normalizer [my_normalizer] cannot configure a tokenizer", e.getMessage());
     }

@@ -135,7 +137,7 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
         @Override
         public int read(char[] cbuf, int off, int len) throws IOException {
             int result = reader.read(cbuf, off, len);
-            for (int i = off; i < result; i++) {
+            for (int i = off; i < off + len; i++) {
                 if (cbuf[i] == 'a') {
                     cbuf[i] = 'z';
                 }
@@ -157,5 +159,11 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
                 return new Factory();
             });
         }
+
+        @Override
+        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
+            return singletonMap("keyword", (indexSettings, environment, name, settings) ->
+                    () -> new MockTokenizer(MockTokenizer.KEYWORD, false));
+        }
     }
 }

@@ -20,6 +20,8 @@
 package org.elasticsearch.index.mapper;

 import org.apache.lucene.analysis.MockLowerCaseFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
@@ -33,7 +35,9 @@ import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
+import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.index.mapper.MapperService.MergeReason;
+import org.elasticsearch.indices.analysis.AnalysisModule;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -44,8 +48,10 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;

 import static java.util.Collections.singletonList;
+import static java.util.Collections.singletonMap;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;

@@ -58,6 +64,21 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
         public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
             return singletonList(PreConfiguredTokenFilter.singleton("mock_other_lowercase", true, MockLowerCaseFilter::new));
         }

+        @Override
+        public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
+            return singletonMap("keyword", (indexSettings, environment, name, settings) -> {
+                class Factory implements TokenizerFactory {
+
+                    @Override
+                    public Tokenizer create() {
+                        return new MockTokenizer(MockTokenizer.KEYWORD, false);
+                    }
+                }
+                return new Factory();
+            });
+        }
+
     };

     @Override
@@ -21,6 +21,7 @@ package org.elasticsearch.indices.analysis;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -31,6 +32,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.SimpleFSDirectory;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.io.Streams;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.env.Environment;
@@ -49,6 +51,7 @@ import org.elasticsearch.index.analysis.StandardTokenizerFactory;
 import org.elasticsearch.index.analysis.StopTokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.MyFilterTokenFilterFactory;
+import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.test.ESTestCase;
@@ -60,6 +63,8 @@ import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
+import java.io.StringReader;
+import java.io.UncheckedIOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -222,7 +227,7 @@ public class AnalysisModuleTests extends ESTestCase {

     public void testUnderscoreInAnalyzerName() throws IOException {
         Settings settings = Settings.builder()
-                .put("index.analysis.analyzer._invalid_name.tokenizer", "keyword")
+                .put("index.analysis.analyzer._invalid_name.tokenizer", "standard")
                 .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                 .put(IndexMetaData.SETTING_VERSION_CREATED, "1")
                 .build();
@@ -256,6 +261,13 @@ public class AnalysisModuleTests extends ESTestCase {
                     (tokenStream, esVersion) -> new AppendCharFilter(tokenStream, esVersion.toString()))
             );
         }
+
+        @Override
+        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
+            // Need mock keyword tokenizer here, because alpha / beta versions are broken up by the dash.
+            return singletonMap("keyword", (indexSettings, environment, name, settings) ->
+                    () -> new MockTokenizer(MockTokenizer.KEYWORD, false));
+        }
     })).getAnalysisRegistry();

     Version version = VersionUtils.randomVersion(random());
@@ -305,11 +317,11 @@

         Version version = VersionUtils.randomVersion(random());
         IndexAnalyzers analyzers = getIndexAnalyzers(registry, Settings.builder()
-            .put("index.analysis.analyzer.no_version.tokenizer", "keyword")
+            .put("index.analysis.analyzer.no_version.tokenizer", "standard")
             .put("index.analysis.analyzer.no_version.filter", "no_version")
-            .put("index.analysis.analyzer.lucene_version.tokenizer", "keyword")
+            .put("index.analysis.analyzer.lucene_version.tokenizer", "standard")
             .put("index.analysis.analyzer.lucene_version.filter", "lucene_version")
-            .put("index.analysis.analyzer.elasticsearch_version.tokenizer", "keyword")
+            .put("index.analysis.analyzer.elasticsearch_version.tokenizer", "standard")
             .put("index.analysis.analyzer.elasticsearch_version.filter", "elasticsearch_version")
             .put(IndexMetaData.SETTING_VERSION_CREATED, version)
             .build());
@@ -425,12 +437,17 @@

     // Simple char filter that appends text to the term
     public static class AppendCharFilter extends CharFilter {
-        private final char[] appendMe;
-        private int offsetInAppendMe = -1;
+
+        static Reader append(Reader input, String appendMe) {
+            try {
+                return new StringReader(Streams.copyToString(input) + appendMe);
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        }

         public AppendCharFilter(Reader input, String appendMe) {
-            super(input);
-            this.appendMe = appendMe.toCharArray();
+            super(append(input, appendMe));
         }

         @Override
@@ -440,24 +457,7 @@

         @Override
         public int read(char[] cbuf, int off, int len) throws IOException {
-            if (offsetInAppendMe < 0) {
-                int read = input.read(cbuf, off, len);
-                if (read == len) {
-                    return read;
-                }
-                off += read;
-                len -= read;
-                int allowedLen = Math.min(len, appendMe.length);
-                System.arraycopy(appendMe, 0, cbuf, off, allowedLen);
-                offsetInAppendMe = allowedLen;
-                return read + allowedLen;
-            }
-            if (offsetInAppendMe >= appendMe.length) {
-                return -1;
-            }
-            int allowedLen = Math.max(len, appendMe.length - offsetInAppendMe);
-            System.arraycopy(appendMe, offsetInAppendMe, cbuf, off, allowedLen);
-            return allowedLen;
+            return input.read(cbuf, off, len);
         }
     }

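The `AppendCharFilter` rewrite above trades the stateful, incremental `read()` bookkeeping for eagerly materializing the wrapped input plus the suffix in a `StringReader`, so `read()` can simply delegate. A small usage sketch under that reading (both the class and `Streams.copyToString` appear in this diff):

    // The rewritten filter appends the suffix once, up front.
    Reader wrapped = new AnalysisModuleTests.AppendCharFilter(new StringReader("foo"), "bar");
    assert "foobar".equals(Streams.copyToString(wrapped));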
@@ -22,11 +22,18 @@ import org.elasticsearch.action.admin.indices.alias.Alias;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.analysis.CharFilterFactory;
+import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.plugins.AnalysisPlugin;
+import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.MockKeywordPlugin;
 import org.hamcrest.core.IsNull;

 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -38,6 +45,12 @@ import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.startsWith;

 public class AnalyzeActionIT extends ESIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Collections.singleton(MockKeywordPlugin.class);
+    }
+
     public void testSimpleAnalyzerTests() throws Exception {
         assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
         ensureGreen();

@@ -50,14 +50,15 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Field;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.InternalSettingsPlugin;
+import org.elasticsearch.test.MockKeywordPlugin;
 import org.hamcrest.Matcher;
 import org.hamcrest.Matchers;
 import org.joda.time.DateTime;
 import org.joda.time.chrono.ISOChronology;

 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -105,7 +106,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {

     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
-        return Collections.singletonList(InternalSettingsPlugin.class);
+        return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class);
     }

     public void testHighlightingWithStoredKeyword() throws IOException {

@@ -32,15 +32,19 @@ import org.elasticsearch.index.query.MultiMatchQueryBuilder;
 import org.elasticsearch.index.query.Operator;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.search.MatchQuery;
+import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.SearchHits;
 import org.elasticsearch.search.sort.SortBuilders;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.MockKeywordPlugin;
 import org.junit.Before;

 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
@@ -72,6 +76,11 @@ import static org.hamcrest.Matchers.lessThan;

 public class MultiMatchQueryIT extends ESIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Collections.singleton(MockKeywordPlugin.class);
+    }

     @Before
     public void init() throws Exception {
         CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
-import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
 import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
@@ -79,7 +78,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
         // exposed in ES
         .put("classic", MovedToAnalysisCommon.class)
         .put("edgengram", MovedToAnalysisCommon.class)
-        .put("keyword", KeywordTokenizerFactory.class)
+        .put("keyword", MovedToAnalysisCommon.class)
         .put("letter", MovedToAnalysisCommon.class)
         .put("lowercase", MovedToAnalysisCommon.class)
         .put("ngram", MovedToAnalysisCommon.class)

@@ -0,0 +1,54 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test;
+
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.elasticsearch.index.analysis.TokenizerFactory;
+import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.plugins.AnalysisPlugin;
+import org.elasticsearch.plugins.Plugin;
+
+import java.util.Map;
+
+import static java.util.Collections.singletonMap;
+
+/**
+ * Some tests rely on the keyword tokenizer, but this tokenizer isn't part of lucene-core and is therefore not
+ * available in some modules. What this test plugin does is use the mock tokenizer and advertise it as the keyword
+ * tokenizer.
+ *
+ * Most tests that need this test plugin use normalizers. When normalizers are constructed they try to resolve the
+ * keyword tokenizer, but if the keyword tokenizer isn't available then constructing normalizers will fail.
+ */
+public class MockKeywordPlugin extends Plugin implements AnalysisPlugin {
+
+    @Override
+    public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
+        return singletonMap("keyword", (indexSettings, environment, name, settings) -> {
+            class Factory implements TokenizerFactory {
+
+                @Override
+                public Tokenizer create() {
+                    return new MockTokenizer(MockTokenizer.KEYWORD, false);
+                }
+            }
+            return new Factory();
+        });
+    }
+}
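The integration tests touched in this diff all consume `MockKeywordPlugin` the same way; the pattern, taken verbatim from the GetTermVectorsIT, AnalyzeActionIT, and MultiMatchQueryIT hunks above, is:

    // Opt the test cluster into the mock keyword tokenizer.
    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Collections.singleton(MockKeywordPlugin.class);
    }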