From 8fe4d878eb54baaf5934633f8ecbef42507d9c2d Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Wed, 19 Feb 2014 19:08:03 +0100
Subject: [PATCH] Added tests for empty token chars on ngram tokenizer

This relates to #5120
---
 .../analysis/NGramTokenizerFactoryTests.java  | 10 ++++++++
 .../search/query/SimpleQueryTests.java        | 24 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java b/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
index c17e3e33928..0607744a726 100644
--- a/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
+++ b/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
@@ -68,6 +68,16 @@ public class NGramTokenizerFactoryTests extends ElasticsearchTokenStreamTestCase
         }
     }
 
+    @Test
+    public void testNoTokenChars() throws IOException {
+        final Index index = new Index("test");
+        final String name = "ngr";
+        final Settings indexSettings = ImmutableSettings.EMPTY;
+        final Settings settings = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
+        Tokenizer tokenizer = new NGramTokenizerFactory(index, indexSettings, name, settings).create(new StringReader("1.34"));
+        assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
+    }
+
     @Test
     public void testPreTokenization() throws IOException {
         // Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters
diff --git a/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java b/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java
index 8c4199bdd98..27e0ef3cd0e 100644
--- a/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java
+++ b/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java
@@ -2185,4 +2185,28 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
         assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1l);
     }
 
+    @Test // see #5120
+    public void testNGramCopyField() {
+        CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
+                .put(SETTING_NUMBER_OF_SHARDS, 1)
+                .put(SETTING_NUMBER_OF_REPLICAS, 0)
+                .put("index.analysis.analyzer.my_ngram_analyzer.type", "custom")
+                .put("index.analysis.analyzer.my_ngram_analyzer.tokenizer", "my_ngram_tokenizer")
+                .put("index.analysis.tokenizer.my_ngram_tokenizer.type", "nGram")
+                .put("index.analysis.tokenizer.my_ngram_tokenizer.min_gram", "1")
+                .put("index.analysis.tokenizer.my_ngram_tokenizer.max_gram", "10")
+                .putArray("index.analysis.tokenizer.my_ngram_tokenizer.token_chars", new String[0]));
+        assertAcked(builder.addMapping("test", "origin", "type=string,copy_to=meta", "meta", "type=string,index_analyzer=my_ngram_analyzer"));
+        ensureGreen();
+
+        client().prepareIndex("test", "test", "1").setSource("origin", "C.A1234.5678")
+                .setRefresh(true)
+                .get();
+
+        SearchResponse searchResponse = client().prepareSearch("test")
+                .setQuery(matchQuery("meta", "1234"))
+                .get();
+        assertHitCount(searchResponse, 1l);
+    }
+
 }
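
For anyone who wants to sanity-check the expected token stream outside the test suite: below is a minimal standalone sketch against Lucene's NGramTokenizer, which is what the Elasticsearch factory wraps. The NGramDemo class name is hypothetical, and the sketch assumes a Lucene version where the NGramTokenizer(int, int) constructor and Tokenizer#setReader are available; the gram sizes and input mirror testNoTokenChars above.

import java.io.StringReader;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class NGramDemo {
    public static void main(String[] args) throws Exception {
        // With no token-character restriction (the equivalent of an empty
        // "token_chars" array), every character takes part in gram
        // generation, including the dot.
        NGramTokenizer tokenizer = new NGramTokenizer(2, 4); // min_gram=2, max_gram=4
        tokenizer.setReader(new StringReader("1.34"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term); // prints: 1. 1.3 1.34 .3 .34 34
        }
        tokenizer.end();
        tokenizer.close();
    }
}

The printed grams are exactly the ones asserted by assertTokenStreamContents in the unit test, which is also why the integration test can match "1234" against "C.A1234.5678" once the dots are allowed to participate in gram generation.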