Added tests for empty token chars on ngram tokenizer

This relates to #5120
This commit is contained in:
Simon Willnauer 2014-02-19 19:08:03 +01:00
parent 1a48c5fc21
commit 8fe4d878eb
2 changed files with 34 additions and 0 deletions

View File

@@ -68,6 +68,16 @@ public class NGramTokenizerFactoryTests extends ElasticsearchTokenStreamTestCase
}
}
/**
 * Checks n-gram tokenization when {@code token_chars} is configured as an empty array.
 * With {@code min_gram=2} and {@code max_gram=4}, the input {@code "1.34"} is expected to
 * produce every gram of length 2 to 4, including the grams that contain the '.' character.
 * Relates to #5120.
 */
@Test
public void testNoTokenChars() throws IOException {
    // Tokenizer-level settings: explicit, but empty, list of token character classes.
    final Settings tokenizerSettings = ImmutableSettings.builder()
            .put("min_gram", 2)
            .put("max_gram", 4)
            .putArray("token_chars", new String[0])
            .build();
    final NGramTokenizerFactory factory =
            new NGramTokenizerFactory(new Index("test"), ImmutableSettings.EMPTY, "ngr", tokenizerSettings);

    // Grams are listed by start offset, shortest first.
    final String[] expectedGrams = {"1.", "1.3", "1.34", ".3", ".34", "34"};
    assertTokenStreamContents(factory.create(new StringReader("1.34")), expectedGrams);
}
@Test
public void testPreTokenization() throws IOException {
// Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters

View File

@@ -2185,4 +2185,28 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1l);
}
/**
 * Creates an index whose {@code meta} field is analyzed at index time by a custom nGram
 * analyzer (min_gram 1, max_gram 10, empty {@code token_chars}) and is populated through
 * {@code copy_to} from the {@code origin} field. After indexing one document, a match
 * query for "1234" on {@code meta} must return exactly one hit.
 */
@Test // see #5120
public void testNGramCopyField() {
    // Single shard, no replicas; custom analyzer backed by an nGram tokenizer with no token_chars.
    assertAcked(prepareCreate("test")
            .setSettings(settingsBuilder()
                    .put(SETTING_NUMBER_OF_SHARDS, 1)
                    .put(SETTING_NUMBER_OF_REPLICAS, 0)
                    .put("index.analysis.analyzer.my_ngram_analyzer.type", "custom")
                    .put("index.analysis.analyzer.my_ngram_analyzer.tokenizer", "my_ngram_tokenizer")
                    .put("index.analysis.tokenizer.my_ngram_tokenizer.type", "nGram")
                    .put("index.analysis.tokenizer.my_ngram_tokenizer.min_gram", "1")
                    .put("index.analysis.tokenizer.my_ngram_tokenizer.max_gram", "10")
                    .putArray("index.analysis.tokenizer.my_ngram_tokenizer.token_chars", new String[0]))
            .addMapping("test",
                    "origin", "type=string,copy_to=meta",
                    "meta", "type=string,index_analyzer=my_ngram_analyzer"));
    ensureGreen();

    // Index one document and refresh so it is visible to the search below.
    client().prepareIndex("test", "test", "1")
            .setSource("origin", "C.A1234.5678")
            .setRefresh(true)
            .get();

    // The value copied into "meta" must be matched by the query term.
    assertHitCount(client().prepareSearch("test").setQuery(matchQuery("meta", "1234")).get(), 1L);
}
}