From e0b1a6544dfb1cce33bbb70980dbbf7e5d11e062 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 4 Dec 2017 21:32:16 +0100 Subject: [PATCH] Fix term vectors generator with keyword and normalizer (#27608) This change applies the normalizer defined on the field when building term vectors dynamically on a keyword field. Fixes #27320 --- .../index/termvectors/TermVectorsService.java | 7 ++- .../action/termvectors/GetTermVectorsIT.java | 45 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java b/core/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java index 495f1dc4bdb..91bd994b131 100644 --- a/core/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java +++ b/core/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java @@ -217,7 +217,12 @@ public class TermVectorsService { if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) { analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field).toString()); } else { - analyzer = mapperService.fullName(field).indexAnalyzer(); + MappedFieldType fieldType = mapperService.fullName(field); + if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) { + analyzer = ((KeywordFieldMapper.KeywordFieldType) fieldType).normalizer(); + } else { + analyzer = fieldType.indexAnalyzer(); + } } if (analyzer == null) { analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer(); diff --git a/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java b/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java index 520c881aa7e..bab9f14fcff 100644 --- a/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java +++ b/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java @@ -1025,6 +1025,51 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase { assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq); } + public void testWithKeywordAndNormalizer() throws IOException, ExecutionException, InterruptedException { + // setup indices + String[] indexNames = new String[] {"with_tv", "without_tv"}; + Settings.Builder builder = Settings.builder() + .put(indexSettings()) + .put("index.analysis.analyzer.my_analyzer.tokenizer", "keyword") + .putList("index.analysis.analyzer.my_analyzer.filter", "lowercase") + .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase"); + assertAcked(prepareCreate(indexNames[0]).setSettings(builder.build()) + .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets,analyzer=my_analyzer")); + assertAcked(prepareCreate(indexNames[1]).setSettings(builder.build()) + .addMapping("type1", "field1", "type=keyword,normalizer=my_normalizer")); + ensureGreen(); + + // index documents with and without term vectors + String[] content = new String[] { "Hello World", "hello world", "HELLO WORLD" }; + + List indexBuilders = new ArrayList<>(); + for (String indexName : indexNames) { + for (int id = 0; id < content.length; id++) { + indexBuilders.add(client().prepareIndex() + .setIndex(indexName) + .setType("type1") + .setId(String.valueOf(id)) + .setSource("field1", content[id])); + } + } + indexRandom(true, indexBuilders); + + // request tvs and compare from each index + for (int id = 0; id < content.length; id++) { + Fields[] fields = new Fields[2]; + for (int j = 0; j < indexNames.length; j++) { + TermVectorsResponse resp = client().prepareTermVector(indexNames[j], "type1", String.valueOf(id)) + .setOffsets(true) + .setPositions(true) + .setSelectedFields("field1") + .get(); + assertThat("doc with index: " + indexNames[j] + ", type1 and id: " + id, resp.isExists(), equalTo(true)); + fields[j] = resp.getFields(); + } + compareTermVectors("field1", fields[0], fields[1]); + } + } + private void checkBestTerms(Terms terms, List expectedTerms) throws IOException { final TermsEnum termsEnum = terms.iterator(); List bestTerms = new ArrayList<>();