Fix term vectors generator with keyword and normalizer (#27608)

This change applies the normalizer defined on a keyword field when term vectors are generated dynamically for that field (i.e. when the field does not store term vectors).

Fixes #27320
Jim Ferenczi, 2017-12-04 21:32:16 +01:00 (committed by GitHub)
parent da50fa4540
commit e0b1a6544d
2 changed files with 51 additions and 1 deletion
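
For context: term vectors for a field that does not store them are generated on the fly from the document source, and with this change the keyword field's normalizer is applied during that generation. A minimal sketch of the expected behaviour, written in the style of the integration test added below; the index name my_index, the document, and the method name are illustrative assumptions, not part of the commit:

    // Illustrative only (not from the commit). Assumes "field1" is a keyword field with a
    // lowercase normalizer that does not store term vectors, and that a document
    // {"field1": "Hello World"} was indexed under type "type1" with id "1".
    public void assertNormalizedTermVector() throws IOException {
        TermVectorsResponse response = client().prepareTermVectors("my_index", "type1", "1")
            .setSelectedFields("field1")
            .get();
        // the term vector is built on the fly; with the fix the term is the normalized value
        TermsEnum termsEnum = response.getFields().terms("field1").iterator();
        assertEquals("hello world", termsEnum.next().utf8ToString());
    }

The snippet is meant to live inside an integration test such as GetTermVectorsIT, where the client() and assertEquals helpers are available.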

TermVectorsService.java

@@ -217,7 +217,12 @@ public class TermVectorsService {
         if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) {
             analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field).toString());
         } else {
-            analyzer = mapperService.fullName(field).indexAnalyzer();
+            MappedFieldType fieldType = mapperService.fullName(field);
+            if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+                analyzer = ((KeywordFieldMapper.KeywordFieldType) fieldType).normalizer();
+            } else {
+                analyzer = fieldType.indexAnalyzer();
+            }
         }
         if (analyzer == null) {
             analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer();
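
Restated outside the diff, the analyzer resolution order after this change is: an explicitly requested per-field analyzer wins; otherwise a keyword field contributes its normalizer; any other field contributes its index analyzer; and a null result falls back to the default index analyzer. The following is only a sketch of that order under assumed imports, with a hypothetical class and method name, not code from the commit:

    import java.util.Map;

    import org.apache.lucene.analysis.Analyzer;
    import org.elasticsearch.common.Nullable;
    import org.elasticsearch.index.mapper.KeywordFieldMapper;
    import org.elasticsearch.index.mapper.MappedFieldType;
    import org.elasticsearch.index.mapper.MapperService;

    // Hypothetical helper restating the resolution order above; not part of the commit.
    final class TermVectorsAnalyzerResolution {

        static Analyzer resolve(MapperService mapperService, String field,
                                @Nullable Map<String, String> perFieldAnalyzer) {
            Analyzer analyzer;
            if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) {
                // 1. an explicitly requested per-field analyzer always wins
                analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field));
            } else {
                MappedFieldType fieldType = mapperService.fullName(field);
                if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
                    // 2. a keyword field contributes its normalizer (null if none is configured)
                    analyzer = ((KeywordFieldMapper.KeywordFieldType) fieldType).normalizer();
                } else {
                    // 3. other field types contribute their index analyzer
                    analyzer = fieldType.indexAnalyzer();
                }
            }
            if (analyzer == null) {
                // 4. fall back to the index's default analyzer
                analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer();
            }
            return analyzer;
        }
    }

Since KeywordFieldType.normalizer() returns null when no normalizer is configured, keyword fields without one still reach the existing default-analyzer fallback, so only fields that define a normalizer change behaviour.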

GetTermVectorsIT.java

@@ -1025,6 +1025,51 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
         assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
     }
 
+    public void testWithKeywordAndNormalizer() throws IOException, ExecutionException, InterruptedException {
+        // setup indices
+        String[] indexNames = new String[] {"with_tv", "without_tv"};
+        Settings.Builder builder = Settings.builder()
+            .put(indexSettings())
+            .put("index.analysis.analyzer.my_analyzer.tokenizer", "keyword")
+            .putList("index.analysis.analyzer.my_analyzer.filter", "lowercase")
+            .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase");
+        assertAcked(prepareCreate(indexNames[0]).setSettings(builder.build())
+            .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets,analyzer=my_analyzer"));
+        assertAcked(prepareCreate(indexNames[1]).setSettings(builder.build())
+            .addMapping("type1", "field1", "type=keyword,normalizer=my_normalizer"));
+        ensureGreen();
+
+        // index documents with and without term vectors
+        String[] content = new String[] { "Hello World", "hello world", "HELLO WORLD" };
+
+        List<IndexRequestBuilder> indexBuilders = new ArrayList<>();
+        for (String indexName : indexNames) {
+            for (int id = 0; id < content.length; id++) {
+                indexBuilders.add(client().prepareIndex()
+                    .setIndex(indexName)
+                    .setType("type1")
+                    .setId(String.valueOf(id))
+                    .setSource("field1", content[id]));
+            }
+        }
+        indexRandom(true, indexBuilders);
+
+        // request tvs and compare from each index
+        for (int id = 0; id < content.length; id++) {
+            Fields[] fields = new Fields[2];
+            for (int j = 0; j < indexNames.length; j++) {
+                TermVectorsResponse resp = client().prepareTermVector(indexNames[j], "type1", String.valueOf(id))
+                    .setOffsets(true)
+                    .setPositions(true)
+                    .setSelectedFields("field1")
+                    .get();
+                assertThat("doc with index: " + indexNames[j] + ", type1 and id: " + id, resp.isExists(), equalTo(true));
+                fields[j] = resp.getFields();
+            }
+            compareTermVectors("field1", fields[0], fields[1]);
+        }
+    }
+
     private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
         final TermsEnum termsEnum = terms.iterator();
         List<String> bestTerms = new ArrayList<>();
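
The test above pairs an index whose text field stores term vectors and is analyzed with a keyword tokenizer plus a lowercase filter against an index whose keyword field uses a lowercase normalizer and has its term vectors generated on the fly, then asserts that both return identical term vectors. The equivalence it relies on can be seen with Lucene's analysis API directly; the class below is an illustrative sketch (its name and setup are assumptions, not part of the commit):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.custom.CustomAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Illustrative sketch (not part of the commit): a keyword tokenizer followed by a lowercase
    // filter emits a single lowercased token for the whole value, which is the same output a
    // lowercase normalizer produces for the keyword field.
    public final class KeywordLowercaseDemo {
        public static void main(String[] args) throws Exception {
            Analyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer("keyword")
                .addTokenFilter("lowercase")
                .build();
            try (TokenStream ts = analyzer.tokenStream("field1", "Hello World")) {
                CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    System.out.println(term.toString()); // prints: hello world
                }
                ts.end();
            }
        }
    }

Both pipelines emit a single lowercased token spanning the whole value, so the stored and the dynamically generated term vectors should line up term for term, which is what the compareTermVectors helper inherited from AbstractTermVectorsTestCase is used to verify.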