Fix term vectors generator with keyword and normalizer (#27608)
This change applies the normalizer defined on the field when building term vectors dynamically on a keyword field. Fixes #27320
This commit is contained in:
parent
da50fa4540
commit
e0b1a6544d
|
@ -217,7 +217,12 @@ public class TermVectorsService {
|
|||
if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) {
|
||||
analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field).toString());
|
||||
} else {
|
||||
analyzer = mapperService.fullName(field).indexAnalyzer();
|
||||
MappedFieldType fieldType = mapperService.fullName(field);
|
||||
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
|
||||
analyzer = ((KeywordFieldMapper.KeywordFieldType) fieldType).normalizer();
|
||||
} else {
|
||||
analyzer = fieldType.indexAnalyzer();
|
||||
}
|
||||
}
|
||||
if (analyzer == null) {
|
||||
analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer();
|
||||
|
|
|
@ -1025,6 +1025,51 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
|
|||
assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
|
||||
}
|
||||
|
||||
public void testWithKeywordAndNormalizer() throws IOException, ExecutionException, InterruptedException {
|
||||
// setup indices
|
||||
String[] indexNames = new String[] {"with_tv", "without_tv"};
|
||||
Settings.Builder builder = Settings.builder()
|
||||
.put(indexSettings())
|
||||
.put("index.analysis.analyzer.my_analyzer.tokenizer", "keyword")
|
||||
.putList("index.analysis.analyzer.my_analyzer.filter", "lowercase")
|
||||
.putList("index.analysis.normalizer.my_normalizer.filter", "lowercase");
|
||||
assertAcked(prepareCreate(indexNames[0]).setSettings(builder.build())
|
||||
.addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets,analyzer=my_analyzer"));
|
||||
assertAcked(prepareCreate(indexNames[1]).setSettings(builder.build())
|
||||
.addMapping("type1", "field1", "type=keyword,normalizer=my_normalizer"));
|
||||
ensureGreen();
|
||||
|
||||
// index documents with and without term vectors
|
||||
String[] content = new String[] { "Hello World", "hello world", "HELLO WORLD" };
|
||||
|
||||
List<IndexRequestBuilder> indexBuilders = new ArrayList<>();
|
||||
for (String indexName : indexNames) {
|
||||
for (int id = 0; id < content.length; id++) {
|
||||
indexBuilders.add(client().prepareIndex()
|
||||
.setIndex(indexName)
|
||||
.setType("type1")
|
||||
.setId(String.valueOf(id))
|
||||
.setSource("field1", content[id]));
|
||||
}
|
||||
}
|
||||
indexRandom(true, indexBuilders);
|
||||
|
||||
// request tvs and compare from each index
|
||||
for (int id = 0; id < content.length; id++) {
|
||||
Fields[] fields = new Fields[2];
|
||||
for (int j = 0; j < indexNames.length; j++) {
|
||||
TermVectorsResponse resp = client().prepareTermVector(indexNames[j], "type1", String.valueOf(id))
|
||||
.setOffsets(true)
|
||||
.setPositions(true)
|
||||
.setSelectedFields("field1")
|
||||
.get();
|
||||
assertThat("doc with index: " + indexNames[j] + ", type1 and id: " + id, resp.isExists(), equalTo(true));
|
||||
fields[j] = resp.getFields();
|
||||
}
|
||||
compareTermVectors("field1", fields[0], fields[1]);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
|
||||
final TermsEnum termsEnum = terms.iterator();
|
||||
List<String> bestTerms = new ArrayList<>();
|
||||
|
|
Loading…
Reference in New Issue