Fix term vectors generator with keyword and normalizer (#27608)
This change applies the normalizer defined on a keyword field, rather than the field's index analyzer, when term vectors for that field are generated on the fly. Fixes #27320.
This commit is contained in:
parent
da50fa4540
commit
e0b1a6544d
|
@ -217,7 +217,12 @@ public class TermVectorsService {
|
||||||
if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) {
|
if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) {
|
||||||
analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field).toString());
|
analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field).toString());
|
||||||
} else {
|
} else {
|
||||||
analyzer = mapperService.fullName(field).indexAnalyzer();
|
MappedFieldType fieldType = mapperService.fullName(field);
|
||||||
|
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
|
||||||
|
analyzer = ((KeywordFieldMapper.KeywordFieldType) fieldType).normalizer();
|
||||||
|
} else {
|
||||||
|
analyzer = fieldType.indexAnalyzer();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (analyzer == null) {
|
if (analyzer == null) {
|
||||||
analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer();
|
analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer();
|
||||||
|
|
|
@ -1025,6 +1025,51 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
|
||||||
assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
|
assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks that term vectors requested for a keyword field with a normalizer match the
 * term vectors of a text field that stores them and uses a comparable custom analyzer.
 *
 * Two indices are created from the same settings: "with_tv" maps field1 as text with
 * term_vector=with_positions_offsets and analyzer=my_analyzer, while "without_tv" maps
 * field1 as keyword with normalizer=my_normalizer. The same three documents are indexed
 * into both, and for every document id the term vectors returned by the two indices are
 * handed to compareTermVectors (defined outside this excerpt).
 *
 * The declared exceptions are presumably propagated from the indexing and comparison
 * helpers; this excerpt does not show which call throws which.
 */
public void testWithKeywordAndNormalizer() throws IOException, ExecutionException, InterruptedException {
|
||||||
|
// set up the two indices to compare
|
||||||
|
String[] indexNames = new String[] {"with_tv", "without_tv"};
|
||||||
|
// shared settings: "my_analyzer" (keyword tokenizer + lowercase filter) and
// "my_normalizer" (lowercase filter)
Settings.Builder builder = Settings.builder()
|
||||||
|
.put(indexSettings())
|
||||||
|
.put("index.analysis.analyzer.my_analyzer.tokenizer", "keyword")
|
||||||
|
.putList("index.analysis.analyzer.my_analyzer.filter", "lowercase")
|
||||||
|
.putList("index.analysis.normalizer.my_normalizer.filter", "lowercase");
|
||||||
|
// "with_tv": text field that stores term vectors with positions and offsets
assertAcked(prepareCreate(indexNames[0]).setSettings(builder.build())
|
||||||
|
.addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets,analyzer=my_analyzer"));
|
||||||
|
// "without_tv": keyword field with the normalizer; the mapping sets no term_vector option
assertAcked(prepareCreate(indexNames[1]).setSettings(builder.build())
|
||||||
|
.addMapping("type1", "field1", "type=keyword,normalizer=my_normalizer"));
|
||||||
|
ensureGreen();
|
||||||
|
|
||||||
|
// index the same documents (case variants of one phrase) into both indices, with and without term vectors
|
||||||
|
String[] content = new String[] { "Hello World", "hello world", "HELLO WORLD" };
|
||||||
|
|
||||||
|
List<IndexRequestBuilder> indexBuilders = new ArrayList<>();
|
||||||
|
for (String indexName : indexNames) {
|
||||||
|
// the document id is the position of its text in the content array
for (int id = 0; id < content.length; id++) {
|
||||||
|
indexBuilders.add(client().prepareIndex()
|
||||||
|
.setIndex(indexName)
|
||||||
|
.setType("type1")
|
||||||
|
.setId(String.valueOf(id))
|
||||||
|
.setSource("field1", content[id]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
indexRandom(true, indexBuilders);
|
||||||
|
|
||||||
|
// request term vectors for each document from both indices and compare them
|
||||||
|
for (int id = 0; id < content.length; id++) {
|
||||||
|
// fields[0] receives the response from "with_tv", fields[1] the one from "without_tv"
Fields[] fields = new Fields[2];
|
||||||
|
for (int j = 0; j < indexNames.length; j++) {
|
||||||
|
TermVectorsResponse resp = client().prepareTermVector(indexNames[j], "type1", String.valueOf(id))
|
||||||
|
.setOffsets(true)
|
||||||
|
.setPositions(true)
|
||||||
|
.setSelectedFields("field1")
|
||||||
|
.get();
|
||||||
|
assertThat("doc with index: " + indexNames[j] + ", type1 and id: " + id, resp.isExists(), equalTo(true));
|
||||||
|
fields[j] = resp.getFields();
|
||||||
|
}
|
||||||
|
// both indices are expected to return matching term vectors for field1
compareTermVectors("field1", fields[0], fields[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
|
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
|
||||||
final TermsEnum termsEnum = terms.iterator();
|
final TermsEnum termsEnum = terms.iterator();
|
||||||
List<String> bestTerms = new ArrayList<>();
|
List<String> bestTerms = new ArrayList<>();
|
||||||
|
|
Loading…
Reference in New Issue