Fix Term Vectors with artificial docs and keyword fields (#53504) (#53550)

Previously, Term Vectors API was returning empty results for
artificial documents with keyword fields. Checking only for `string()`
on `IndexableField` is not enough, since for `KeywordFieldType`
`binaryValue()` must be used instead.

Fixes #53494

(cherry picked from commit 1fc3fe3d32f41eab2101c0536751b7c47e63cc48)
This commit is contained in:
Marios Trivyzas 2020-03-13 19:26:14 +01:00 committed by GitHub
parent 690099553c
commit b6c94fd73e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 52 additions and 30 deletions

View File

@ -121,24 +121,6 @@ public abstract class ParseContext implements Iterable<ParseContext.Document>{
return f.toArray(new IndexableField[f.size()]);
}
/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* If you want the actual numeric field instances back, use {@link #getFields}.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
public final String[] getValues(String name) {
List<String> result = new ArrayList<>();
for (IndexableField field : fields) {
if (field.name().equals(name) && field.stringValue() != null) {
result.add(field.stringValue());
}
}
return result.toArray(new String[result.size()]);
}
public IndexableField getField(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name)) {

View File

@ -44,6 +44,7 @@ import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.get.GetResult;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
@ -56,6 +57,7 @@ import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.search.dfs.AggregatedDfs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
@ -316,7 +318,7 @@ public class TermVectorsService {
else {
seenFields.add(field.name());
}
String[] values = doc.getValues(field.name());
String[] values = getValues(doc.getFields(field.name()));
documentFields.add(new DocumentField(field.name(), Arrays.asList((Object[]) values)));
}
return generateTermVectors(indexShard,
@ -324,6 +326,25 @@ public class TermVectorsService {
request.offsets(), request.perFieldAnalyzer(), seenFields);
}
/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* @param fields The <code>IndexableField</code> to get the values from
* @return a <code>String[]</code> of field values
*/
public static String[] getValues(IndexableField[] fields) {
List<String> result = new ArrayList<>();
for (IndexableField field : fields) {
if (field.fieldType() instanceof KeywordFieldMapper.KeywordFieldType) {
result.add(field.binaryValue().utf8ToString());
} else if (field.fieldType() instanceof StringFieldType) {
result.add(field.stringValue());
}
}
return result.toArray(new String[0]);
}
private static ParsedDocument parseDocument(IndexShard indexShard, String index, String type, BytesReference doc,
XContentType xContentType, String routing) {
MapperService mapperService = indexShard.mapperService();

View File

@ -30,6 +30,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
@ -202,7 +203,7 @@ public class DateFieldMapperTests extends ESSingleNodeTestCase {
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
}
public void testChangeFormat() throws IOException {

View File

@ -154,11 +154,15 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.endObject());
ParsedDocument doc = mapper.parse(new SourceToParse("test", "type", "1", bytes, XContentType.JSON));
assertNull(doc.dynamicMappingsUpdate()); // no update!
String[] values = doc.rootDoc().getValues("foo.bar.baz");
assertEquals(3, values.length);
assertEquals("123", values[0]);
assertEquals("456", values[1]);
assertEquals("789", values[2]);
IndexableField[] fields = doc.rootDoc().getFields("foo.bar.baz");
assertEquals(6, fields.length);
assertEquals(123, fields[0].numericValue());
assertEquals("123", fields[1].stringValue());
assertEquals(456, fields[2].numericValue());
assertEquals("456", fields[3].stringValue());
assertEquals(789, fields[4].numericValue());
assertEquals("789", fields[5].stringValue());
}
public void testDotsWithExistingNestedMapper() throws Exception {

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
@ -27,8 +28,10 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.test.ESSingleNodeTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
@ -48,8 +51,14 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase {
}
void assertFieldNames(Set<String> expected, ParsedDocument doc) {
String[] got = doc.rootDoc().getValues("_field_names");
assertEquals(expected, set(got));
IndexableField[] fields = doc.rootDoc().getFields("_field_names");
List<String> result = new ArrayList<>();
for (IndexableField field : fields) {
if (field.fieldType() instanceof FieldNamesFieldMapper.FieldNamesFieldType) {
result.add(field.stringValue());
}
}
assertEquals(expected, set(result.toArray(new String[0])));
}
public void testExtractFieldNames() {

View File

@ -32,6 +32,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
@ -194,7 +195,7 @@ public class IpFieldMapperTests extends ESSingleNodeTestCase {
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
}
public void testNullValue() throws IOException {

View File

@ -36,6 +36,7 @@ import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
@ -128,6 +129,9 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
fieldType = fields[1].fieldType();
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
// used by TermVectorsService
assertArrayEquals(new String[] { "1234" }, TermVectorsService.getValues(doc.rootDoc().getFields("field")));
}
public void testIgnoreAbove() throws IOException {

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.mapper;
import com.carrotsearch.randomizedtesting.annotations.Timeout;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.Strings;
@ -32,6 +31,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.NumberFieldTypeTests.OutOfRangeSpec;
import org.elasticsearch.index.termvectors.TermVectorsService;
import java.io.ByteArrayInputStream;
import java.io.IOException;
@ -250,7 +250,7 @@ public class NumberFieldMapperTests extends AbstractNumericFieldMapperTestCase {
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
}
}
}