Deduplicate `_field_names`. (#26550)
This is a minor optimization that should save some utf8 conversions and indexing.
This commit is contained in:
parent
a6d57f50f6
commit
2bc3eeccde
|
@ -258,9 +258,19 @@ public class FieldNamesFieldMapper extends MetadataFieldMapper {
|
|||
return;
|
||||
}
|
||||
for (ParseContext.Document document : context.docs()) {
|
||||
final List<String> paths = new ArrayList<>();
|
||||
final List<String> paths = new ArrayList<>(document.getFields().size());
|
||||
String previousPath = ""; // used as a sentinel - field names can't be empty
|
||||
for (IndexableField field : document.getFields()) {
|
||||
paths.add(field.name());
|
||||
final String path = field.name();
|
||||
if (path.equals(previousPath)) {
|
||||
// Sometimes mappers create multiple Lucene fields, eg. one for indexing,
|
||||
// one for doc values and one for storing. Deduplicating is not required
|
||||
// for correctness but this simple check helps save utf-8 conversions and
|
||||
// gives Lucene fewer values to deal with.
|
||||
continue;
|
||||
}
|
||||
paths.add(path);
|
||||
previousPath = path;
|
||||
}
|
||||
for (String path : paths) {
|
||||
for (String fieldName : extractFieldNames(path)) {
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.Supplier;
|
||||
|
@ -56,7 +57,7 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase {
|
|||
return new TreeSet<>(Arrays.asList(values));
|
||||
}
|
||||
|
||||
void assertFieldNames(SortedSet<String> expected, ParsedDocument doc) {
|
||||
void assertFieldNames(Set<String> expected, ParsedDocument doc) {
|
||||
String[] got = doc.rootDoc().getValues("_field_names");
|
||||
assertEquals(expected, set(got));
|
||||
}
|
||||
|
@ -120,6 +121,26 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase {
|
|||
assertFieldNames(set("field", "field.keyword", "_id", "_version", "_seq_no", "_primary_term", "_source"), doc);
|
||||
}
|
||||
|
||||
public void testDedup() throws Exception {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("_field_names").field("enabled", true).endObject()
|
||||
.endObject().endObject().string();
|
||||
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
FieldNamesFieldMapper fieldNamesMapper = docMapper.metadataMapper(FieldNamesFieldMapper.class);
|
||||
assertTrue(fieldNamesMapper.fieldType().isEnabled());
|
||||
|
||||
ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", 3) // will create 2 lucene fields under the hood: index and doc values
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
Set<String> fields = set("field", "_id", "_version", "_seq_no", "_primary_term", "_source");
|
||||
assertFieldNames(fields, doc);
|
||||
assertEquals(fields.size(), doc.rootDoc().getValues("_field_names").length);
|
||||
}
|
||||
|
||||
public void testDisabled() throws Exception {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("_field_names").field("enabled", false).endObject()
|
||||
|
|
Loading…
Reference in New Issue