Deduplicate `_field_names`. (#26550)

This is a minor optimization that should save some utf8 conversions and indexing.
This commit is contained in:
Adrien Grand 2017-09-11 10:57:08 +02:00 committed by GitHub
parent a6d57f50f6
commit 2bc3eeccde
2 changed files with 34 additions and 3 deletions

View File

@ -258,9 +258,19 @@ public class FieldNamesFieldMapper extends MetadataFieldMapper {
return;
}
for (ParseContext.Document document : context.docs()) {
final List<String> paths = new ArrayList<>();
final List<String> paths = new ArrayList<>(document.getFields().size());
String previousPath = ""; // used as a sentinel - field names can't be empty
for (IndexableField field : document.getFields()) {
paths.add(field.name());
final String path = field.name();
if (path.equals(previousPath)) {
// Sometimes mappers create multiple Lucene fields, eg. one for indexing,
// one for doc values and one for storing. Deduplicating is not required
// for correctness but this simple check helps save utf-8 conversions and
// gives Lucene fewer values to deal with.
continue;
}
paths.add(path);
previousPath = path;
}
for (String path : paths) {
for (String fieldName : extractFieldNames(path)) {

View File

@ -38,6 +38,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.Supplier;
@ -56,7 +57,7 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase {
return new TreeSet<>(Arrays.asList(values));
}
void assertFieldNames(SortedSet<String> expected, ParsedDocument doc) {
void assertFieldNames(Set<String> expected, ParsedDocument doc) {
String[] got = doc.rootDoc().getValues("_field_names");
assertEquals(expected, set(got));
}
@ -120,6 +121,26 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase {
assertFieldNames(set("field", "field.keyword", "_id", "_version", "_seq_no", "_primary_term", "_source"), doc);
}
public void testDedup() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_field_names").field("enabled", true).endObject()
.endObject().endObject().string();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
FieldNamesFieldMapper fieldNamesMapper = docMapper.metadataMapper(FieldNamesFieldMapper.class);
assertTrue(fieldNamesMapper.fieldType().isEnabled());
ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("field", 3) // will create 2 lucene fields under the hood: index and doc values
.endObject()
.bytes(),
XContentType.JSON));
Set<String> fields = set("field", "_id", "_version", "_seq_no", "_primary_term", "_source");
assertFieldNames(fields, doc);
assertEquals(fields.size(), doc.rootDoc().getValues("_field_names").length);
}
public void testDisabled() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_field_names").field("enabled", false).endObject()