Change type of ingest doc meta-data field 'TIMESTAMP' to `Date` (#22234)

With this commit we change the data type of the 'TIMESTAMP'
meta-data field from a formatted date string to a plain
`java.util.Date` instance. The main reason for this change is
that our benchmarks have indicated that formatting the timestamp
contributes significantly to the time spent in the ingest pipeline.

The overhead in terms of indexing throughput of the ingest
pipeline is about 15% and breaks down roughly as follows:

* 5% overhead caused by the conversion from `XContent` -> `Map`
* 5% overhead caused by the timestamp formatting
* 5% overhead caused by the conversion from `Map` -> `XContent`

Relates #22074
This commit is contained in:
Daniel Mitterdorfer 2016-12-19 09:10:58 +01:00 committed by GitHub
parent ccfeac8dd5
commit 6327e35414
4 changed files with 19 additions and 20 deletions

View File

@ -27,18 +27,14 @@ import org.elasticsearch.index.mapper.RoutingFieldMapper;
import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.TypeFieldMapper; import org.elasticsearch.index.mapper.TypeFieldMapper;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Base64; import java.util.Base64;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.TimeZone;
/** /**
* Represents a single document being captured before indexing and holds the source and metadata (like id, type and index). * Represents a single document being captured before indexing and holds the source and metadata (like id, type and index).
@ -68,9 +64,7 @@ public final class IngestDocument {
} }
this.ingestMetadata = new HashMap<>(); this.ingestMetadata = new HashMap<>();
DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ", Locale.ROOT); this.ingestMetadata.put(TIMESTAMP, new Date());
df.setTimeZone(TimeZone.getTimeZone("UTC"));
this.ingestMetadata.put(TIMESTAMP, df.format(new Date()));
} }
/** /**
@ -595,6 +589,8 @@ public final class IngestDocument {
value instanceof Long || value instanceof Float || value instanceof Long || value instanceof Float ||
value instanceof Double || value instanceof Boolean) { value instanceof Double || value instanceof Boolean) {
return value; return value;
} else if (value instanceof Date) {
return ((Date) value).clone();
} else { } else {
throw new IllegalArgumentException("unexpected value type [" + value.getClass() + "]"); throw new IllegalArgumentException("unexpected value type [" + value.getClass() + "]");
} }

View File

@ -22,21 +22,17 @@ package org.elasticsearch.ingest;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
import org.junit.Before; import org.junit.Before;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument; import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.endsWith;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.instanceOf;
@ -48,13 +44,14 @@ import static org.hamcrest.Matchers.sameInstance;
public class IngestDocumentTests extends ESTestCase { public class IngestDocumentTests extends ESTestCase {
private static final Date BOGUS_TIMESTAMP = new Date(0L);
private IngestDocument ingestDocument; private IngestDocument ingestDocument;
@Before @Before
public void setIngestDocument() { public void setIngestDocument() {
Map<String, Object> document = new HashMap<>(); Map<String, Object> document = new HashMap<>();
Map<String, Object> ingestMap = new HashMap<>(); Map<String, Object> ingestMap = new HashMap<>();
ingestMap.put("timestamp", "bogus_timestamp"); ingestMap.put("timestamp", BOGUS_TIMESTAMP);
document.put("_ingest", ingestMap); document.put("_ingest", ingestMap);
document.put("foo", "bar"); document.put("foo", "bar");
document.put("int", 123); document.put("int", 123);
@ -86,9 +83,9 @@ public class IngestDocumentTests extends ESTestCase {
assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("index")); assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("index"));
assertThat(ingestDocument.getFieldValue("_type", String.class), equalTo("type")); assertThat(ingestDocument.getFieldValue("_type", String.class), equalTo("type"));
assertThat(ingestDocument.getFieldValue("_id", String.class), equalTo("id")); assertThat(ingestDocument.getFieldValue("_id", String.class), equalTo("id"));
assertThat(ingestDocument.getFieldValue("_ingest.timestamp", String.class), assertThat(ingestDocument.getFieldValue("_ingest.timestamp", Date.class),
both(notNullValue()).and(not(equalTo("bogus_timestamp")))); both(notNullValue()).and(not(equalTo(BOGUS_TIMESTAMP))));
assertThat(ingestDocument.getFieldValue("_source._ingest.timestamp", String.class), equalTo("bogus_timestamp")); assertThat(ingestDocument.getFieldValue("_source._ingest.timestamp", Date.class), equalTo(BOGUS_TIMESTAMP));
} }
public void testGetSourceObject() { public void testGetSourceObject() {
@ -972,11 +969,8 @@ public class IngestDocumentTests extends ESTestCase {
long before = System.currentTimeMillis(); long before = System.currentTimeMillis();
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
long after = System.currentTimeMillis(); long after = System.currentTimeMillis();
String timestampString = (String) ingestDocument.getIngestMetadata().get("timestamp"); Date timestamp = (Date) ingestDocument.getIngestMetadata().get(IngestDocument.TIMESTAMP);
assertThat(timestampString, notNullValue()); assertThat(timestamp, notNullValue());
assertThat(timestampString, endsWith("+0000"));
DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ", Locale.ROOT);
Date timestamp = df.parse(timestampString);
assertThat(timestamp.getTime(), greaterThanOrEqualTo(before)); assertThat(timestamp.getTime(), greaterThanOrEqualTo(before));
assertThat(timestamp.getTime(), lessThanOrEqualTo(after)); assertThat(timestamp.getTime(), lessThanOrEqualTo(after));
} }

View File

@ -35,6 +35,7 @@ way to reindex old indices is to use the `reindex` API.
* <<breaking_60_plugins_changes>> * <<breaking_60_plugins_changes>>
* <<breaking_60_indices_changes>> * <<breaking_60_indices_changes>>
* <<breaking_60_scripting_changes>> * <<breaking_60_scripting_changes>>
* <<breaking_60_ingest_changes>>
include::migrate_6_0/cat.asciidoc[] include::migrate_6_0/cat.asciidoc[]
@ -57,3 +58,5 @@ include::migrate_6_0/plugins.asciidoc[]
include::migrate_6_0/indices.asciidoc[] include::migrate_6_0/indices.asciidoc[]
include::migrate_6_0/scripting.asciidoc[] include::migrate_6_0/scripting.asciidoc[]
include::migrate_6_0/ingest.asciidoc[]

View File

@ -0,0 +1,6 @@
[[breaking_60_ingest_changes]]
=== Ingest changes
==== Timestamp meta-data field type has changed
The type of the "timestamp" ingest meta-data field (`_ingest.timestamp`) has changed from `java.lang.String` (a formatted date string) to `java.util.Date`.