From 6327e35414c92fe911701b2d7163a330b7370e33 Mon Sep 17 00:00:00 2001 From: Daniel Mitterdorfer Date: Mon, 19 Dec 2016 09:10:58 +0100 Subject: [PATCH] Change type of ingest doc meta-data field 'TIMESTAMP' to `Date` (#22234) With this commit we change the data type of the 'TIMESTAMP' meta-data field from a formatted date string to a plain `java.util.Date` instance. The main reason for this change is that our benchmarks have indicated that formatting the timestamp contributes significantly to the time spent in the ingest pipeline. The overhead in terms of indexing throughput of the ingest pipeline is about 15% and breaks down roughly as follows: * 5% overhead caused by the conversion from `XContent` -> `Map` * 5% overhead caused by the timestamp formatting * 5% overhead caused by the conversion `Map` -> `XContent` Relates #22074 --- .../elasticsearch/ingest/IngestDocument.java | 10 +++------- .../ingest/IngestDocumentTests.java | 20 +++++++------------ docs/reference/migration/migrate_6_0.asciidoc | 3 +++ .../migration/migrate_6_0/ingest.asciidoc | 6 ++++++ 4 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 docs/reference/migration/migrate_6_0/ingest.asciidoc diff --git a/core/src/main/java/org/elasticsearch/ingest/IngestDocument.java b/core/src/main/java/org/elasticsearch/ingest/IngestDocument.java index edb92b6e837..eaae1a3e881 100644 --- a/core/src/main/java/org/elasticsearch/ingest/IngestDocument.java +++ b/core/src/main/java/org/elasticsearch/ingest/IngestDocument.java @@ -27,18 +27,14 @@ import org.elasticsearch.index.mapper.RoutingFieldMapper; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.TypeFieldMapper; -import java.text.DateFormat; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Date; import java.util.HashMap; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Objects; -import 
java.util.TimeZone; /** * Represents a single document being captured before indexing and holds the source and metadata (like id, type and index). @@ -68,9 +64,7 @@ public final class IngestDocument { } this.ingestMetadata = new HashMap<>(); - DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ", Locale.ROOT); - df.setTimeZone(TimeZone.getTimeZone("UTC")); - this.ingestMetadata.put(TIMESTAMP, df.format(new Date())); + this.ingestMetadata.put(TIMESTAMP, new Date()); } /** @@ -595,6 +589,8 @@ public final class IngestDocument { value instanceof Long || value instanceof Float || value instanceof Double || value instanceof Boolean) { return value; + } else if (value instanceof Date) { + return ((Date) value).clone(); } else { throw new IllegalArgumentException("unexpected value type [" + value.getClass() + "]"); } diff --git a/core/src/test/java/org/elasticsearch/ingest/IngestDocumentTests.java b/core/src/test/java/org/elasticsearch/ingest/IngestDocumentTests.java index e16be95d2e6..1bd5676f474 100644 --- a/core/src/test/java/org/elasticsearch/ingest/IngestDocumentTests.java +++ b/core/src/test/java/org/elasticsearch/ingest/IngestDocumentTests.java @@ -22,21 +22,17 @@ package org.elasticsearch.ingest; import org.elasticsearch.test.ESTestCase; import org.junit.Before; -import java.text.DateFormat; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; -import java.util.Locale; import java.util.Map; import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument; import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.instanceOf; @@ -48,13 +44,14 @@ import static 
org.hamcrest.Matchers.sameInstance; public class IngestDocumentTests extends ESTestCase { + private static final Date BOGUS_TIMESTAMP = new Date(0L); private IngestDocument ingestDocument; @Before public void setIngestDocument() { Map document = new HashMap<>(); Map ingestMap = new HashMap<>(); - ingestMap.put("timestamp", "bogus_timestamp"); + ingestMap.put("timestamp", BOGUS_TIMESTAMP); document.put("_ingest", ingestMap); document.put("foo", "bar"); document.put("int", 123); @@ -86,9 +83,9 @@ public class IngestDocumentTests extends ESTestCase { assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("index")); assertThat(ingestDocument.getFieldValue("_type", String.class), equalTo("type")); assertThat(ingestDocument.getFieldValue("_id", String.class), equalTo("id")); - assertThat(ingestDocument.getFieldValue("_ingest.timestamp", String.class), - both(notNullValue()).and(not(equalTo("bogus_timestamp")))); - assertThat(ingestDocument.getFieldValue("_source._ingest.timestamp", String.class), equalTo("bogus_timestamp")); + assertThat(ingestDocument.getFieldValue("_ingest.timestamp", Date.class), + both(notNullValue()).and(not(equalTo(BOGUS_TIMESTAMP)))); + assertThat(ingestDocument.getFieldValue("_source._ingest.timestamp", Date.class), equalTo(BOGUS_TIMESTAMP)); } public void testGetSourceObject() { @@ -972,11 +969,8 @@ public class IngestDocumentTests extends ESTestCase { long before = System.currentTimeMillis(); IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); long after = System.currentTimeMillis(); - String timestampString = (String) ingestDocument.getIngestMetadata().get("timestamp"); - assertThat(timestampString, notNullValue()); - assertThat(timestampString, endsWith("+0000")); - DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ", Locale.ROOT); - Date timestamp = df.parse(timestampString); + Date timestamp = (Date) ingestDocument.getIngestMetadata().get(IngestDocument.TIMESTAMP); + 
assertThat(timestamp, notNullValue()); assertThat(timestamp.getTime(), greaterThanOrEqualTo(before)); assertThat(timestamp.getTime(), lessThanOrEqualTo(after)); } diff --git a/docs/reference/migration/migrate_6_0.asciidoc b/docs/reference/migration/migrate_6_0.asciidoc index f1712d29cf9..abc476a7d1b 100644 --- a/docs/reference/migration/migrate_6_0.asciidoc +++ b/docs/reference/migration/migrate_6_0.asciidoc @@ -35,6 +35,7 @@ way to reindex old indices is to use the `reindex` API. * <<breaking_60_plugins_changes>> * <<breaking_60_indices_changes>> * <<breaking_60_scripting_changes>> +* <<breaking_60_ingest_changes>> include::migrate_6_0/cat.asciidoc[] @@ -57,3 +58,5 @@ include::migrate_6_0/plugins.asciidoc[] include::migrate_6_0/indices.asciidoc[] include::migrate_6_0/scripting.asciidoc[] + +include::migrate_6_0/ingest.asciidoc[] diff --git a/docs/reference/migration/migrate_6_0/ingest.asciidoc b/docs/reference/migration/migrate_6_0/ingest.asciidoc new file mode 100644 index 00000000000..db2caabe43a --- /dev/null +++ b/docs/reference/migration/migrate_6_0/ingest.asciidoc @@ -0,0 +1,6 @@ +[[breaking_60_ingest_changes]] +=== Ingest changes + +==== Timestamp meta-data field type has changed + +The type of the "timestamp" meta-data field has changed from `java.lang.String` to `java.util.Date`. \ No newline at end of file