From d404ee35333a9aa16a96c9d1c991bbee17f95005 Mon Sep 17 00:00:00 2001 From: gameldar Date: Thu, 22 Dec 2016 00:18:33 +0800 Subject: [PATCH 1/3] Add ingest-attachment-with-arrays section to ingest attachments doc Added a new section detailing how to use the attachment processor within an array. This reverts commit #22296 and instead links to the foreach processor. --- docs/plugins/ingest-attachment.asciidoc | 52 +++++++++++++++++++++- docs/reference/ingest/ingest-node.asciidoc | 4 ++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/docs/plugins/ingest-attachment.asciidoc b/docs/plugins/ingest-attachment.asciidoc index 44ff41a6dfb..1471fbdae0d 100644 --- a/docs/plugins/ingest-attachment.asciidoc +++ b/docs/plugins/ingest-attachment.asciidoc @@ -106,4 +106,54 @@ NOTE: Extracting contents from binary data is a resource intensive operation and consumes a lot of resources. It is highly recommended to run pipelines using this processor in a dedicated ingest node. -NOTE: To process an array of attachments the {ref}/foreach-processor.html[foreach processor] is required. +[[ingest-attachment-with-arrays]] +==== Using the Attachment Processor with arrays + +To use the attachment processor within an array of attachments the +{ref}/foreach-processor.html[foreach processor] is required. This +enables the attachment processor to be run on the individual elements +of the array. 
+ +For example, given the following source: + +[source,js] +-------------------------------------------------- +{ + "attachments" : [ + { + "filename" : "ipsum.txt", + "data" : "dGhpcyBpcwpqdXN0IHNvbWUgdGV4dAo=" + }, + { + "filename" : "test.txt", + "data" : "VGhpcyBpcyBhIHRlc3QK" + } + ] +} +-------------------------------------------------- + +In this case, we want to process the data field in each element +of the attachments field and insert +the properties into the document so the following `foreach` +processor is used: + +[source,js] +-------------------------------------------------- +{ + "foreach": { + "field": "attachments", + "processor": { + "attachment": { + "target_field": "_ingest._value.attachment", + "field": "_ingest._value.data" + } + } + } +} +-------------------------------------------------- +Note that the `target_field` needs to be set, otherwise the +default value is used which is a top level field `attachment`. The +properties on this top level field will contain the value of the +first attachment only. However, by setting the +`target_field` to a field under `_ingest._value`, the processor +associates the extracted properties with the correct attachment. diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 8b6a4478115..c34904e92fd 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -1166,6 +1166,10 @@ In this example, if the `remove` processor does fail, then the array elements that have been processed thus far will be updated. +Another advanced example can be found in {plugins}/using-ingest-attachment.html[attachment processor documentation]. 
+ + + [[grok-processor]] === Grok Processor From e3eb36388221649935a1f3e722103013f111c309 Mon Sep 17 00:00:00 2001 From: Gameldar Date: Thu, 22 Dec 2016 20:52:08 +0800 Subject: [PATCH 2/3] Link directly to the attachments in arrays section The link should be made to the relevant section of the ingest attachments documentation, rather than the top of the page. --- docs/reference/ingest/ingest-node.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index c34904e92fd..d6acabd87f1 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -1166,7 +1166,7 @@ In this example, if the `remove` processor does fail, then the array elements that have been processed thus far will be updated. -Another advanced example can be found in {plugins}/using-ingest-attachment.html[attachment processor documentation]. +Another advanced example can be found in the {plugins}/ingest-attachment-with-arrays.html[attachment processor documentation]. From b100f1850564cdc22bbac34660a6bc0039eb57cd Mon Sep 17 00:00:00 2001 From: gameldar Date: Fri, 23 Dec 2016 13:48:44 +0800 Subject: [PATCH 3/3] Fix the ingest attachment array examples Fix up the ingest attachment array handling example so they are full examples and validated by the build system correctly. 
--- docs/plugins/ingest-attachment.asciidoc | 73 +++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/docs/plugins/ingest-attachment.asciidoc b/docs/plugins/ingest-attachment.asciidoc index 1471fbdae0d..3aae6c0e146 100644 --- a/docs/plugins/ingest-attachment.asciidoc +++ b/docs/plugins/ingest-attachment.asciidoc @@ -131,6 +131,7 @@ For example, given the following source: ] } -------------------------------------------------- +// NOTCONSOLE In this case, we want to process the data field in each element of the attachments field and insert @@ -139,18 +140,78 @@ processor is used: [source,js] -------------------------------------------------- +PUT _ingest/pipeline/attachment { - "foreach": { - "field": "attachments", - "processor": { - "attachment": { - "target_field": "_ingest._value.attachment", - "field": "_ingest._value.data" + "description" : "Extract attachment information from arrays", + "processors" : [ + { + "foreach": { + "field": "attachments", + "processor": { + "attachment": { + "target_field": "_ingest._value.attachment", + "field": "_ingest._value.data" + } + } } } + ] +} +PUT my_index/my_type/my_id?pipeline=attachment +{ + "attachments" : [ + { + "filename" : "ipsum.txt", + "data" : "dGhpcyBpcwpqdXN0IHNvbWUgdGV4dAo=" + }, + { + "filename" : "test.txt", + "data" : "VGhpcyBpcyBhIHRlc3QK" + } + ] +} +GET my_index/my_type/my_id +-------------------------------------------------- +// CONSOLE + +Returns this: +[source,js] +-------------------------------------------------- +{ + "_index" : "my_index", + "_type" : "my_type", + "_id" : "my_id", + "_version" : 1, + "found" : true, + "_source" : { + "attachments" : [ + { + "filename" : "ipsum.txt", + "data" : "dGhpcyBpcwpqdXN0IHNvbWUgdGV4dAo=", + "attachment" : { + "content_type" : "text/plain; charset=ISO-8859-1", + "language" : "en", + "content" : "this is\njust some text", + "content_length" : 24 + } + }, + { + "filename" : "test.txt", + "data" : "VGhpcyBpcyBhIHRlc3QK", + 
"attachment" : { + "content_type" : "text/plain; charset=ISO-8859-1", + "language" : "en", + "content" : "This is a test", + "content_length" : 16 + } + } + ] } } -------------------------------------------------- +// TESTRESPONSE + + Note that the `target_field` needs to be set, otherwise the default value is used which is a top level field `attachment`. The properties on this top level field will contain the value of the