From a8d3053128298395fbdf3a629a5384d46288a5f2 Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Tue, 17 Nov 2020 15:15:25 -0500 Subject: [PATCH] [DOCS] Clarify ingest attachment example (#65143) (#65160) --- docs/plugins/ingest-attachment.asciidoc | 60 ++++++++++++++++--------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/docs/plugins/ingest-attachment.asciidoc b/docs/plugins/ingest-attachment.asciidoc index aa6100ada53..901bc19974f 100644 --- a/docs/plugins/ingest-attachment.asciidoc +++ b/docs/plugins/ingest-attachment.asciidoc @@ -30,10 +30,27 @@ include::install_remove.asciidoc[] | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document |====== -For example, this: +[discrete] +[[ingest-attachment-json-ex]] +==== Example + +If attaching files to JSON documents, you must first encode the file as a base64 +string. On Unix-like systems, you can do this using a `base64` command: + +[source,shell] +---- +base64 -i myfile.rtf +---- + +The command returns the base64-encoded string for the file. The following base64 +string is for an `.rtf` file containing the text `Lorem ipsum dolor sit amet`: +`e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=`. 
+ +Use an attachment processor to decode the string and extract the file's +properties: [source,console] --------------------------------------------------- +---- PUT _ingest/pipeline/attachment { "description" : "Extract attachment information", @@ -45,20 +62,20 @@ PUT _ingest/pipeline/attachment } ] } -PUT my-index-00001/_doc/my_id?pipeline=attachment +PUT my-index-000001/_doc/my_id?pipeline=attachment { "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=" } -GET my-index-00001/_doc/my_id --------------------------------------------------- +GET my-index-000001/_doc/my_id +---- -Returns this: +The document's `attachment` object contains extracted properties for the file: [source,console-result] --------------------------------------------------- +---- { "found": true, - "_index": "my-index-00001", + "_index": "my-index-000001", "_type": "_doc", "_id": "my_id", "_version": 1, @@ -74,14 +91,13 @@ Returns this: } } } --------------------------------------------------- +---- // TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] - -To specify only some fields to be extracted: +To extract only certain `attachment` fields, specify the `properties` array: [source,console] --------------------------------------------------- +---- PUT _ingest/pipeline/attachment { "description" : "Extract attachment information", @@ -94,7 +110,7 @@ PUT _ingest/pipeline/attachment } ] } --------------------------------------------------- +---- NOTE: Extracting contents from binary data is a resource intensive operation and consumes a lot of resources. 
It is highly recommended to run pipelines @@ -175,11 +191,11 @@ PUT _ingest/pipeline/attachment } ] } -PUT my-index-00001/_doc/my_id?pipeline=attachment +PUT my-index-000001/_doc/my_id?pipeline=attachment { "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=" } -GET my-index-00001/_doc/my_id +GET my-index-000001/_doc/my_id -------------------------------------------------- Returns this: @@ -188,7 +204,7 @@ Returns this: -------------------------------------------------- { "found": true, - "_index": "my-index-00001", + "_index": "my-index-000001", "_type": "_doc", "_id": "my_id", "_version": 1, @@ -223,12 +239,12 @@ PUT _ingest/pipeline/attachment } ] } -PUT my-index-00001/_doc/my_id_2?pipeline=attachment +PUT my-index-000001/_doc/my_id_2?pipeline=attachment { "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=", "max_size": 5 } -GET my-index-00001/_doc/my_id_2 +GET my-index-000001/_doc/my_id_2 -------------------------------------------------- Returns this: @@ -237,7 +253,7 @@ Returns this: -------------------------------------------------- { "found": true, - "_index": "my-index-00001", + "_index": "my-index-000001", "_type": "_doc", "_id": "my_id_2", "_version": 1, @@ -309,7 +325,7 @@ PUT _ingest/pipeline/attachment } ] } -PUT my-index-00001/_doc/my_id?pipeline=attachment +PUT my-index-000001/_doc/my_id?pipeline=attachment { "attachments" : [ { @@ -322,7 +338,7 @@ PUT my-index-00001/_doc/my_id?pipeline=attachment } ] } -GET my-index-00001/_doc/my_id +GET my-index-000001/_doc/my_id -------------------------------------------------- Returns this: @@ -330,7 +346,7 @@ Returns this: [source,console-result] -------------------------------------------------- { - "_index" : "my-index-00001", + "_index" : "my-index-000001", "_type" : "_doc", "_id" : "my_id", "_version" : 1,