[DOCS] Clarify ingest attachment example (#65143) (#65160)

This commit is contained in:
James Rodewig 2020-11-17 15:15:25 -05:00 committed by GitHub
parent 9471b6345e
commit a8d3053128
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 38 additions and 22 deletions

View File

@ -30,10 +30,27 @@ include::install_remove.asciidoc[]
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
|====== |======
For example, this: [discrete]
[[ingest-attachment-json-ex]]
==== Example
If attaching files to JSON documents, you must first encode the file as a base64
string. On Unix-like systems, you can do this using a `base64` command:
[source,shell]
----
base64 -i myfile.rtf
----
The command returns the base64-encoded string for the file. The following base64
string is for an `.rtf` file containing the text `Lorem ipsum dolor sit amet`:
`e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=`.
Use an attachment processor to decode the string and extract the file's
properties:
[source,console] [source,console]
-------------------------------------------------- ----
PUT _ingest/pipeline/attachment PUT _ingest/pipeline/attachment
{ {
"description" : "Extract attachment information", "description" : "Extract attachment information",
@ -45,20 +62,20 @@ PUT _ingest/pipeline/attachment
} }
] ]
} }
PUT my-index-00001/_doc/my_id?pipeline=attachment PUT my-index-000001/_doc/my_id?pipeline=attachment
{ {
"data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=" "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
} }
GET my-index-00001/_doc/my_id GET my-index-000001/_doc/my_id
-------------------------------------------------- ----
Returns this: The document's `attachment` object contains extracted properties for the file:
[source,console-result] [source,console-result]
-------------------------------------------------- ----
{ {
"found": true, "found": true,
"_index": "my-index-00001", "_index": "my-index-000001",
"_type": "_doc", "_type": "_doc",
"_id": "my_id", "_id": "my_id",
"_version": 1, "_version": 1,
@ -74,14 +91,13 @@ Returns this:
} }
} }
} }
-------------------------------------------------- ----
// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] // TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/]
To extract only certain `attachment` fields, specify the `properties` array:
To specify only some fields to be extracted:
[source,console] [source,console]
-------------------------------------------------- ----
PUT _ingest/pipeline/attachment PUT _ingest/pipeline/attachment
{ {
"description" : "Extract attachment information", "description" : "Extract attachment information",
@ -94,7 +110,7 @@ PUT _ingest/pipeline/attachment
} }
] ]
} }
-------------------------------------------------- ----
NOTE: Extracting contents from binary data is a resource-intensive operation. NOTE: Extracting contents from binary data is a resource-intensive operation.
It is highly recommended to run pipelines It is highly recommended to run pipelines
@ -175,11 +191,11 @@ PUT _ingest/pipeline/attachment
} }
] ]
} }
PUT my-index-00001/_doc/my_id?pipeline=attachment PUT my-index-000001/_doc/my_id?pipeline=attachment
{ {
"data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=" "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
} }
GET my-index-00001/_doc/my_id GET my-index-000001/_doc/my_id
-------------------------------------------------- --------------------------------------------------
Returns this: Returns this:
@ -188,7 +204,7 @@ Returns this:
-------------------------------------------------- --------------------------------------------------
{ {
"found": true, "found": true,
"_index": "my-index-00001", "_index": "my-index-000001",
"_type": "_doc", "_type": "_doc",
"_id": "my_id", "_id": "my_id",
"_version": 1, "_version": 1,
@ -223,12 +239,12 @@ PUT _ingest/pipeline/attachment
} }
] ]
} }
PUT my-index-00001/_doc/my_id_2?pipeline=attachment PUT my-index-000001/_doc/my_id_2?pipeline=attachment
{ {
"data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=", "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=",
"max_size": 5 "max_size": 5
} }
GET my-index-00001/_doc/my_id_2 GET my-index-000001/_doc/my_id_2
-------------------------------------------------- --------------------------------------------------
Returns this: Returns this:
@ -237,7 +253,7 @@ Returns this:
-------------------------------------------------- --------------------------------------------------
{ {
"found": true, "found": true,
"_index": "my-index-00001", "_index": "my-index-000001",
"_type": "_doc", "_type": "_doc",
"_id": "my_id_2", "_id": "my_id_2",
"_version": 1, "_version": 1,
@ -309,7 +325,7 @@ PUT _ingest/pipeline/attachment
} }
] ]
} }
PUT my-index-00001/_doc/my_id?pipeline=attachment PUT my-index-000001/_doc/my_id?pipeline=attachment
{ {
"attachments" : [ "attachments" : [
{ {
@ -322,7 +338,7 @@ PUT my-index-00001/_doc/my_id?pipeline=attachment
} }
] ]
} }
GET my-index-00001/_doc/my_id GET my-index-000001/_doc/my_id
-------------------------------------------------- --------------------------------------------------
Returns this: Returns this:
@ -330,7 +346,7 @@ Returns this:
[source,console-result] [source,console-result]
-------------------------------------------------- --------------------------------------------------
{ {
"_index" : "my-index-00001", "_index" : "my-index-000001",
"_type" : "_doc", "_type" : "_doc",
"_id" : "my_id", "_id" : "my_id",
"_version" : 1, "_version" : 1,