From 04626de6ae91cac5a7696a89dc52811de552e332 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 12 Aug 2019 20:36:10 +0200 Subject: [PATCH] Add initial version of enrich processor docs. (#45084) Relates to #32789 --- docs/reference/ingest/ingest-node.asciidoc | 367 ++++++++++++++++++ .../ingest/processors/enrich.asciidoc | 100 +++++ 2 files changed, 467 insertions(+) create mode 100644 docs/reference/ingest/processors/enrich.asciidoc diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 4a9ed038964..223df5f106b 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -784,6 +784,372 @@ metadata field to provide the error message. -------------------------------------------------- // NOTCONSOLE +[role="xpack"] +[testenv="basic"] +[[ingest-enriching-data]] +== Enriching data with ingest node + +The <<enrich-processor,enrich processor>> allows documents to be enriched with data from +an enrich index that is managed by an enrich policy prior to indexing. + +The data that is used by the enrich index is managed by the user in regular indices. +An enrich policy is a configuration that indicates how an enrich index is created from +the data in the user-maintained indices. When an enrich policy is executed, +a new enrich index is created for that policy, which the enrich processor can then use. + +An enrich policy also controls what kind of enrichment the `enrich` processor is able to do. + +[[enrich-policy-definition]] +=== Enrich Policy Definition + +The <<enrich-processor,enrich processor>> requires more than just the configuration in a pipeline. +The main piece to configure is the enrich policy: + +[[enrich-policy-options]] +.Enrich policy options +[options="header"] +|====== +| Name | Required | Default | Description +| `type` | yes | - | The policy type. +| `indices` | yes | - | The indices to fetch the data from. +| `query` | no | `match_all` query | The query to be used to select which documents are included. 
+| `enrich_key` | yes | - | The field that the enrich processor will query against. +| `enrich_values` | yes | - | The fields to include in the enrich index. +|====== + +[[enrich-policy-types]] +==== Policy types + +An enrich processor is associated with a policy via the `policy_name` option. +The type of the policy determines what kind of enrichment an `enrich` processor is able to do. + +The following policy types are currently supported: + +* `exact_match` - Can look up exactly one document and use its content to enrich the document being ingested. + +[[enrich-processor-getting-started]] +=== Getting started + +Create a regular index that contains data you would like to enrich your incoming documents with: + +[source,js] +-------------------------------------------------- +PUT /users/_doc/1?refresh +{ + "email": "mardy.brown@email.me", + "first_name": "Mardy", + "last_name": "Brown", + "address": "6649 N Blue Gum St", + "city": "New Orleans", + "county": "Orleans", + "state": "LA", + "zip": 70116, + "phone1":"504-621-8927", + "phone2": "504-845-1427", + "web": "mardy-brown.me" +} +-------------------------------------------------- +// CONSOLE + +Create an enrich policy: + +[source,js] +-------------------------------------------------- +PUT /_enrich/policy/users-policy +{ + "type": "exact_match", + "indices": "users", + "enrich_key": "email", + "enrich_values": ["first_name", "last_name", "address", "city", "zip", "state"] +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Which returns: + +[source,js] +-------------------------------------------------- +{ + "acknowledged": true +} +-------------------------------------------------- +// TESTRESPONSE + +Execute that enrich policy: + +[source,js] +-------------------------------------------------- +POST /_enrich/policy/users-policy/_execute +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Which returns: + +[source,js] 
+-------------------------------------------------- +{ + "acknowledged": true +} +-------------------------------------------------- +// TESTRESPONSE + +Create the pipeline and enrich a document: + +[source,js] +-------------------------------------------------- +PUT _ingest/pipeline/user_lookup +{ + "description" : "Enriching user details to messages", + "processors" : [ + { + "enrich" : { + "policy_name": "users-policy", + "enrich_key" : "email", + "targets": ["address", "city", "zip", "state"] + } + } + ] +} + +PUT my_index/_doc/my_id?pipeline=user_lookup +{ + "email": "mardy.brown@email.me" +} + +GET my_index/_doc/my_id +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Which returns: + +[source,js] +-------------------------------------------------- +{ + "found": true, + "_index": "my_index", + "_type": "_doc", + "_id": "my_id", + "_version": 1, + "_seq_no": 55, + "_primary_term": 1, + "_source": { + "zip": 70116, + "address": "6649 N Blue Gum St", + "city": "New Orleans", + "state": "LA", + "email": "mardy.brown@email.me" + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term": 1/"_primary_term" : $body._primary_term/] + +////////////////////////// + +[source,js] +-------------------------------------------------- +DELETE /_enrich/policy/users-policy +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +////////////////////////// + +[[enrich-policy-apis]] +=== Enrich Policy APIs + +There are also several APIs to manage and execute enrich policies: + +* <<put-policy-api,Put policy api>>. +* <<get-policy-api,Get policy api>>. +* <<list-policies-api,List policies api>>. +* <<execute-policy-api,Execute policy api>>. +* <<delete-policy-api,Delete policy api>>. + +If security is enabled then the user managing enrich policies will need to have +the `enrich_user` built-in role. Also the user will need to have read privileges +for the indices the enrich policy refers to. 
+ +[[put-policy-api]] +==== Put Policy API + +The put policy api allows a policy to be stored under a user-specified id in the URL, with +the enrich policy definition as the request body. + +Request: + +[source,js] +-------------------------------------------------- +PUT /_enrich/policy/my-policy +{ + "type": "exact_match", + "indices": "users", + "enrich_key": "email", + "enrich_values": ["first_name", "last_name", "address", "city", "zip", "state"] +} +-------------------------------------------------- +// CONSOLE + +Response: + +[source,js] +-------------------------------------------------- +{ + "acknowledged": true +} +-------------------------------------------------- +// TESTRESPONSE + +[[get-policy-api]] +==== Get Policy API + +The get policy api allows a policy to be retrieved by id. + +Request: + +[source,js] +-------------------------------------------------- +GET /_enrich/policy/my-policy +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Response: + +[source,js] +-------------------------------------------------- +{ + "type": "exact_match", + "indices": ["users"], + "enrich_key": "email", + "enrich_values": ["first_name", "last_name", "address", "city", "zip", "state"] +} +-------------------------------------------------- +// TESTRESPONSE + +[[list-policies-api]] +==== List Policies API + +The list policies api allows all policies to be returned. 
+ +Request: + +[source,js] +-------------------------------------------------- +GET /_enrich/policy +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Response: + +[source,js] +-------------------------------------------------- +{ + "policies": [ + { + "name" : "my-policy", + "type" : "exact_match", + "indices" : ["users"], + "enrich_key" : "email", + "enrich_values" : [ + "first_name", + "last_name", + "address", + "city", + "zip", + "state" + ] + } + ] +} +-------------------------------------------------- +// TESTRESPONSE + +[[execute-policy-api]] +==== Execute Policy API + +The execute policy api executes a policy based on the provided id. +It may take some time before this API returns a response. +Executing a policy involves creating a new enrich index, indexing the documents from +the indices specified in the policy into the enrich index and some other operations. + +Note that a policy needs to be re-executed in order to incorporate changes made to +the indices it points to after the policy was last executed. + +This API creates an index with the `.enrich-*` prefix in the name. This index is intended +to be used by the enrich processor only and should not be used by anything else. +Old `.enrich-*` indices are removed by an internal cleanup mechanism. 
+ +////////////////////////// + +[source,js] +-------------------------------------------------- +PUT /users/_doc/1?refresh +{ + "email": "mardy.brown@email.me", + "first_name": "Mardy", + "last_name": "Brown", + "address": "6649 N Blue Gum St", + "city": "New Orleans", + "county": "Orleans", + "state": "LA", + "zip": 70116, + "phone1":"504-621-8927", + "phone2": "504-845-1427", + "web": "mardy-brown.me" +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +////////////////////////// + +Request: + +[source,js] +-------------------------------------------------- +POST /_enrich/policy/my-policy/_execute +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Response: + +[source,js] +-------------------------------------------------- +{ + "acknowledged": true +} +-------------------------------------------------- +// TESTRESPONSE + +[[delete-policy-api]] +==== Delete Policy API + +The delete policy api allows a policy to be removed by id. 
+ +Request: + +[source,js] +-------------------------------------------------- +DELETE /_enrich/policy/my-policy +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +Response: + +[source,js] +-------------------------------------------------- +{ + "acknowledged": true +} +-------------------------------------------------- +// TESTRESPONSE + [[ingest-processors]] == Processors @@ -859,6 +1225,7 @@ include::processors/date-index-name.asciidoc[] include::processors/dissect.asciidoc[] include::processors/dot-expand.asciidoc[] include::processors/drop.asciidoc[] +include::processors/enrich.asciidoc[] include::processors/fail.asciidoc[] include::processors/foreach.asciidoc[] include::processors/geoip.asciidoc[] diff --git a/docs/reference/ingest/processors/enrich.asciidoc b/docs/reference/ingest/processors/enrich.asciidoc new file mode 100644 index 00000000000..6c8d923551f --- /dev/null +++ b/docs/reference/ingest/processors/enrich.asciidoc @@ -0,0 +1,100 @@ +[role="xpack"] +[testenv="basic"] +[[enrich-processor]] +=== Enrich Processor + +The `enrich` processor can enrich documents with data from another index. +See the <<ingest-enriching-data,enrich data>> section for more information about how to set this up and +check out the <<enrich-processor-getting-started,getting started>> to get familiar with enrich policies and related APIs. + +[[enrich-options]] +.Enrich Options +[options="header"] +|====== +| Name | Required | Default | Description +| `policy_name` | yes | - | The name of the enrich policy to use. +| `enrich_key` | no | Policy enrich_key | The field to get the value from for the enrich lookup. +| `ignore_missing` | no | `false` | If `true` and `enrich_key` does not exist, the processor quietly exits without modifying the document. +| `override` | no | `true` | If `true`, the processor updates fields that already hold a non-null value. When set to `false`, such fields will not be touched. 
+| `targets` | no 1) | - | Describes what fields should be added to the document being indexed from the lookup document. +| `set_from` | no 1) | - | Same as `targets`, but allows fields from the lookup document to be added under a different name to the document being indexed. +include::common-options.asciidoc[] +|====== + +1) Either `targets` or `set_from` must be specified. + +[[enrich-processor-set-from]] +==== Enrich `set_from` option + +This option should be used when a field in the lookup document should be placed under +a different field in the document being ingested. + +The `set_from` option accepts an array of objects, each with two fields: +* `source` - The name of the field in the lookup document. +* `target` - The name of the field in the document being ingested that should hold the source field's value. + +For example: + +////////////////////////// + +[source,js] +-------------------------------------------------- +PUT /_enrich/policy/users-policy +{ + "type": "exact_match", + "indices": "users", + "enrich_key": "email", + "enrich_values": ["first_name", "last_name", "address", "city", "zip", "state"] +} +-------------------------------------------------- +// CONSOLE +// TEST + +////////////////////////// + +[source,js] +-------------------------------------------------- +PUT _ingest/pipeline/user_lookup +{ + "description" : "Enriching user details to messages", + "processors" : [ + { + "enrich" : { + "policy_name": "users-policy", + "enrich_key" : "email", + "set_from": [ + { + "source": "address", + "target": "address-line-1" + }, + { + "source": "city", + "target": "residence" + }, + { + "source": "zip", + "target": "zipcode" + }, + { + "source": "state", + "target": "us_state" + } + ] + } + } + ] +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +////////////////////////// + +[source,js] +-------------------------------------------------- +DELETE /_enrich/policy/users-policy 
+-------------------------------------------------- +// CONSOLE +// TEST[continued] + +//////////////////////////