diff --git a/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc b/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc index 5bce2beed8a..d8cfd99d32e 100644 --- a/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc +++ b/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc @@ -1,5 +1,8 @@ [[analysis-mapping-charfilter]] -=== Mapping Char Filter +=== Mapping character filter +++++ +<titleabbrev>Mapping</titleabbrev> +++++ The `mapping` character filter accepts a map of keys and values. Whenever it encounters a string of characters that is the same as a key, it replaces them @@ -8,75 +11,53 @@ with the value associated with that key. Matching is greedy; the longest pattern matching at a given point wins. Replacements are allowed to be the empty string. -[float] -=== Configuration +The `mapping` filter uses Lucene's +{lucene-analysis-docs}/charfilter/MappingCharFilter.html[MappingCharFilter]. -The `mapping` character filter accepts the following parameters: +[[analysis-mapping-charfilter-analyze-ex]] +==== Example -[horizontal] -`mappings`:: - - A array of mappings, with each element having the form `key => value`. - -`mappings_path`:: - - A path, either absolute or relative to the `config` directory, to a UTF-8 - encoded text mappings file containing a `key => value` mapping per line. - -Either the `mappings` or `mappings_path` parameter must be provided. - -[float] -=== Example configuration - -In this example, we configure the `mapping` character filter to replace Arabic -numerals with their Latin equivalents: +The following <<indices-analyze,analyze API>> request uses the `mapping` filter +to convert Hindu-Arabic numerals (٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin +equivalents (0123456789), changing the text `My license plate is ٢٥٠١٥` to +`My license plate is 25015`. 
[source,console] ----------------------------- -PUT my_index +---- +GET /_analyze { - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "char_filter": [ - "my_char_filter" - ] - } - }, - "char_filter": { - "my_char_filter": { - "type": "mapping", - "mappings": [ - "٠ => 0", - "١ => 1", - "٢ => 2", - "٣ => 3", - "٤ => 4", - "٥ => 5", - "٦ => 6", - "٧ => 7", - "٨ => 8", - "٩ => 9" - ] - } - } + "tokenizer": "keyword", + "char_filter": [ + { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9" + ] } - } -} - -POST my_index/_analyze -{ - "analyzer": "my_analyzer", + ], "text": "My license plate is ٢٥٠١٥" } ----------------------------- +---- -///////////////////// +The filter produces the following text: +[source,text] +---- +[ My license plate is 25015 ] +---- + +//// [source,console-result] ----------------------------- +---- { "tokens": [ { @@ -88,24 +69,45 @@ POST my_index/_analyze } ] } ----------------------------- +---- +//// -///////////////////// +[[analysis-mapping-charfilter-configure-parms]] +==== Configurable parameters +`mappings`:: +(Required*, array of strings) +Array of mappings, with each element having the form `key => value`. ++ +Either this or the `mappings_path` parameter must be specified. -The above example produces the following term: +`mappings_path`:: +(Required*, string) +Path to a file containing `key => value` mappings. ++ +This path must be absolute or relative to the `config` location, and the file +must be UTF-8 encoded. Each mapping in the file must be separated by a line +break. ++ +Either this or the `mappings` parameter must be specified. -[source,text] ---------------------------- -[ My license plate is 25015 ] ---------------------------- +[[analysis-mapping-charfilter-customize]] +==== Customize and add to an analyzer -Keys and values can be strings with multiple characters. 
The following -example replaces the `:)` and `:(` emoticons with a text equivalent: +To customize the `mapping` filter, duplicate it to create the basis for a new +custom character filter. You can modify the filter using its configurable +parameters. + +The following <<indices-create-index,create index API>> request +configures a new <<analysis-custom-analyzer,custom analyzer>> using a custom +`mapping` filter, `my_mappings_char_filter`. + +The `my_mappings_char_filter` filter replaces the `:)` and `:(` emoticons +with a text equivalent. [source,console] ----------------------------- -PUT my_index +---- +PUT /my_index { "settings": { "analysis": { @@ -113,12 +115,12 @@ PUT my_index "my_analyzer": { "tokenizer": "standard", "char_filter": [ - "my_char_filter" + "my_mappings_char_filter" ] } }, "char_filter": { - "my_char_filter": { + "my_mappings_char_filter": { "type": "mapping", "mappings": [ ":) => _happy_", @@ -129,67 +131,43 @@ PUT my_index } } } +---- -POST my_index/_analyze +The following <<indices-analyze,analyze API>> request uses the custom +`my_mappings_char_filter` to replace `:(` with `_sad_` in +the text `I'm delighted about it :(`. 
+ +[source,console] +---- +GET /my_index/_analyze { - "analyzer": "my_analyzer", + "tokenizer": "keyword", + "char_filter": [ "my_mappings_char_filter" ], "text": "I'm delighted about it :(" } ----------------------------- +---- +// TEST[continued] - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "I'm", - "start_offset": 0, - "end_offset": 3, - "type": "<ALPHANUM>", - "position": 0 - }, - { - "token": "delighted", - "start_offset": 4, - "end_offset": 13, - "type": "<ALPHANUM>", - "position": 1 - }, - { - "token": "about", - "start_offset": 14, - "end_offset": 19, - "type": "<ALPHANUM>", - "position": 2 - }, - { - "token": "it", - "start_offset": 20, - "end_offset": 22, - "type": "<ALPHANUM>", - "position": 3 - }, - { - "token": "_sad_", - "start_offset": 23, - "end_offset": 25, - "type": "<ALPHANUM>", - "position": 4 - } - ] -} ----------------------------- - - -///////////////////// - - -The above example produces the following terms: +The filter produces the following text: [source,text] --------------------------- -[ I'm, delighted, about, it, _sad_ ] +[ I'm delighted about it _sad_ ] --------------------------- + +//// +[source,console-result] +---- +{ + "tokens": [ + { + "token": "I'm delighted about it _sad_", + "start_offset": 0, + "end_offset": 25, + "type": "word", + "position": 0 + } + ] +} +---- +////