diff --git a/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc
index 5f6bec96dba..d806bb2e9ac 100644
--- a/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc
@@ -4,46 +4,507 @@
Shingle
++++
-NOTE: Shingles are generally used to help speed up phrase queries. Rather
-than building filter chains by hand, you may find it easier to use the
-<> option on a text field.
+Add shingles, or word https://en.wikipedia.org/wiki/N-gram[n-grams], to a token
+stream by concatenating adjacent tokens. By default, the `shingle` token filter
+outputs two-word shingles and unigrams.
-A token filter of type `shingle` that constructs shingles (token
-n-grams) from a token stream. In other words, it creates combinations of
-tokens as a single token. For example, the sentence "please divide this
-sentence into shingles" might be tokenized into shingles "please
-divide", "divide this", "this sentence", "sentence into", and "into
-shingles".
+For example, many tokenizers convert `the lazy dog` to `[ the, lazy, dog ]`. You
+can use the `shingle` filter to add two-word shingles to this stream:
+`[ the, the lazy, lazy, lazy dog, dog ]`.
-This filter handles position increments > 1 by inserting filler tokens
-(tokens with termtext "_"). It does not handle a position increment of
-0.
+TIP: Shingles are often used to help speed up phrase queries, such as
+<>. Rather than creating shingles
+using the `shingles` filter, we recommend you use the
+<> mapping parameter on the appropriate
+<> field instead.
-The following are settings that can be set for a `shingle` token filter
-type:
+This filter uses Lucene's
+{lucene-analysis-docs}/shingle/ShingleFilter.html[ShingleFilter].
-[cols="<,<",options="header",]
-|=======================================================================
-|Setting |Description
-|`max_shingle_size` |The maximum shingle size. Defaults to `2`.
+[[analysis-shingle-tokenfilter-analyze-ex]]
+==== Example
-|`min_shingle_size` |The minimum shingle size. Defaults to `2`.
+The following <> request uses the `shingle`
+filter to add two-word shingles to the token stream for `quick brown fox jumps`:
-|`output_unigrams` |If `true` the output will contain the input tokens
-(unigrams) as well as the shingles. Defaults to `true`.
+[source,console]
+----
+GET /_analyze
+{
+ "tokenizer": "whitespace",
+ "filter": [ "shingle" ],
+ "text": "quick brown fox jumps"
+}
+----
-|`output_unigrams_if_no_shingles` |If `output_unigrams` is `false` the
-output will contain the input tokens (unigrams) if no shingles are
-available. Note if `output_unigrams` is set to `true` this setting has
-no effect. Defaults to `false`.
+The filter produces the following tokens:
-|`token_separator` |The string to use when joining adjacent tokens to
-form a shingle. Defaults to `" "`.
-|`filler_token` | The string to use as a replacement for each position
-at which there is no actual token in the stream. For instance this string is
-used if the position increment is greater than one when a `stop` filter is used
-together with the `shingle` filter. Defaults to `"_"`
-|=======================================================================
+[source,text]
+----
+[ quick, quick brown, brown, brown fox, fox, fox jumps, jumps ]
+----
-The index level setting `index.max_shingle_diff` controls the maximum allowed
-difference between `max_shingle_size` and `min_shingle_size`.
+////
+[source,console-result]
+----
+{
+ "tokens": [
+ {
+ "token": "quick",
+ "start_offset": 0,
+ "end_offset": 5,
+ "type": "word",
+ "position": 0
+ },
+ {
+ "token": "quick brown",
+ "start_offset": 0,
+ "end_offset": 11,
+ "type": "shingle",
+ "position": 0,
+ "positionLength": 2
+ },
+ {
+ "token": "brown",
+ "start_offset": 6,
+ "end_offset": 11,
+ "type": "word",
+ "position": 1
+ },
+ {
+ "token": "brown fox",
+ "start_offset": 6,
+ "end_offset": 15,
+ "type": "shingle",
+ "position": 1,
+ "positionLength": 2
+ },
+ {
+ "token": "fox",
+ "start_offset": 12,
+ "end_offset": 15,
+ "type": "word",
+ "position": 2
+ },
+ {
+ "token": "fox jumps",
+ "start_offset": 12,
+ "end_offset": 21,
+ "type": "shingle",
+ "position": 2,
+ "positionLength": 2
+ },
+ {
+ "token": "jumps",
+ "start_offset": 16,
+ "end_offset": 21,
+ "type": "word",
+ "position": 3
+ }
+ ]
+}
+----
+////
+
+To produce shingles of 2-3 words, add the following arguments to the analyze API
+request:
+
+* `min_shingle_size`: `2`
+* `max_shingle_size`: `3`
+
+[source,console]
+----
+GET /_analyze
+{
+ "tokenizer": "whitespace",
+ "filter": [
+ {
+ "type": "shingle",
+ "min_shingle_size": 2,
+ "max_shingle_size": 3
+ }
+ ],
+ "text": "quick brown fox jumps"
+}
+----
+
+The filter produces the following tokens:
+
+[source,text]
+----
+[ quick, quick brown, quick brown fox, brown, brown fox, brown fox jumps, fox, fox jumps, jumps ]
+----
+
+////
+[source, console-result]
+----
+{
+ "tokens": [
+ {
+ "token": "quick",
+ "start_offset": 0,
+ "end_offset": 5,
+ "type": "word",
+ "position": 0
+ },
+ {
+ "token": "quick brown",
+ "start_offset": 0,
+ "end_offset": 11,
+ "type": "shingle",
+ "position": 0,
+ "positionLength": 2
+ },
+ {
+ "token": "quick brown fox",
+ "start_offset": 0,
+ "end_offset": 15,
+ "type": "shingle",
+ "position": 0,
+ "positionLength": 3
+ },
+ {
+ "token": "brown",
+ "start_offset": 6,
+ "end_offset": 11,
+ "type": "word",
+ "position": 1
+ },
+ {
+ "token": "brown fox",
+ "start_offset": 6,
+ "end_offset": 15,
+ "type": "shingle",
+ "position": 1,
+ "positionLength": 2
+ },
+ {
+ "token": "brown fox jumps",
+ "start_offset": 6,
+ "end_offset": 21,
+ "type": "shingle",
+ "position": 1,
+ "positionLength": 3
+ },
+ {
+ "token": "fox",
+ "start_offset": 12,
+ "end_offset": 15,
+ "type": "word",
+ "position": 2
+ },
+ {
+ "token": "fox jumps",
+ "start_offset": 12,
+ "end_offset": 21,
+ "type": "shingle",
+ "position": 2,
+ "positionLength": 2
+ },
+ {
+ "token": "jumps",
+ "start_offset": 16,
+ "end_offset": 21,
+ "type": "word",
+ "position": 3
+ }
+ ]
+}
+----
+////
+
+To only include shingles in the output, add an `output_unigrams` argument of
+`false` to the request.
+
+[source,console]
+----
+GET /_analyze
+{
+ "tokenizer": "whitespace",
+ "filter": [
+ {
+ "type": "shingle",
+ "min_shingle_size": 2,
+ "max_shingle_size": 3,
+ "output_unigrams": false
+ }
+ ],
+ "text": "quick brown fox jumps"
+}
+----
+
+The filter produces the following tokens:
+
+[source,text]
+----
+[ quick brown, quick brown fox, brown fox, brown fox jumps, fox jumps ]
+----
+
+////
+[source, console-result]
+----
+{
+ "tokens": [
+ {
+ "token": "quick brown",
+ "start_offset": 0,
+ "end_offset": 11,
+ "type": "shingle",
+ "position": 0
+ },
+ {
+ "token": "quick brown fox",
+ "start_offset": 0,
+ "end_offset": 15,
+ "type": "shingle",
+ "position": 0,
+ "positionLength": 2
+ },
+ {
+ "token": "brown fox",
+ "start_offset": 6,
+ "end_offset": 15,
+ "type": "shingle",
+ "position": 1
+ },
+ {
+ "token": "brown fox jumps",
+ "start_offset": 6,
+ "end_offset": 21,
+ "type": "shingle",
+ "position": 1,
+ "positionLength": 2
+ },
+ {
+ "token": "fox jumps",
+ "start_offset": 12,
+ "end_offset": 21,
+ "type": "shingle",
+ "position": 2
+ }
+ ]
+}
+----
+////
+
+[[analysis-shingle-tokenfilter-analyzer-ex]]
+==== Add to an analyzer
+
+The following <> request uses the
+`shingle` filter to configure a new <>.
+
+[source,console]
+----
+PUT /my_index
+{
+ "settings": {
+ "analysis": {
+ "analyzer": {
+ "standard_shingle": {
+ "tokenizer": "standard",
+ "filter": [ "shingle" ]
+ }
+ }
+ }
+ }
+}
+----
+
+[[analysis-shingle-tokenfilter-configure-parms]]
+==== Configurable parameters
+
+`max_shingle_size`::
+(Optional, integer)
+Maximum number of tokens to concatenate when creating shingles. Defaults to `2`.
++
+NOTE: This value cannot be lower than the `min_shingle_size` argument, which
+defaults to `2`. The difference between this value and the `min_shingle_size`
+argument cannot exceed the <>
+index-level setting, which defaults to `3`.
+
+`min_shingle_size`::
+(Optional, integer)
+Minimum number of tokens to concatenate when creating shingles. Defaults to `2`.
++
+NOTE: This value cannot exceed the `max_shingle_size` argument, which defaults
+to `2`. The difference between the `max_shingle_size` argument and this value
+cannot exceed the <>
+index-level setting, which defaults to `3`.
+
+`output_unigrams`::
+(Optional, boolean)
+If `true`, the output includes the original input tokens. If `false`, the output
+only includes shingles; the original input tokens are removed. Defaults to
+`true`.
+
+`output_unigrams_if_no_shingles`::
+If `true`, the output includes the original input tokens only if no shingles are
+produced; if shingles are produced, the output only includes shingles. Defaults
+to `false`.
++
+IMPORTANT: If both this and the `output_unigrams` parameter are `true`, only the
+`output_unigrams` argument is used.
+
+`token_separator`::
+(Optional, string)
+Separator used to concatenate adjacent tokens to form a shingle. Defaults to a
+space (`" "`).
+
+`filler_token`::
++
+--
+(Optional, string)
+String used in shingles as a replacement for empty positions that do not contain
+a token. This filler token is only used in shingles, not original unigrams.
+Defaults to an underscore (`_`).
+
+Some token filters, such as the `stop` filter, create empty positions when
+removing stop words with a position increment greater than one.
+
+.*Example*
+[%collapsible]
+====
+In the following <> request, the `stop` filter
+removes the stop word `a` from `fox jumps a lazy dog`, creating an empty
+position. The subsequent `shingle` filter replaces this empty position with a
+plus sign (`+`) in shingles.
+
+[source,console]
+----
+GET /_analyze
+{
+ "tokenizer": "whitespace",
+ "filter": [
+ {
+ "type": "stop",
+ "stopwords": [ "a" ]
+ },
+ {
+ "type": "shingle",
+ "filler_token": "+"
+ }
+ ],
+ "text": "fox jumps a lazy dog"
+}
+----
+
+The filter produces the following tokens:
+
+[source,text]
+----
+[ fox, fox jumps, jumps, jumps +, + lazy, lazy, lazy dog, dog ]
+----
+
+////
+[source, console-result]
+----
+{
+ "tokens" : [
+ {
+ "token" : "fox",
+ "start_offset" : 0,
+ "end_offset" : 3,
+ "type" : "word",
+ "position" : 0
+ },
+ {
+ "token" : "fox jumps",
+ "start_offset" : 0,
+ "end_offset" : 9,
+ "type" : "shingle",
+ "position" : 0,
+ "positionLength" : 2
+ },
+ {
+ "token" : "jumps",
+ "start_offset" : 4,
+ "end_offset" : 9,
+ "type" : "word",
+ "position" : 1
+ },
+ {
+ "token" : "jumps +",
+ "start_offset" : 4,
+ "end_offset" : 12,
+ "type" : "shingle",
+ "position" : 1,
+ "positionLength" : 2
+ },
+ {
+ "token" : "+ lazy",
+ "start_offset" : 12,
+ "end_offset" : 16,
+ "type" : "shingle",
+ "position" : 2,
+ "positionLength" : 2
+ },
+ {
+ "token" : "lazy",
+ "start_offset" : 12,
+ "end_offset" : 16,
+ "type" : "word",
+ "position" : 3
+ },
+ {
+ "token" : "lazy dog",
+ "start_offset" : 12,
+ "end_offset" : 20,
+ "type" : "shingle",
+ "position" : 3,
+ "positionLength" : 2
+ },
+ {
+ "token" : "dog",
+ "start_offset" : 17,
+ "end_offset" : 20,
+ "type" : "word",
+ "position" : 4
+ }
+ ]
+}
+----
+////
+====
+--
+
+[[analysis-shingle-tokenfilter-customize]]
+==== Customize
+
+To customize the `shingle` filter, duplicate it to create the basis for a new
+custom token filter. You can modify the filter using its configurable
+parameters.
+
+For example, the following <> request
+uses a custom `shingle` filter, `my_shingle_filter`, to configure a new
+<>.
+
+The `my_shingle_filter` filter uses a `min_shingle_size` of `2` and a
+`max_shingle_size` of `5`, meaning it produces shingles of 2-5 words.
+The filter also includes a `output_unigrams` argument of `false`, meaning that
+only shingles are included in the output.
+
+[source,console]
+----
+PUT /my_index
+{
+ "settings": {
+ "analysis": {
+ "analyzer": {
+ "en": {
+ "tokenizer": "standard",
+ "filter": [ "my_shingle_filter" ]
+ }
+ },
+ "filter": {
+ "my_shingle_filter": {
+ "type": "shingle",
+ "min_shingle_size": 2,
+ "max_shingle_size": 5,
+ "output_unigrams": false
+ }
+ }
+ }
+ }
+}
+----
diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc
index 4fb31e31e85..6a9c1a988ea 100644
--- a/docs/reference/index-modules.asciidoc
+++ b/docs/reference/index-modules.asciidoc
@@ -165,10 +165,12 @@ specific index module:
The maximum allowed difference between min_gram and max_gram for NGramTokenizer and NGramTokenFilter.
Defaults to `1`.
+[[index-max-shingle-diff]]
`index.max_shingle_diff`::
- The maximum allowed difference between max_shingle_size and min_shingle_size for ShingleTokenFilter.
- Defaults to `3`.
+ The maximum allowed difference between max_shingle_size and min_shingle_size
+ for the <>. Defaults to
+ `3`.
`index.blocks.read_only`::