Document 5.0 mapping changes.

2016-03-18 17:01:27 +01:00 · 2016-03-18 17:01:27 +01:00 · b42f66c8ac
parent 3764b3ff80
commit b42f66c8ac
47 changed files with 430 additions and 527 deletions
--- a/docs/reference/aggregations/bucket/nested-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/nested-aggregation.asciidoc
@ -16,7 +16,7 @@ price for the product. The mapping could look like:
            "resellers" : { <1>
                "type" : "nested",
                "properties" : {
-                    "name" : { "type" : "string" },
+                    "name" : { "type" : "text" },
                    "price" : { "type" : "double" }
                }
            }
--- a/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc
@ -22,12 +22,12 @@ the issue documents as nested documents. The mapping could look like:

    "issue" : {
        "properties" : {
-            "tags" : { "type" : "string" }
+            "tags" : { "type" : "text" },
            "comments" : { <1>
                "type" : "nested",
                "properties" : {
-                    "username" : { "type" : "string", "index" : "not_analyzed" },
-                    "comment" : { "type" : "string" }
+                    "username" : { "type" : "keyword" },
+                    "comment" : { "type" : "text" }
                }
            }
        }
--- a/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc
+++ b/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc
@ -4,4 +4,4 @@
 An analyzer of type `keyword` that "tokenizes" an entire stream as a
 single token. This is useful for data like zip codes, ids and so on.
 Note, when using mapping definitions, it might make more sense to simply
-mark the field as `not_analyzed`.
+map the field as a <<keyword,`keyword`>>.
--- a/docs/reference/docs/termvectors.asciidoc
+++ b/docs/reference/docs/termvectors.asciidoc
@ -136,13 +136,13 @@ curl -s -XPUT 'http://localhost:9200/twitter/' -d '{
    "tweet": {
      "properties": {
        "text": {
-          "type": "string",
+          "type": "text",
          "term_vector": "with_positions_offsets_payloads",
          "store" : true,
          "analyzer" : "fulltext_analyzer"
         },
         "fullname": {
-          "type": "string",
+          "type": "text",
          "term_vector": "with_positions_offsets_payloads",
          "analyzer" : "fulltext_analyzer"
        }
--- a/docs/reference/docs/update-by-query.asciidoc
+++ b/docs/reference/docs/update-by-query.asciidoc
@ -281,7 +281,7 @@ PUT test
    "test": {
      "dynamic": false,   <1>
      "properties": {
-        "text": {"type": "string"}
+        "text": {"type": "text"}
      }
    }
  }
@ -300,8 +300,8 @@ POST test/test?refresh
 PUT test/_mapping/test   <2>
 {
  "properties": {
-    "text": {"type": "string"},
-    "flag": {"type": "string", "analyzer": "keyword"}
+    "text": {"type": "text"},
+    "flag": {"type": "text", "analyzer": "keyword"}
  }
 }
 --------------------------------------------------
--- a/docs/reference/index-modules/similarity.asciidoc
+++ b/docs/reference/index-modules/similarity.asciidoc
@ -39,7 +39,7 @@ Here we configure the DFRSimilarity so it can be referenced as
 {
  "book" : {
    "properties" : {
-      "title" : { "type" : "string", "similarity" : "my_similarity" }
+      "title" : { "type" : "text", "similarity" : "my_similarity" }
    }
 }
 --------------------------------------------------
--- a/docs/reference/indices/aliases.asciidoc
+++ b/docs/reference/indices/aliases.asciidoc
@ -116,8 +116,7 @@ curl -XPUT 'http://localhost:9200/test1' -d '{
    "type1": {
      "properties": {
        "user" : {
-          "type": "string",
-          "index": "not_analyzed"
+          "type": "keyword"
        }
      }
    }
--- a/docs/reference/indices/create-index.asciidoc
+++ b/docs/reference/indices/create-index.asciidoc
@ -78,7 +78,7 @@ curl -XPOST localhost:9200/test -d '{
    "mappings" : {
        "type1" : {
            "properties" : {
-                "field1" : { "type" : "string", "index" : "not_analyzed" }
+                "field1" : { "type" : "keyword" }
            }
        }
    }
--- a/docs/reference/indices/get-field-mapping.asciidoc
+++ b/docs/reference/indices/get-field-mapping.asciidoc
@ -22,7 +22,7 @@ For which the response is (assuming `text` is a default string field):
         "text": {
            "full_name": "text",
            "mapping": {
-               "text": { "type": "string" }
+               "text": { "type": "text" }
            }
         }
      }
@ -73,13 +73,13 @@ For example, consider the following mapping:
 {
     "article": {
         "properties": {
-             "id": { "type": "string" },
-             "title":  { "type": "string"},
-             "abstract": { "type": "string"},
+             "id": { "type": "text" },
+             "title":  { "type": "text"},
+             "abstract": { "type": "text"},
             "author": {
                 "properties": {
-                     "id": { "type": "string" },
-                     "name": { "type": "string" }
+                     "id": { "type": "text" },
+                     "name": { "type": "text" }
                 }
             }
         }
@ -105,19 +105,19 @@ returns:
         "abstract": {
            "full_name": "abstract",
            "mapping": {
-               "abstract": { "type": "string" }
+               "abstract": { "type": "text" }
            }
         },
         "author.id": {
            "full_name": "author.id",
            "mapping": {
-               "id": { "type": "string" }
+               "id": { "type": "text" }
            }
         },
         "name": {
            "full_name": "author.name",
            "mapping": {
-               "name": { "type": "string" }
+               "name": { "type": "text" }
            }
         }
      }
--- a/docs/reference/indices/put-mapping.asciidoc
+++ b/docs/reference/indices/put-mapping.asciidoc
@ -12,7 +12,7 @@ PUT twitter <1>
    "tweet": {
      "properties": {
        "message": {
-          "type": "string"
+          "type": "text"
        }
      }
    }
@ -23,7 +23,7 @@ PUT twitter/_mapping/user <2>
 {
  "properties": {
    "name": {
-      "type": "string"
+      "type": "text"
    }
  }
 }
@ -32,7 +32,7 @@ PUT twitter/_mapping/tweet <3>
 {
  "properties": {
    "user_name": {
-      "type": "string"
+      "type": "text"
    }
  }
 }
@ -86,13 +86,12 @@ PUT my_index <1>
        "name": {
          "properties": {
            "first": {
-              "type": "string"
+              "type": "text"
            }
          }
        },
        "user_id": {
-          "type": "string",
-          "index": "not_analyzed"
+          "type": "keyword"
        }
      }
    }
@ -105,13 +104,12 @@ PUT my_index/_mapping/user
    "name": {
      "properties": {
        "last": { <2>
-          "type": "string"
+          "type": "text"
        }
      }
    },
    "user_id": {
-      "type": "string",
-      "index": "not_analyzed",
+      "type": "keyword",
      "ignore_above": 100 <3>
    }
  }
@ -149,7 +147,7 @@ PUT my_index
    "type_one": {
      "properties": {
        "text": { <1>
-          "type": "string",
+          "type": "text",
          "analyzer": "standard"
        }
      }
@ -157,7 +155,7 @@ PUT my_index
    "type_two": {
      "properties": {
        "text": { <1>
-          "type": "string",
+          "type": "text",
          "analyzer": "standard"
        }
      }
@ -169,7 +167,7 @@ PUT my_index/_mapping/type_one <2>
 {
  "properties": {
    "text": {
-      "type": "string",
+      "type": "text",
      "analyzer": "standard",
      "search_analyzer": "whitespace"
    }
@ -180,7 +178,7 @@ PUT my_index/_mapping/type_one?update_all_types <3>
 {
  "properties": {
    "text": {
-      "type": "string",
+      "type": "text",
      "analyzer": "standard",
      "search_analyzer": "whitespace"
    }
--- a/docs/reference/mapping.asciidoc
+++ b/docs/reference/mapping.asciidoc
@ -46,7 +46,7 @@ Fields with the same name in different mapping types in the same index

 Each field has a data `type` which can be:

-* a simple type like <<string,`string`>>, <<date,`date`>>, <<number,`long`>>,
+* a simple type like <<text,`text`>>, <<keyword,`keyword`>>, <<date,`date`>>, <<number,`long`>>,
  <<number,`double`>>, <<boolean,`boolean`>> or <<ip,`ip`>>.
 * a type which supports the hierarchical nature of JSON such as
  <<object,`object`>> or <<nested,`nested`>>.
@ -55,7 +55,7 @@ Each field has a data `type` which can be:

 It is often useful to index the same field in different ways for different
 purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
-an `analyzed` field for full-text search, and as a `not_analyzed` field for
+a `text` field for full-text search, and as a `keyword` field for
 sorting or aggregations.  Alternatively, you could index a string field with
 the <<analysis-standard-analyzer,`standard` analyzer>>, the
 <<english-analyzer,`english`>> analyzer, and the
@ -134,18 +134,17 @@ PUT my_index <1>
    "user": { <2>
      "_all":       { "enabled": false  }, <3>
      "properties": { <4>
-        "title":    { "type": "string"  }, <5>
-        "name":     { "type": "string"  }, <5>
+        "title":    { "type": "text"  }, <5>
+        "name":     { "type": "text"  }, <5>
        "age":      { "type": "integer" }  <5>
      }
    },
    "blogpost": { <2>
      "properties": { <4>
-        "title":    { "type": "string"  }, <5>
-        "body":     { "type": "string"  }, <5>
+        "title":    { "type": "text"  }, <5>
+        "body":     { "type": "text"  }, <5>
        "user_id":  {
-          "type":   "string", <5>
-          "index":  "not_analyzed"
+          "type":   "keyword" <5>
        },
        "created":  {
          "type":   "date", <5>
--- a/docs/reference/mapping/dynamic/default-mapping.asciidoc
+++ b/docs/reference/mapping/dynamic/default-mapping.asciidoc
@ -56,11 +56,10 @@ PUT _template/logging
          "strings": { <4>
            "match_mapping_type": "string",
            "mapping": {
-              "type": "string",
+              "type": "text",
              "fields": {
                "raw": {
-                  "type":  "string",
-                  "index": "not_analyzed",
+                  "type":  "keyword",
                  "ignore_above": 256
                }
              }
@ -79,4 +78,4 @@ PUT logs-2015.10.01/event/1
 <1> The `logging` template will match any indices beginning with `logs-`.
 <2> Matching indices will be created with a single primary shard.
 <3> The `_all` field will be disabled by default for new type mappings.
-<4> String fields will be created with an `analyzed` main field, and a `not_analyzed` `.raw` field.
+<4> String fields will be created with a `text` main field, and a `keyword` `.raw` field.
--- a/docs/reference/mapping/dynamic/field-mapping.asciidoc
+++ b/docs/reference/mapping/dynamic/field-mapping.asciidoc
@ -22,7 +22,7 @@ string::                            Either a <<date,`date`>> field
                                        (if the value passes <<date-detection,date detection>>),
                                    a <<number,`double`>> or <<number,`long`>> field
                                        (if the value passes <<numeric-detection,numeric detection>>)
-                                    or an <<mapping-index,`analyzed`>> <<string,`string`>> field.
+                                    or a <<text,`text`>> field.

 These are the only <<mapping-types,field datatypes>> that are dynamically
 detected.  All other datatypes must be mapped explicitly.
@ -81,7 +81,7 @@ PUT my_index/my_type/1 <1>
 --------------------------------------------------
 // AUTOSENSE

-<1> The `create_date` field has been added as a <<string,`string`>> field.
+<1> The `create_date` field has been added as a <<text,`text`>> field.

 ===== Customising detected date formats

--- a/docs/reference/mapping/dynamic/templates.asciidoc
+++ b/docs/reference/mapping/dynamic/templates.asciidoc
@ -52,7 +52,7 @@ can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
 `string`.  It also accepts `*` to match all datatypes.

 For example, if we wanted to map all integer fields as `integer` instead of
-`long`, and all `string` fields as both `analyzed` and `not_analyzed`, we
+`long`, and all `string` fields as both `text` and `keyword`, we
 could use the following template:

 [source,js]
@ -74,11 +74,10 @@ PUT my_index
          "strings": {
            "match_mapping_type": "string",
            "mapping": {
-              "type": "string",
+              "type": "text",
              "fields": {
                "raw": {
-                  "type":  "string",
-                  "index": "not_analyzed",
+                  "type":  "keyword",
                  "ignore_above": 256
                }
              }
@ -99,7 +98,7 @@ PUT my_index/my_type/1
 --------------------------------------------------
 // AUTOSENSE
 <1> The `my_integer` field is mapped as an `integer`.
-<2> The `my_string` field is mapped as an analyzed `string`, with a `not_analyzed` <<multi-fields,multi field>>.
+<2> The `my_string` field is mapped as a `text` field, with a `keyword` <<multi-fields,multi field>>.


 [[match-unmatch]]
@ -180,7 +179,7 @@ PUT my_index
            "path_match":   "name.*",
            "path_unmatch": "*.middle",
            "mapping": {
-              "type":       "string",
+              "type":       "text",
              "copy_to":    "full_name"
            }
          }
@ -221,7 +220,7 @@ PUT my_index
            "match_mapping_type": "string",
            "match": "*",
            "mapping": {
-              "type": "string",
+              "type": "text",
              "analyzer": "{name}"
            }
          }
--- a/docs/reference/mapping/fields/all-field.asciidoc
+++ b/docs/reference/mapping/fields/all-field.asciidoc
@ -45,7 +45,7 @@ from each field as a string. It does not combine the _terms_ from each field.

 =============================================================================

-The `_all` field is just a <<string,`string`>> field, and accepts the same
+The `_all` field is just a <<text,`text`>> field, and accepts the same
 parameters that  other string fields accept, including `analyzer`,
 `term_vectors`, `index_options`, and `store`.

@ -136,7 +136,7 @@ PUT my_index
      },
      "properties": {
        "content": {
-          "type": "string"
+          "type": "text"
        }
      }
    }
@ -172,11 +172,11 @@ PUT myindex
    "mytype": {
      "properties": {
        "title": { <1>
-          "type": "string",
+          "type": "text",
          "boost": 2
        },
        "content": { <1>
-          "type": "string"
+          "type": "text"
        }
      }
    }
@ -210,15 +210,15 @@ PUT myindex
    "mytype": {
      "properties": {
        "first_name": {
-          "type":    "string",
+          "type":    "text",
          "copy_to": "full_name" <1>
        },
        "last_name": {
-          "type":    "string",
+          "type":    "text",
          "copy_to": "full_name" <1>
        },
        "full_name": {
-          "type":    "string"
+          "type":    "text"
        }
      }
    }
--- a/docs/reference/mapping/fields/parent-field.asciidoc
+++ b/docs/reference/mapping/fields/parent-field.asciidoc
@ -127,7 +127,7 @@ global ordinals for the `_parent` field.
 Global ordinals, by default, are built lazily: the first parent-child query or
 aggregation after a refresh will trigger building of global ordinals. This can
 introduce a significant latency spike for your users. You can use
-<<fielddata-loading,eager_global_ordinals>> to shift the cost of building global
+<<global-ordinals,eager_global_ordinals>> to shift the cost of building global
 ordinals from query time to refresh time, by mapping the `_parent` field as follows:

 [source,js]
@ -139,9 +139,7 @@ PUT my_index
    "my_child": {
      "_parent": {
        "type": "my_parent",
-        "fielddata": {
-          "loading": "eager_global_ordinals"
-        }
+        "eager_global_ordinals": true
      }
    }
  }
--- a/docs/reference/mapping/params/analyzer.asciidoc
+++ b/docs/reference/mapping/params/analyzer.asciidoc
@ -47,10 +47,10 @@ PUT my_index
    "my_type": {
      "properties": {
        "text": { <1>
-          "type": "string",
+          "type": "text",
          "fields": {
            "english": { <2>
-              "type":     "string",
+              "type":     "text",
              "analyzer": "english"
            }
          }
@ -124,7 +124,7 @@ PUT /my_index
      "my_type":{
         "properties":{
            "title": {
-               "type":"string",
+               "type":"text",
               "analyzer":"my_analyzer", <3>
               "search_analyzer":"my_stop_analyzer", <4>
               "search_quote_analyzer":"my_analyzer" <5>
--- a/docs/reference/mapping/params/boost.asciidoc
+++ b/docs/reference/mapping/params/boost.asciidoc
@ -12,11 +12,11 @@ PUT my_index
    "my_type": {
      "properties": {
        "title": {
-          "type": "string",
+          "type": "text",
          "boost": 2 <1>
        },
        "content": {
-          "type": "string"
+          "type": "text"
        }
      }
    }
--- a/docs/reference/mapping/params/copy-to.asciidoc
+++ b/docs/reference/mapping/params/copy-to.asciidoc
@ -15,15 +15,15 @@ PUT /my_index
    "my_type": {
      "properties": {
        "first_name": {
-          "type": "string",
+          "type": "text",
          "copy_to": "full_name" <1>
        },
        "last_name": {
-          "type": "string",
+          "type": "text",
          "copy_to": "full_name" <1>
        },
        "full_name": {
-          "type": "string"
+          "type": "text"
        }
      }
    }
--- a/docs/reference/mapping/params/doc-values.asciidoc
+++ b/docs/reference/mapping/params/doc-values.asciidoc
@ -29,12 +29,10 @@ PUT my_index
    "my_type": {
      "properties": {
        "status_code": { <1>
-          "type":       "string",
-          "index":      "not_analyzed"
+          "type":       "keyword"
        },
        "session_id": { <2>
-          "type":       "string",
-          "index":      "not_analyzed",
+          "type":       "keyword",
          "doc_values": false
        }
      }
--- a/docs/reference/mapping/params/dynamic.asciidoc
+++ b/docs/reference/mapping/params/dynamic.asciidoc
@ -67,7 +67,7 @@ PUT my_index
        "user": { <2>
          "properties": {
            "name": {
-              "type": "string"
+              "type": "text"
            },
            "social_networks": { <3>
              "dynamic": true,
--- a/docs/reference/mapping/params/enabled.asciidoc
+++ b/docs/reference/mapping/params/enabled.asciidoc
@ -21,8 +21,7 @@ PUT my_index
    "session": {
      "properties": {
        "user_id": {
-          "type":  "string",
-          "index": "not_analyzed"
+          "type":  "keyword"
        },
        "last_updated": {
          "type": "date"
--- a/docs/reference/mapping/params/fielddata.asciidoc
+++ b/docs/reference/mapping/params/fielddata.asciidoc
@ -12,28 +12,28 @@ documents, we need to be able to look up the document and find the terms that
 it has in a field.

 Most fields can use index-time, on-disk <<doc-values,`doc_values`>> to support
-this type of data access pattern, but `analyzed` string fields do not support
-`doc_values`.
+this type of data access pattern, but `text` fields do not support `doc_values`.

-Instead, `analyzed` strings use a query-time data structure called
+Instead, `text` fields use a query-time data structure called
 `fielddata`.  This data structure is built on demand the first time that a
 field is used for aggregations, sorting, or is accessed in a script.  It is built
 by reading the entire inverted index for each segment from disk, inverting the
 term ↔︎ document relationship, and storing the result in memory, in the
 JVM heap.

-Loading fielddata is an expensive process so, once it has been loaded, it
-remains in memory for the lifetime of the segment.
+Loading fielddata is an expensive process so it is disabled by default. Also,
+when enabled, once it has been loaded, it remains in memory for the lifetime of
+the segment.

 [WARNING]
 .Fielddata can fill up your heap space
 ==============================================================================
 Fielddata can consume a lot of heap space, especially when loading high
-cardinality `analyzed` string fields.  Most of the time, it doesn't make sense
-to sort or aggregate on `analyzed` string fields (with the notable exception
+cardinality `text` fields.  Most of the time, it doesn't make sense
+to sort or aggregate on `text` fields (with the notable exception
 of the
 <<search-aggregations-bucket-significantterms-aggregation,`significant_terms`>>
-aggregation).  Always think about whether a `not_analyzed` field (which can
+aggregation).  Always think about whether a <<keyword,`keyword`>> field (which can
 use `doc_values`) would be  a better fit for your use case.
 ==============================================================================

@ -42,71 +42,6 @@ same name in the same index.  Its value can be updated on existing fields
 using the <<indices-put-mapping,PUT mapping API>>.


-[[fielddata-format]]
-==== `fielddata.format`
-
-For `analyzed` string fields, the fielddata `format` controls whether
-fielddata should be enabled or not.  It accepts: `disabled` and `paged_bytes`
-(enabled, which is the default).  To disable fielddata loading, you can use
-the following mapping:
-
-[source,js]
--------------------------------------------------
-PUT my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "text": {
-          "type": "string",
-          "fielddata": {
-            "format": "disabled" <1>
-          }
-        }
-      }
-    }
-  }
-}
--------------------------------------------------
-// AUTOSENSE
-<1> The `text` field cannot be used for sorting, aggregations, or in scripts.
-
-.Fielddata and other datatypes
-[NOTE]
-==================================================
-
-Historically, other field datatypes also used fielddata, but this has been replaced
-by index-time, disk-based <<doc-values,`doc_values`>>.
-
-==================================================
-
-
-[[fielddata-loading]]
-==== `fielddata.loading`
-
-This per-field setting controls when fielddata is loaded into memory. It
-accepts three options:
-
-[horizontal]
-`lazy`::
-
-    Fielddata is only loaded into memory when it is needed. (default)
-
-`eager`::
-
-    Fielddata is loaded into memory before a new search segment becomes
-    visible to search.  This can reduce the latency that a user may experience
-    if their search request has to trigger lazy loading from a big segment.
-
-`eager_global_ordinals`::
-
-    Loading fielddata into memory is only part of the work that is required.
-    After loading the fielddata for each segment, Elasticsearch builds the
-    <<global-ordinals>> data structure to make a list of all unique terms
-    across all the segments in a shard.  By default, global ordinals are built
-    lazily.  If the field has a very high cardinality, global ordinals may
-    take some time to build, in which case you can use eager loading instead.
-
 [[global-ordinals]]
 .Global ordinals
 *****************************************
@ -141,15 +76,10 @@ can move the loading time from the first search request, to the refresh itself.
 *****************************************

 [[field-data-filtering]]
-==== `fielddata.filter`
+==== `fielddata_frequency_filter`

 Fielddata filtering can be used to reduce the number of terms loaded into
-memory, and thus reduce memory usage. Terms can be filtered by _frequency_ or
-by _regular expression_, or a combination of the two:
-
-Filtering by frequency::
-+
--
+memory, and thus reduce memory usage. Terms can be filtered by _frequency_:

 The frequency filter allows you to only load terms whose term frequency falls
 between a `min` and `max` value, which can be expressed as an absolute
@ -169,7 +99,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "tag": {
-          "type": "string",
+          "type": "text",
          "fielddata": {
            "filter": {
              "frequency": {
@ -186,44 +116,3 @@ PUT my_index
 }
 --------------------------------------------------
 // AUTOSENSE
--
-
-Filtering by regex::
-+
--
-Terms can also be filtered by regular expression - only values which
-match the regular expression are loaded. Note: the regular expression is
-applied to each term in the field, not to the whole field value. For
-instance, to only load hashtags from a tweet, we can use a regular
-expression which matches terms beginning with `#`:
-
-[source,js]
--------------------------------------------------
-PUT my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "tweet": {
-          "type": "string",
-          "analyzer": "whitespace",
-          "fielddata": {
-            "filter": {
-              "regex": {
-                "pattern": "^#.*"
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
--------------------------------------------------
-// AUTOSENSE
--
-
-These filters can be updated on an existing field mapping and will take
-effect the next time the fielddata for a segment is loaded. Use the
-<<indices-clearcache,Clear Cache>> API
-to reload the fielddata using the new filters.
--- a/docs/reference/mapping/params/ignore-above.asciidoc
+++ b/docs/reference/mapping/params/ignore-above.asciidoc
@ -1,12 +1,7 @@
 [[ignore-above]]
 === `ignore_above`

-Strings longer than the `ignore_above` setting will not be processed by the
-<<analyzer,analyzer>> and will not be indexed. This is mainly useful for
-<<mapping-index,`not_analyzed`>> string fields, which are typically used for
-filtering, aggregations, and sorting.  These are structured fields and it
-doesn't usually make sense to allow very long terms to be indexed in these
-fields.
+Strings longer than the `ignore_above` setting will not be indexed or stored.

 [source,js]
 --------------------------------------------------
@ -16,8 +11,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "message": {
-          "type": "string",
-          "index": "not_analyzed",
+          "type": "keyword",
          "ignore_above": 20 <1>
        }
      }
--- a/docs/reference/mapping/params/include-in-all.asciidoc
+++ b/docs/reference/mapping/params/include-in-all.asciidoc
@ -14,10 +14,10 @@ PUT my_index
    "my_type": {
      "properties": {
        "title": { <1>
-          "type": "string"
+          "type": "text"
        },
        "content": { <1>
-          "type": "string"
+          "type": "text"
        },
        "date": { <2>
          "type": "date",
@ -50,18 +50,18 @@ PUT my_index
    "my_type": {
      "include_in_all": false, <1>
      "properties": {
-        "title":          { "type": "string" },
+        "title":          { "type": "text" },
        "author": {
          "include_in_all": true, <2>
          "properties": {
-            "first_name": { "type": "string" },
-            "last_name":  { "type": "string" }
+            "first_name": { "type": "text" },
+            "last_name":  { "type": "text" }
          }
        },
        "editor": {
          "properties": {
-            "first_name": { "type": "string" }, <3>
-            "last_name":  { "type": "string", "include_in_all": true } <3>
+            "first_name": { "type": "text" }, <3>
+            "last_name":  { "type": "text", "include_in_all": true } <3>
          }
        }
      }
--- a/docs/reference/mapping/params/index-options.asciidoc
+++ b/docs/reference/mapping/params/index-options.asciidoc
@ -39,7 +39,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "text": {
-          "type": "string",
+          "type": "text",
          "index_options": "offsets"
        }
      }
--- a/docs/reference/mapping/params/index.asciidoc
+++ b/docs/reference/mapping/params/index.asciidoc
@ -1,48 +1,6 @@
 [[mapping-index]]
 === `index`

-The `index` option controls how field values are indexed and, thus, how they
-are searchable.  It accepts three values:
+The `index` option controls whether field values are indexed. It accepts `true`
+or `false`. Fields that are not indexed are not queryable.

-[horizontal]
-`no`::
-
-    Do not add this field value to the index. With this setting, the field
-    will not be queryable.
-
-`not_analyzed`::
-
-    Add the field value to the index unchanged, as a single term.  This is the
-    default for all fields that support this option except for
-    <<string,`string`>> fields.  `not_analyzed` fields are usually used with
-    <<term-level-queries,term-level queries>> for structured search.
-
-`analyzed`::
-
-    This option applies only to `string` fields, for which it is the default.
-    The string field value is first <<analysis,analyzed>> to convert the
-    string into terms (e.g. a list of individual words), which are then
-    indexed.  At search time, the query string is passed through
-    (<<search-analyzer,usually>>) the same analyzer to generate terms
-    in the same format as those in the index.  It is this process that enables
-    <<full-text-queries,full text search>>.
-
-For example, you can create a `not_analyzed` string field with the following:
-
-[source,js]
--------------------------------------------------
-PUT /my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "status_code": {
-          "type": "string",
-          "index": "not_analyzed"
-        }
-      }
-    }
-  }
-}
--------------------------------------------------
-// AUTOSENSE
--- a/docs/reference/mapping/params/multi-fields.asciidoc
+++ b/docs/reference/mapping/params/multi-fields.asciidoc
@ -3,8 +3,8 @@

 It is often useful to index the same field in different ways for different
 purposes.  This is the purpose of _multi-fields_. For instance, a `string`
-field could be <<mapping-index,indexed>> as an `analyzed` field for full-text
-search, and as a `not_analyzed` field for sorting or aggregations:
+field could be mapped as a `text` field for full-text
+search, and as a `keyword` field for sorting or aggregations:

 [source,js]
 --------------------------------------------------
@ -14,11 +14,10 @@ PUT /my_index
    "my_type": {
      "properties": {
        "city": {
-          "type": "string",
+          "type": "text",
          "fields": {
            "raw": { <1>
-              "type":  "string",
-              "index": "not_analyzed"
+              "type":  "keyword"
            }
          }
        }
@ -57,8 +56,8 @@ GET /my_index/_search
 }
 --------------------------------------------------
 // AUTOSENSE
-<1> The `city.raw` field is a `not_analyzed` version of the `city` field.
-<2> The analyzed `city` field can be used for full text search.
+<1> The `city.raw` field is a `keyword` version of the `city` field.
+<2> The `city` field can be used for full text search.
 <3> The `city.raw` field can be used for sorting and aggregations

 NOTE: Multi-fields do not change the original `_source` field.
@ -83,10 +82,10 @@ PUT my_index
    "my_type": {
      "properties": {
        "text": { <1>
-          "type": "string",
+          "type": "text",
          "fields": {
            "english": { <2>
-              "type":     "string",
+              "type":     "text",
              "analyzer": "english"
            }
          }
--- a/docs/reference/mapping/params/norms.asciidoc
+++ b/docs/reference/mapping/params/norms.asciidoc
@ -4,14 +4,14 @@
 Norms store various normalization factors that are later used at query time
 in order to compute the score of a document relatively to a query.

-Although useful for scoring, norms also require quite a lot of memory
+Although useful for scoring, norms also require quite a lot of disk
 (typically in the order of one byte per document per field in your index, even
 for documents that don't have this specific field). As a consequence, if you
 don't need scoring on a specific field, you should disable norms on that
 field. In  particular, this is the case for fields that are used solely for
 filtering or aggregations.

-TIP: The `norms.enabled` setting must have the same setting for fields of the
+TIP: The `norms` setting must have the same setting for fields of the
 same name in the same index.  Norms can be disabled on existing fields using
 the <<indices-put-mapping,PUT mapping API>>.

@ -24,10 +24,8 @@ PUT my_index/_mapping/my_type
 {
  "properties": {
    "title": {
-      "type": "string",
-      "norms": {
-        "enabled": false
-      }
+      "type": "text",
+      "norms": false
    }
  }
 }
@ -41,31 +39,3 @@ results since some documents won't have norms anymore while other documents
 might still have norms.


-==== Lazy loading of norms
-
-Norms can be loaded into memory eagerly (`eager`), whenever a new segment
-comes online, or they can loaded lazily (`lazy`, default), only when the field
-is queried.
-
-Eager loading can be configured as follows:
-
-[source,js]
------------
-PUT my_index/_mapping/my_type
-{
-  "properties": {
-    "title": {
-      "type": "string",
-      "norms": {
-        "loading": "eager"
-      }
-    }
-  }
-}
------------
-// AUTOSENSE
-
-TIP: The `norms.loading` setting must have the same setting for fields of the
-same name in the same index.  Its value can be updated on existing fields
-using the <<indices-put-mapping,PUT mapping API>>.
-
--- a/docs/reference/mapping/params/null-value.asciidoc
+++ b/docs/reference/mapping/params/null-value.asciidoc
@ -16,8 +16,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "status_code": {
-          "type":       "string",
-          "index":      "not_analyzed",
+          "type":       "keyword",
          "null_value": "NULL" <1>
        }
      }
@ -50,6 +49,4 @@ GET my_index/_search
 <3> A query for `NULL` returns document 1, but not document 2.

 IMPORTANT: The `null_value` needs to be the same datatype as the field.  For
-instance, a `long` field cannot have a string `null_value`.  String fields
-which are `analyzed` will also pass the `null_value` through the configured
-analyzer.
+instance, a `long` field cannot have a string `null_value`.
--- a/docs/reference/mapping/params/position-increment-gap.asciidoc
+++ b/docs/reference/mapping/params/position-increment-gap.asciidoc
@ -57,7 +57,7 @@ PUT my_index
    "groups": {
      "properties": {
        "names": {
-          "type": "string",
+          "type": "text",
          "position_increment_gap": 0 <1>
        }
      }
--- a/docs/reference/mapping/params/properties.asciidoc
+++ b/docs/reference/mapping/params/properties.asciidoc
@ -23,14 +23,14 @@ PUT my_index
        "manager": { <2>
          "properties": {
            "age":  { "type": "integer" },
-            "name": { "type": "string"  }
+            "name": { "type": "text"  }
          }
        },
        "employees": { <3>
          "type": "nested",
          "properties": {
            "age":  { "type": "integer" },
-            "name": { "type": "string"  }
+            "name": { "type": "text"  }
          }
        }
      }
--- a/docs/reference/mapping/params/search-analyzer.asciidoc
+++ b/docs/reference/mapping/params/search-analyzer.asciidoc
@ -41,7 +41,7 @@ PUT /my_index
    "my_type": {
      "properties": {
        "text": {
-          "type": "string",
+          "type": "text",
          "analyzer": "autocomplete", <2>
          "search_analyzer": "standard" <2>
        }
--- a/docs/reference/mapping/params/similarity.asciidoc
+++ b/docs/reference/mapping/params/similarity.asciidoc
@ -5,8 +5,8 @@ Elasticsearch allows you to configure a scoring algorithm or _similarity_ per
 field. The `similarity` setting provides a simple way of choosing a similarity
 algorithm other than the default TF/IDF, such as `BM25`.

-Similarities are mostly useful for <<string,`string`>> fields, especially
-`analyzed` string fields, but can also apply to other field types.
+Similarities are mostly useful for <<text,`text`>> fields, but can also apply
+to other field types.

 Custom similarities can be configured by tuning the parameters of the built-in
 similarities. For more details about this expert options, see the
@ -37,10 +37,10 @@ PUT my_index
    "my_type": {
      "properties": {
        "default_field": { <1>
-          "type": "string"
+          "type": "text"
        },
        "bm25_field": {
-          "type": "string",
+          "type": "text",
          "similarity": "BM25" <2>
        }
      }
--- a/docs/reference/mapping/params/store.asciidoc
+++ b/docs/reference/mapping/params/store.asciidoc
@ -24,7 +24,7 @@ PUT /my_index
    "my_type": {
      "properties": {
        "title": {
-          "type": "string",
+          "type": "text",
          "store": true <1>
        },
        "date": {
@ -32,7 +32,7 @@ PUT /my_index
          "store": true <1>
        },
        "content": {
-          "type": "string"
+          "type": "text"
        }
      }
    }
--- a/docs/reference/mapping/params/term-vector.asciidoc
+++ b/docs/reference/mapping/params/term-vector.asciidoc
@ -35,7 +35,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "text": {
-          "type":        "string",
+          "type":        "text",
          "term_vector": "with_positions_offsets"
        }
      }
--- a/docs/reference/mapping/types.asciidoc
+++ b/docs/reference/mapping/types.asciidoc
@ -7,7 +7,7 @@ document:
 [float]
 === Core datatypes

-<<string>>::    `string`
+string::        <<text,`text`>> and <<keyword,`keyword`>>
 <<number>>::    `long`, `integer`, `short`, `byte`, `double`, `float`
 <<date>>::      `date`
 <<boolean>>::   `boolean`
@ -45,9 +45,9 @@ Attachment datatype::
 === Multi-fields

 It is often useful to index the same field in different ways for different
-purposes. For instance, a `string` field could be <<mapping-index,indexed>> as
-an `analyzed` field for full-text search, and as a `not_analyzed` field for
-sorting or aggregations.  Alternatively, you could index a string field with
+purposes. For instance, a `string` field could be mapped as
+a `text` field for full-text search, and as a `keyword` field for
+sorting or aggregations.  Alternatively, you could index a text field with
 the <<analysis-standard-analyzer,`standard` analyzer>>, the
 <<english-analyzer,`english`>> analyzer, and the
 <<french-analyzer,`french` analyzer>>.
@ -69,6 +69,8 @@ include::types/geo-shape.asciidoc[]

 include::types/ip.asciidoc[]

+include::types/keyword.asciidoc[]
+
 include::types/nested.asciidoc[]

 include::types/numeric.asciidoc[]
@ -77,6 +79,8 @@ include::types/object.asciidoc[]

 include::types/string.asciidoc[]

+include::types/text.asciidoc[]
+
 include::types/token-count.asciidoc[]


--- a/docs/reference/mapping/types/binary.asciidoc
+++ b/docs/reference/mapping/types/binary.asciidoc
@ -13,7 +13,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "name": {
-          "type": "string"
+          "type": "text"
        },
        "blob": {
          "type": "binary"
--- a/docs/reference/mapping/types/keyword.asciidoc
+++ b/docs/reference/mapping/types/keyword.asciidoc
@ -0,0 +1,111 @@
+[[keyword]]
+=== Keyword datatype
+
+A field to index structured content such as email addresses, hostnames, status
+codes, zip codes or tags.
+
+They are typically used for filtering (_Find me all blog posts where
++status++ is ++published++_), for sorting, and for aggregations. Keyword
+fields are only searchable by their exact value.
+
+If you need to index full text content such as email bodies or product
+descriptions, it is likely that you should rather use a <<text,`text`>> field.
+
+Below is an example of a mapping for a keyword field:
+
+[source,js]
+--------------------------------
+PUT my_index
+{
+  "mappings": {
+    "my_type": {
+      "properties": {
+        "tags": {
+          "type":  "keyword"
+        }
+      }
+    }
+  }
+}
+--------------------------------
+// AUTOSENSE
+
+[[keyword-params]]
+==== Parameters for keyword fields
+
+The following parameters are accepted by `keyword` fields:
+
+[horizontal]
+
+<<mapping-boost,`boost`>>::
+
+    Mapping field-level query time boosting. Accepts a floating point number, defaults
+    to `1.0`.
+
+<<doc-values,`doc_values`>>::
+
+    Should the field be stored on disk in a column-stride fashion, so that it
+    can later be used for sorting, aggregations, or scripting? Accepts `true`
+    (default) or `false`.
+
+<<global-ordinals,`eager_global_ordinals`>>::
+
+    Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false`
+    (default). Enabling this is a good idea on fields that are frequently used for
+    terms aggregations.
+
+<<multi-fields,`fields`>>::
+
+    Multi-fields allow the same string value to be indexed in multiple ways for
+    different purposes, such as one field for search and a multi-field for
+    sorting and aggregations.
+
+<<ignore-above,`ignore_above`>>::
+
+    Do not index or analyze any string longer than this value.  Defaults to
+    `2147483647` so that all values would be accepted.
+
+<<include-in-all,`include_in_all`>>::
+
+    Whether or not the field value should be included in the
+    <<mapping-all-field,`_all`>> field? Accepts `true` or `false`.  Defaults
+    to `false` if <<mapping-index,`index`>> is set to `false`, or if a parent
+    <<object,`object`>> field sets `include_in_all` to `false`.
+    Otherwise defaults to `true`.
+
+<<mapping-index,`index`>>::
+
+    Should the field be searchable? Accepts `true` (default) or `false`.
+
+<<index-options,`index_options`>>::
+
+    What information should be stored in the index, for scoring purposes.
+    Defaults to `docs` but can also be set to `freqs` to take term frequency into account
+    when computing scores.
+
+<<norms,`norms`>>::
+
+    Whether field-length should be taken into account when scoring queries.
+    Accepts `true` or `false` (default).
+
+<<null-value,`null_value`>>::
+
+    Accepts a string value which is substituted for any explicit `null`
+    values.  Defaults to `null`, which means the field is treated as missing.
+
+<<mapping-store,`store`>>::
+
+    Whether the field value should be stored and retrievable separately from
+    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
+    (default).
+
+<<search-analyzer,`search_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time on
+    <<mapping-index,`analyzed`>> fields. Defaults to the `analyzer` setting.
+
+<<similarity,`similarity`>>::
+
+    Which scoring algorithm or _similarity_ should be used. Defaults
+    to `classic`, which uses TF/IDF.
+
--- a/docs/reference/mapping/types/object.asciidoc
+++ b/docs/reference/mapping/types/object.asciidoc
@ -46,16 +46,15 @@ PUT my_index
    "my_type": { <1>
      "properties": {
        "region": {
-          "type": "string",
-          "index": "not_analyzed"
+          "type": "keyword"
        },
        "manager": { <2>
          "properties": {
            "age":  { "type": "integer" },
            "name": { <3>
              "properties": {
-                "first": { "type": "string" },
-                "last":  { "type": "string" }
+                "first": { "type": "text" },
+                "last":  { "type": "text" }
              }
            }
          }
--- a/docs/reference/mapping/types/string.asciidoc
+++ b/docs/reference/mapping/types/string.asciidoc
@ -1,179 +1,4 @@
 [[string]]
 === String datatype

-Fields of type `string` accept text values.  Strings may be sub-divided into:
-
-Full text::
-+
--
-
-Full text values, like the body of an email, are typically used for text based
-relevance searches, such as: _Find the most relevant documents that match a
-query for "quick brown fox"_.
-
-These fields are `analyzed`, that is they are passed through an
-<<analysis,analyzer>> to convert the string into a list of individual terms
-before being indexed. The analysis process allows Elasticsearch to search for
-individual words _within_  each full text field.  Full text fields are not
-used for sorting and seldom used for aggregations (although the
-<<search-aggregations-bucket-significantterms-aggregation,significant terms aggregation>> is a notable exception).
-
--
-
-Keywords::
-
-Keywords are exact values like email addresses, hostnames, status codes, or
-tags.  They are typically used for filtering (_Find me all blog posts where
-++status++ is ++published++_), for sorting, and for aggregations. Keyword
-fields are `not_analyzed`.  Instead, the exact string value is added to the
-index as a single term.
-
-Below is an example of a mapping for a full text (`analyzed`) and a keyword
-(`not_analyzed`) string field:
-
-[source,js]
--------------------------------
-PUT my_index
-{
-  "mappings": {
-    "my_type": {
-      "properties": {
-        "full_name": { <1>
-          "type":  "string"
-        },
-        "status": {
-          "type":  "string", <2>
-          "index": "not_analyzed"
-        }
-      }
-    }
-  }
-}
--------------------------------
-// AUTOSENSE
-<1> The `full_name` field is an `analyzed` full text field -- `index:analyzed` is the default.
-<2> The `status` field is a `not_analyzed` keyword field.
-
-Sometimes it is useful to have both a full text (`analyzed`) and a keyword
-(`not_analyzed`) version of the same field: one for full text search and the
-other for aggregations and sorting. This can be achieved with
-<<multi-fields,multi-fields>>.
-
-
-[[string-params]]
-==== Parameters for string fields
-
-The following parameters are accepted by `string` fields:
-
-[horizontal]
-
-<<analyzer,`analyzer`>>::
-
-    The <<analysis,analyzer>> which should be used for
-    <<mapping-index,`analyzed`>> string fields, both at index-time and at
-    search-time (unless overridden by the  <<search-analyzer,`search_analyzer`>>).
-    Defaults to the default index analyzer, or the
-    <<analysis-standard-analyzer,`standard` analyzer>>.
-
-<<mapping-boost,`boost`>>::
-
-    Mapping field-level query time boosting. Accepts a floating point number, defaults
-    to `1.0`.
-
-<<doc-values,`doc_values`>>::
-
-    Should the field be stored on disk in a column-stride fashion, so that it
-    can later be used for sorting, aggregations, or scripting? Accepts `true`
-    or `false`. Defaults to `true` for `not_analyzed` fields. Analyzed fields
-    do not support doc values.
-
-<<fielddata,`fielddata`>>::
-
-    Can the field use in-memory fielddata for sorting, aggregations,
-    or scripting? Accepts `disabled` or `paged_bytes` (default).
-    Not analyzed fields will use <<doc-values,doc values>> in preference
-    to fielddata.
-
-<<multi-fields,`fields`>>::
-
-    Multi-fields allow the same string value to be indexed in multiple ways for
-    different purposes, such as one field for search and a multi-field for
-    sorting and aggregations, or the same string value analyzed by different
-    analyzers.
-
-<<ignore-above,`ignore_above`>>::
-
-    Do not index or analyze any string longer than this value.  Defaults to `0` (disabled).
-
-<<include-in-all,`include_in_all`>>::
-
-    Whether or not the field value should be included in the
-    <<mapping-all-field,`_all`>> field? Accepts `true` or `false`.  Defaults
-    to `false` if <<mapping-index,`index`>> is set to `no`, or if a parent
-    <<object,`object`>> field sets `include_in_all` to `false`.
-    Otherwise defaults to `true`.
-
-<<mapping-index,`index`>>::
-
-    Should the field be searchable? Accepts `analyzed` (default, treat as full-text field),
-    `not_analyzed` (treat as keyword field) and `no`.
-
-<<index-options,`index_options`>>::
-
-    What information should be stored in the index, for search and highlighting purposes.
-    Defaults to `positions` for <<mapping-index,`analyzed`>> fields, and to `docs` for
-    `not_analyzed` fields.
-
-
-<<norms,`norms`>>::
-+
--
-
-Whether field-length should be taken into account when scoring queries.
-Defaults depend on the <<mapping-index,`index`>> setting:
-
-* `analyzed` fields default to `{ "enabled": true, "loading": "lazy" }`.
-* `not_analyzed` fields default to `{ "enabled": false }`.
--
-
-<<null-value,`null_value`>>::
-
-    Accepts a string value which is substituted for any explicit `null`
-    values.  Defaults to `null`, which means the field is treated as missing.
-    If the field is `analyzed`, the `null_value` will also be analyzed.
-
-<<position-increment-gap,`position_increment_gap`>>::
-
-    The number of fake term positions which should be inserted between
-    each element of an array of strings. Defaults to 0.
-    The number of fake term position which should be inserted between each
-    element of an array of strings. Defaults to the position_increment_gap
-    configured on the analyzer which defaults to 100. 100 was chosen because it
-    prevents phrase queries with reasonably large slops (less than 100) from
-    matching terms across field values.
-
-<<mapping-store,`store`>>::
-
-    Whether the field value should be stored and retrievable separately from
-    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
-    (default).
-
-<<search-analyzer,`search_analyzer`>>::
-
-    The <<analyzer,`analyzer`>> that should be used at search time on
-    <<mapping-index,`analyzed`>> fields. Defaults to the `analyzer` setting.
-	
-<<search-quote-analyzer,`search_quote_analyzer`>>::
-
-    The <<analyzer,`analyzer`>> that should be used at search time when a
-    phrase is encountered. Defaults to the `search_analyzer` setting.
-
-<<similarity,`similarity`>>::
-
-    Which scoring algorithm or _similarity_ should be used. Defaults
-    to `classic`, which uses TF/IDF.
-
-<<term-vector,`term_vector`>>::
-
-    Whether term vectors should be stored for an <<mapping-index,`analyzed`>>
-    field. Defaults to `no`.
+NOTE: The `string` field has been removed in favor of the `text` and `keyword` fields.
--- a/docs/reference/mapping/types/text.asciidoc
+++ b/docs/reference/mapping/types/text.asciidoc
@ -0,0 +1,139 @@
+[[text]]
+=== Text datatype
+
+A field to index full-text values, such as the body of an email or the
+description of a product. These fields are `analyzed`, that is they are passed through an
+<<analysis,analyzer>> to convert the string into a list of individual terms
+before being indexed. The analysis process allows Elasticsearch to search for
+individual words _within_  each full text field.  Text fields are not
+used for sorting and seldom used for aggregations (although the
+<<search-aggregations-bucket-significantterms-aggregation,significant terms aggregation>> 
+is a notable exception).
+
+If you need to index structured content such as email addresses, hostnames, status
+codes, or tags, it is likely that you should rather use a <<keyword,`keyword`>> field.
+
+Below is an example of a mapping for a text field:
+
+[source,js]
+--------------------------------
+PUT my_index
+{
+  "mappings": {
+    "my_type": {
+      "properties": {
+        "full_name": {
+          "type":  "text"
+        }
+      }
+    }
+  }
+}
+--------------------------------
+// AUTOSENSE
+
+Sometimes it is useful to have both a full text (`text`) and a keyword
+(`keyword`) version of the same field: one for full text search and the
+other for aggregations and sorting. This can be achieved with
+<<multi-fields,multi-fields>>.
+
+[[text-params]]
+==== Parameters for text fields
+
+The following parameters are accepted by `text` fields:
+
+[horizontal]
+
+<<analyzer,`analyzer`>>::
+
+    The <<analysis,analyzer>> which should be used for
+    <<mapping-index,`analyzed`>> string fields, both at index-time and at
+    search-time (unless overridden by the  <<search-analyzer,`search_analyzer`>>).
+    Defaults to the default index analyzer, or the
+    <<analysis-standard-analyzer,`standard` analyzer>>.
+
+<<mapping-boost,`boost`>>::
+
+    Mapping field-level query time boosting. Accepts a floating point number, defaults
+    to `1.0`.
+
+<<global-ordinals,`eager_global_ordinals`>>::
+
+    Should global ordinals be loaded eagerly on refresh? Accepts `true` or `false`
+    (default). Enabling this is a good idea on fields that are frequently used for
+    (significant) terms aggregations.
+
+<<fielddata,`fielddata`>>::
+
+    Can the field use in-memory fielddata for sorting, aggregations,
+    or scripting? Accepts `true` or `false` (default).
+
+<<field-data-filtering,`fielddata_frequency_filter`>>::
+
+    Expert settings which allow you to decide which values to load in memory when `fielddata`
+    is enabled. By default all values are loaded.
+
+<<multi-fields,`fields`>>::
+
+    Multi-fields allow the same string value to be indexed in multiple ways for
+    different purposes, such as one field for search and a multi-field for
+    sorting and aggregations, or the same string value analyzed by different
+    analyzers.
+
+<<include-in-all,`include_in_all`>>::
+
+    Whether or not the field value should be included in the
+    <<mapping-all-field,`_all`>> field? Accepts `true` or `false`.  Defaults
+    to `false` if <<mapping-index,`index`>> is set to `false`, or if a parent
+    <<object,`object`>> field sets `include_in_all` to `false`.
+    Otherwise defaults to `true`.
+
+<<mapping-index,`index`>>::
+
+    Should the field be searchable? Accepts `true` (default) or `false`.
+
+<<index-options,`index_options`>>::
+
+    What information should be stored in the index, for search and highlighting purposes.
+    Defaults to `positions`.
+
+<<norms,`norms`>>::
+
+    Whether field-length should be taken into account when scoring queries.
+    Accepts `true` (default) or `false`.
+
+<<position-increment-gap,`position_increment_gap`>>::
+
+    The number of fake term positions which should be inserted between
+    each element of an array of strings. Defaults to the
+    `position_increment_gap` configured on the analyzer, which defaults
+    to `100`.
+    The value `100` was chosen because it prevents phrase queries with
+    reasonably large slops (less than 100) from matching terms across
+    field values.
+
+<<mapping-store,`store`>>::
+
+    Whether the field value should be stored and retrievable separately from
+    the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
+    (default).
+
+<<search-analyzer,`search_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time on
+    <<mapping-index,`analyzed`>> fields. Defaults to the `analyzer` setting.
+
+<<search-quote-analyzer,`search_quote_analyzer`>>::
+
+    The <<analyzer,`analyzer`>> that should be used at search time when a
+    phrase is encountered. Defaults to the `search_analyzer` setting.
+
+<<similarity,`similarity`>>::
+
+    Which scoring algorithm or _similarity_ should be used. Defaults
+    to `classic`, which uses TF/IDF.
+
+<<term-vector,`term_vector`>>::
+
+    Whether term vectors should be stored for an <<mapping-index,`analyzed`>>
+    field. Defaults to `no`.
--- a/docs/reference/mapping/types/token-count.asciidoc
+++ b/docs/reference/mapping/types/token-count.asciidoc
@ -15,7 +15,7 @@ PUT my_index
    "my_type": {
      "properties": {
        "name": { <1>
-          "type": "string",
+          "type": "text",
          "fields": {
            "length": { <2>
              "type":     "token_count",
--- a/docs/reference/migration/migrate_5_0/mapping.asciidoc
+++ b/docs/reference/migration/migrate_5_0/mapping.asciidoc
@ -16,6 +16,26 @@ values.  For backwards compatibility purposes, during the 5.x series:
  with `string` fields are no longer possible with `text`/`keyword` fields
  such as enabling `term_vectors` on a not-analyzed `keyword` field.

+==== Default string mappings
+
+String mappings now have the following default mappings:
+
+[source,json]
+---------------
+{
+  "type": "text",
+  "fields": {
+    "keyword": {
+      "type": "keyword",
+      "ignore_above": 256
+    }
+  }
+}
+---------------
+
+This allows you to perform full-text search on the original field name and to sort
+and run aggregations on the sub keyword field.
+
 ==== `index` property

 On all field datatypes (except for the deprecated `string` field), the `index`
@ -35,12 +55,22 @@ now defaults to using `float` instead of `double`. The reasoning is that
 floats should be more than enough for most cases but would decrease storage
 requirements significantly.

+==== `norms`
+
+`norms` now takes a boolean instead of an object. This boolean is the replacement
+for `norms.enabled`. There is no replacement for `norms.loading` since eager
+loading of norms is not useful anymore now that norms are disk-based.
+
 ==== `fielddata.format`

 Setting `fielddata.format: doc_values` in the mappings used to implicitly
 enable doc-values on a field. This no longer works: the only way to enable or
 disable doc-values is by using the `doc_values` property of mappings.

+==== `fielddata.frequency.regex`
+
+Regex filters are not supported anymore and will be dropped on upgrade.
+
 ==== Source-transform removed

 The source `transform` feature has been removed. Instead, use an ingest pipeline
--- a/docs/reference/query-dsl/exists-query.asciidoc
+++ b/docs/reference/query-dsl/exists-query.asciidoc
@ -47,7 +47,7 @@ instance, if the `user` field were mapped as follows:
 [source,js]
 --------------------------------------------------
  "user": {
-    "type": "string",
+    "type": "text",
    "null_value": "_null_"
  }
 --------------------------------------------------
--- a/docs/reference/query-dsl/mlt-query.asciidoc
+++ b/docs/reference/query-dsl/mlt-query.asciidoc
@ -116,18 +116,18 @@ curl -s -XPUT 'http://localhost:9200/imdb/' -d '{
    "movies": {
      "properties": {
        "title": {
-          "type": "string",
+          "type": "text",
          "term_vector": "yes"
         },
         "description": {
-          "type": "string"
+          "type": "text"
        },
        "tags": {
-          "type": "string",
+          "type": "text",
          "fields" : {
            "raw": {
-              "type" : "string",
-              "index" : "not_analyzed",
+              "type" : "text",
+              "analyzer": "keyword",
              "term_vector" : "yes"
            }
          }
--- a/docs/reference/query-dsl/term-query.asciidoc
+++ b/docs/reference/query-dsl/term-query.asciidoc
@ -49,13 +49,13 @@ GET /_search
 .Why doesn't the `term` query match my document?
 **************************************************

-String fields can be `analyzed` (treated as full text, like the body of an
-email), or `not_analyzed` (treated as exact values, like an email address or a
-zip code).  Exact values (like numbers, dates, and `not_analyzed` strings) have
+String fields can be of type `text` (treated as full text, like the body of an
+email), or `keyword` (treated as exact values, like an email address or a
+zip code).  Exact values (like numbers, dates, and keywords) have
 the exact value specified in the field added to the inverted index in order
 to make them searchable.

-By default, however, `string` fields are `analyzed`. This means that their
+However, `text` fields are `analyzed`. This means that their
 values are first passed through an <<analysis,analyzer>> to produce a list of
 terms, which are then added to the inverted index.

@ -70,7 +70,7 @@ within a big block of full text.

 The `term` query looks for the *exact* term in the field's inverted index --
 it doesn't know anything about the field's analyzer.  This makes it useful for
-looking up values in `not_analyzed` string fields, or in numeric or date
+looking up values in keyword fields, or in numeric or date
 fields.  When querying full text fields, use the
 <<query-dsl-match-query,`match` query>> instead, which understands how the field
 has been analyzed.
@ -86,11 +86,10 @@ PUT my_index
    "my_type": {
      "properties": {
        "full_text": {
-          "type":  "string" <1>
+          "type":  "text" <1>
        },
        "exact_value": {
-          "type":  "string",
-          "index": "not_analyzed" <2>
+          "type":  "keyword" <2>
        }
      }
    }
@ -105,8 +104,8 @@ PUT my_index/my_type/1
 --------------------------------------------------
 // AUTOSENSE

-<1> The `full_text` field is `analyzed` by default.
-<2> The `exact_value` field is set to be `not_analyzed`.
+<1> The `full_text` field is of type `text` and will be analyzed.
+<2> The `exact_value` field is of type `keyword` and will NOT be analyzed.
 <3> The `full_text` inverted index will contain the terms: [`quick`, `foxes`].
 <4> The `exact_value` inverted index will contain the exact term: [`Quick Foxes!`].