Merge branch 'master' of https://github.com/elasticsearch/elasticsearch

2014-07-30 18:39:55 +02:00 · 2014-07-30 18:39:55 +02:00 · 8693e87e73
parent 85967f974c e3b3b6c055
commit 8693e87e73
1480 changed files with 180009 additions and 22825 deletions
--- a/README.textile
+++ b/README.textile
@ -37,7 +37,7 @@ First of all, DON'T PANIC. It will take 5 minutes to get the gist of what Elasti
 h3. Installation
 * "Download":http://www.elasticsearch.org/download and unzip the Elasticsearch official distribution.
-* Run @bin/elasticsearch@ on unix, or @bin/elasticsearch.bat@ on windows.
+* Run @bin/elasticsearch@ on unix, or @bin\elasticsearch.bat@ on windows.
 * Run @curl -X GET http://localhost:9200/@.
 * Start more servers ...
--- a/TESTING.asciidoc
+++ b/TESTING.asciidoc
@ -20,13 +20,13 @@ mvn clean package -DskipTests
 To disable and enable network transport, set the `Des.node.mode`.
-Use network transport (default):
+Use network transport:
 ------------------------------------
 -Des.node.mode=network
 ------------------------------------
-Use local transport:
+Use local transport (default since 1.3):
 -------------------------------------
 -Des.node.mode=local
@ -62,6 +62,29 @@ Run any test methods that contain 'esi' (like: ...r*esi*ze...).
 mvn test "-Dtests.method=*esi*"
 -------------------------------
 You can also filter tests by certain annotations ie:
  * `@Slow` - tests that are known to take a long time to execute
  * `@Nightly` - tests that only run in nightly builds (disabled by default)
  * `@Integration` - integration tests
  * `@Backwards` - backwards compatibility tests (disabled by default)
  * `@AwaitsFix` - tests that are waiting for a bugfix (disabled by default)
  * `@BadApple` - tests that are known to fail randomly (disabled by default)
 Those annotation names can be combined into a filter expression like:
 ------------------------------------------------
 mvn test -Dtests.filter="@nightly and not @slow" 
 ------------------------------------------------
 to run all nightly tests but not the ones that are slow. `tests.filter` supports
 the boolean operators `and, or, not` and grouping ie:
 ---------------------------------------------------------------
 mvn test -Dtests.filter="@nightly and not(@slow or @backwards)" 
 ---------------------------------------------------------------
 === Seed and repetitions.
 Run with a given seed (seed is a hex-encoded long).
@ -184,14 +207,23 @@ To run backwards compatibiilty tests untar or unzip a release and run the tests
 with the following command:
 ---------------------------------------------------------------------------
-mvn test -Dtests.bwc=true -Dtests.bwc.version=x.y.z -Dtests.bwc.path=/path/to/elasticsearch
+mvn test -Dtests.filter="@backwards" -Dtests.bwc.version=x.y.z -Dtests.bwc.path=/path/to/elasticsearch
 ---------------------------------------------------------------------------
 If the elasticsearch release is placed under `./backwards/elasticsearch-x.y.z` the path
 can be omitted:
 ---------------------------------------------------------------------------
-mvn test -Dtests.bwc=true -Dtests.bwc.version=x.y.z 
+mvn test -Dtests.filter="@backwards" -Dtests.bwc.version=x.y.z
 ---------------------------------------------------------------------------
 To setup the bwc test environment execute the following steps (provided you are
 already in your elasticsearch clone):
 ---------------------------------------------------------------------------
 $ mkdir backwards && cd backwards
 $ curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.2.1.tar.gz
 $ tar -xzf elasticsearch-1.2.1.tar.gz 
 ---------------------------------------------------------------------------
 == Testing the REST layer
--- a/bin/elasticsearch.bat
+++ b/bin/elasticsearch.bat
@ -62,9 +62,14 @@ REM The path to the heap dump location, note directory must exists and have enou
 REM space for a full heap dump.
 REM JAVA_OPTS=%JAVA_OPTS% -XX:HeapDumpPath=$ES_HOME/logs/heapdump.hprof
 REM Disables explicit GC
 set JAVA_OPTS=%JAVA_OPTS% -XX:+DisableExplicitGC
 set ES_CLASSPATH=%ES_CLASSPATH%;%ES_HOME%/lib/${project.build.finalName}.jar;%ES_HOME%/lib/*;%ES_HOME%/lib/sigar/*
 set ES_PARAMS=-Delasticsearch -Des-foreground=yes -Des.path.home="%ES_HOME%"
 TITLE Elasticsearch ${project.version}
 "%JAVA_HOME%\bin\java" %JAVA_OPTS% %ES_JAVA_OPTS% %ES_PARAMS% %* -cp "%ES_CLASSPATH%" "org.elasticsearch.bootstrap.Elasticsearch"
 goto finally
--- a/bin/elasticsearch.in.sh
+++ b/bin/elasticsearch.in.sh
@ -62,3 +62,6 @@ JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError"
 # The path to the heap dump location, note directory must exists and have enough
 # space for a full heap dump.
 #JAVA_OPTS="$JAVA_OPTS -XX:HeapDumpPath=$ES_HOME/logs/heapdump.hprof"
 # Disables explicit GC
 JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC"
--- a/bin/plugin
+++ b/bin/plugin
@ -45,5 +45,5 @@ while [ $# -gt 0 ]; do
  shift
 done
-exec $JAVA $JAVA_OPTS -Xmx64m -Xms16m -Delasticsearch -Des.path.home="$ES_HOME" $properties -cp "$ES_HOME/lib/*" org.elasticsearch.plugins.PluginManager $args
+exec "$JAVA" $JAVA_OPTS -Xmx64m -Xms16m -Delasticsearch -Des.path.home="$ES_HOME" $properties -cp "$ES_HOME/lib/*" org.elasticsearch.plugins.PluginManager $args
--- a/bin/plugin.bat
+++ b/bin/plugin.bat
@ -7,6 +7,7 @@ if NOT DEFINED JAVA_HOME goto err
 set SCRIPT_DIR=%~dp0
 for %%I in ("%SCRIPT_DIR%..") do set ES_HOME=%%~dpfI
 TITLE Elasticsearch Plugin Manager ${project.version}
 "%JAVA_HOME%\bin\java" %JAVA_OPTS% -Xmx64m -Xms16m -Des.path.home="%ES_HOME%" -cp "%ES_HOME%/lib/*;" "org.elasticsearch.plugins.PluginManager" %*
 goto finally
--- a/bin/service.bat
+++ b/bin/service.bat
@ -43,6 +43,8 @@ set SERVICE_ID=%1
 if "%LOG_OPTS%" == "" set LOG_OPTS=--LogPath "%LOG_DIR%" --LogPrefix "%SERVICE_ID%" --StdError auto --StdOutput auto
 TITLE Elasticsearch Service ${project.version}
 if /i %SERVICE_CMD% == install goto doInstall
 if /i %SERVICE_CMD% == remove goto doRemove
 if /i %SERVICE_CMD% == start goto doStart
@ -160,6 +162,9 @@ REM The path to the heap dump location, note directory must exists and have enou
 REM space for a full heap dump.
 REM JAVA_OPTS=%JAVA_OPTS% -XX:HeapDumpPath=$ES_HOME/logs/heapdump.hprof
 REM Disables explicit GC
 set JAVA_OPTS=%JAVA_OPTS% -XX:+DisableExplicitGC
 if "%DATA_DIR%" == "" set DATA_DIR=%ES_HOME%\data
 if "%WORK_DIR%" == "" set WORK_DIR=%ES_HOME%
--- a/config/elasticsearch.yml
+++ b/config/elasticsearch.yml
@ -375,3 +375,11 @@
 #monitor.jvm.gc.old.warn: 10s
 #monitor.jvm.gc.old.info: 5s
 #monitor.jvm.gc.old.debug: 2s
 ################################## Security ################################
 # Uncomment if you want to enable JSONP as a valid return transport on the
 # http server. With this enabled, it may pose a security risk, so disabling
 # it unless you need it is recommended (it is disabled by default).
 #
 #http.jsonp.enable: true
--- a/core-signatures.txt
+++ b/core-signatures.txt
@ -18,8 +18,6 @@ java.util.Collections#sort(java.util.List,java.util.Comparator)
 java.io.StringReader#<init>(java.lang.String) @ Use FastStringReader instead
 org.apache.lucene.util.RamUsageEstimator#sizeOf(java.lang.Object) @ This can be a perfromance trap 
@defaultMessage Reference management is tricky, leave it to SearcherManager
 org.apache.lucene.index.IndexReader#decRef()
 org.apache.lucene.index.IndexReader#incRef()
@ -53,11 +51,21 @@ java.lang.Object#notifyAll()
 java.lang.Math#abs(int)
 java.lang.Math#abs(long)
@defaultMessage Please do not try to stop the world
 java.lang.System#gc()
@defaultMessage Use Long.compare instead we are on Java7
 com.google.common.primitives.Longs#compare(long,long)
-@defaultMessage we have an optimized XStringField to reduce analysis creation overhead
+@defaultMessage Use Channels.* methods to write to channels. Do not write directly.
-org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType)
+java.nio.channels.WritableByteChannel#write(java.nio.ByteBuffer)
 java.nio.channels.FileChannel#write(java.nio.ByteBuffer, long)
 java.nio.channels.GatheringByteChannel#write(java.nio.ByteBuffer[], int, int)
 java.nio.channels.GatheringByteChannel#write(java.nio.ByteBuffer[])
 java.nio.channels.ReadableByteChannel#read(java.nio.ByteBuffer)
 java.nio.channels.ScatteringByteChannel#read(java.nio.ByteBuffer[])
 java.nio.channels.ScatteringByteChannel#read(java.nio.ByteBuffer[], int, int)
 java.nio.channels.FileChannel#read(java.nio.ByteBuffer, long)
-@defaultMessage Use XNativeFSLockFactory instead of the buggy NativeFSLockFactory see LUCENE-5738 - remove once Lucene 4.9 is released
+@defaultMessage Use Lucene.parseLenient instead it strips off minor version
-org.apache.lucene.store.NativeFSLockFactory
+org.apache.lucene.util.Version#parseLeniently(java.lang.String)
--- a/dev-tools/build_randomization.rb
+++ b/dev-tools/build_randomization.rb
@ -10,16 +10,129 @@
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on 
+# software distributed under the License is distributed on
 # an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 # either express or implied. See the License for the specific
 # language governing permissions and limitations under the License
 #
-# generate property file for the jdk randomization test
+# NAME
-# 
+#    build_randomization.rb --  Generate property file for the JDK randomization test
 #
 # SYNOPSIS
 #    build_randomization.rb [-d] [-l|t]
 #
 # DESCRIPTION
 #    This script takes the randomization choices described in RANDOM_CHOICES and generates the appropriate Java property file 'prop.txt'.
 #    This property file also contains the appropriate JDK selection, randomized.  JDK randomization is based on what is available in the Jenkins tools
 #    directory.  This script is used by the Jenkins test system to conduct Elasticsearch server randomization testing.
 #
 #    In the hash RANDOM_CHOICES, each key maps to the name of a Java property.  The value describes the possible values of the randomization.
 #
 #    For example  RANDOM_CHOICES = { 'es.node.mode' => {:choices => ['local', 'network'], :method => :get_random_one} } means
 #    es.node.mode will be set to either 'local' or 'network', each with 50% of probability
 #
 # OPTIONS SUMMARY
 #    The options are as follows:
 #
 #       -d, --debug   Increase logging verbosity for debugging purpose
 #       -t, --test    Run in test mode.  The script will execute unit tests.
 #       -l, --local   Run in local mode.  In this mode, directory structure will be created under current directory to mimic
 #                     Jenkins' server directory layout. This mode is mainly used for development.
 require 'enumerator'
 require 'getoptlong'
 require 'log4r'
 require 'optparse'
 require 'rubygems'
 require 'yaml'
 include Log4r
 # Randomization table consumed by RandomizedRunner#generate_selections.
 # Each key is the name of a Java property written to the property file.
 # Each value is either a single descriptor hash or an array of descriptor hashes:
 #   :choices    - data handed to Randomizer.new
 #   :method     - name of the Randomizer method invoked (via __send__) to pick a value
 #   :selections - a preset value, used as-is when no :method is given
 # For an array of descriptors every entry is randomized separately and the
 # picks are joined with a single space when the environment matrix is built.
 RANDOM_CHOICES = {
  'tests.jvm.argline' => [
                {:choices => ['-server'], :method => 'get_random_one'},
                {:choices => ['-XX:+UseConcMarkSweepGC', '-XX:+UseParallelGC', '-XX:+UseSerialGC', '-XX:+UseG1GC'], :method => 'get_random_one'},
                {:choices => ['-XX:+UseCompressedOops', '-XX:-UseCompressedOops'], :method => 'get_random_one'}
               ],
  'es.node.mode' => {:choices => ['local', 'network'], :method => 'get_random_one'},
  # bug forced to be false for now :test_nightly => { :method => :true_or_false},
  'tests.nightly' => {:selections => false},
  # non-array :choices is paired with nil by Randomizer, so the property is only emitted for the given percentage of runs
  'tests.assertion.disabled'=> {:choices => 'org.elasticsearch', :method => 'get_10_percent'},
  # first element (true) is picked with the given percentage, the second with the remainder
  'tests.security.manager' => {:choices => [true, false], :method => 'get_90_percent'},
 }
 # Script-wide logger: writes to stdout at INFO level unless -d raises it to DEBUG.
 L = Logger.new 'test_randomizer'
 L.outputters = Outputter.stdout
 L.level = INFO
 # Run-mode switches, filled in by the command line parser below.
 C = {:local => false, :test => false}
 # Parse (and consume) the command line flags; parse! removes recognised options from ARGV.
 OptionParser.new do |opts|
  # the banner previously advertised a misspelled, non-existent script name
  opts.banner = "Usage: build_randomization.rb [options]"
  opts.on("-d", "--debug", "Debug mode") do |_flag|
    L.level = DEBUG
  end
  opts.on("-l", "--local", "Run in local mode") do |_flag|
    C[:local] = true
  end
  opts.on("-t", "--test", "Run unit tests") do |_flag|
    C[:test] = true
  end
 end.parse!
 class Randomizer
  attr_accessor :data_array
  def initialize(data_array)
    @data_array = data_array
  end
  def true_or_false
    [true, false][rand(2)]
  end
  def get_random_with_distribution(mdata_array, distribution)
    L.debug "randomized distribution data %s" % YAML.dump(mdata_array)
    L.debug "randomized distribution distribution %s" % YAML.dump(distribution)
    carry = 0
    distribution_map = distribution.enum_for(:each_with_index).map { |x,i|  pre_carry = carry ; carry += x; {i => x + pre_carry} }
    random_size = distribution_map.last.values.first
    selection = rand(random_size)
    #get the index that randomize choice mapped to
    choice = distribution_map.select do |x|
      x.values.first > selection   #only keep the index with distribution value that is higher than the random generated number
    end.first.keys.first #first hash's first key is the index we want
    L.debug("randomized distribution choice %s" % mdata_array[choice])
    mdata_array[choice]
  end
  def get_random_one
    data_array[rand(data_array.size)]
  end
  def method_missing(meth, *args, &block)
    # trap randomization based on percentage
    if meth.to_s =~ /^get_(\d+)_percent/
      percentage = $1.to_i
      remain = 100 - percentage
      #data = args.first
      normalized_data = if(!data_array.kind_of?(Array))
                   [data_array, nil]
                 else
                   data_array
                 end
      get_random_with_distribution(normalized_data, [percentage, remain])
    else
      super
    end
  end
 end
 class JDKSelector
  attr_reader :directory, :jdk_list
@ -28,91 +141,333 @@ class JDKSelector
    @directory = directory
  end
-  # get selection of available jdks from jenkins automatic install directory
+  # get selection of available JDKs from Jenkins automatic install directory
  def get_jdk
-    @jdk_list = Dir.entries(directory).select do |x| 
+    @jdk_list = Dir.entries(directory).select do |x|
-      x.chars.first == 'J' 
+      x.chars.first == 'J'
    end.map do |y|
      File.join(directory, y)
    end
    self
  end
-  # do ranomize selection from a given array
+  def filter_java_6(files)
    files.select{ |i| File.basename(i).split(/[^0-9]/)[-1].to_i > 6 }
  end
  # do randomized selection from a given array
  def select_one(selection_array = nil)
    selection_array = filter_java_6(selection_array || @jdk_list)
-    selection_array[rand(selection_array.size)]
+    Randomizer.new(selection_array).get_random_one
    get_random_one(selection_array)
  end
 end
-def get_random_one(data_array)
+  def JDKSelector.generate_jdk_hash(jdk_choice)
-  data_array[rand(data_array.size)]
+    file_separator = if Gem.win_platform?
-end
+                       File::ALT_SEPARATOR
-
+                     else
-def filter_java_6(files)
+                       File::SEPARATOR
-  files.select{ |i| File.basename(i).split(/[^0-9]/)[-1].to_i > 6 }
+                     end
-end
+    {
-
+      :PATH => [jdk_choice, 'bin'].join(file_separator) + File::PATH_SEPARATOR + ENV['PATH'],
-# given a jdk directory selection, generate relevant environment variables
+      :JAVA_HOME => jdk_choice
 def get_env_matrix(data_array)
  #refactoring target
  es_test_jvm_option1 = get_random_one(['-server']) #only server for now get_random_one(['-client', '-server'])
  es_test_jvm_option2 = get_random_one(['-XX:+UseConcMarkSweepGC', '-XX:+UseParallelGC', '-XX:+UseSerialGC', '-XX:+UseG1GC'])
  es_test_jvm_option3 = get_random_one(['-XX:+UseCompressedOops', '-XX:-UseCompressedOops'])
  es_node_mode =  get_random_one(['local', 'network'])
  tests_nightly = get_random_one([true, false])
  tests_nightly = get_random_one([false]) #bug
  test_assert_off = (rand(10) == 9) #10 percent chance turning it off 
  tests_security_manager = (rand(10) != 9) #10 percent chance running without security manager
  arg_line = [es_test_jvm_option1, es_test_jvm_option2, es_test_jvm_option3]
  [*data_array].map do |x|
    data_hash = {
      'PATH' => File.join(x,'bin') + ':' + ENV['PATH'],
      'JAVA_HOME' => x,
      'BUILD_DESC' => "%s,%s,%s%s,%s %s%s%s"%[File.basename(x), es_node_mode, tests_nightly ? 'nightly,':'',
                                            es_test_jvm_option1[1..-1], es_test_jvm_option2[4..-1], es_test_jvm_option3[4..-1],
                                            test_assert_off ? ',assert off' : '', tests_security_manager ? ', security manager enabled' : ''], 
      'es.node.mode' => es_node_mode,
      'tests.nightly' => tests_nightly,
      'tests.security.manager' => tests_security_manager,
      'tests.jvm.argline' => arg_line.join(" "),
    }
    data_hash['tests.assertion.disabled'] = 'org.elasticsearch' if test_assert_off
    data_hash
  end
 end
-# pick first element out of array of hashes, generate write java property file
+#
-def generate_property_file(directory, data)
+# Fix argument JDK selector
-  #array transformation
+#
-  content = data.first.map do |key, value|
+class FixedJDKSelector < JDKSelector
-    "%s=%s"%[key, value]
+  def initialize(directory)
    @directory = [*directory] #selection of directories to pick from
  end
-  file_name = (ENV['BUILD_ID'] + ENV['BUILD_NUMBER']) || 'prop' rescue 'prop'
+
-  file_name = file_name.split(File::SEPARATOR).first + '.txt'
+  def get_jdk
-  File.open(File.join(directory, file_name), 'w') do |file| 
+    #since JDK selection is already specified..jdk list is the @directory
-              file.write(content.join("\n")) 
+    @jdk_list = @directory
    self
  end
  def select_one(selection_array = nil)
    #bypass filtering since this is not automatic
    selection_array ||= @jdk_list
    Randomizer.new(selection_array).get_random_one
  end
 end
-working_directory = ENV['WORKSPACE'] || '/var/tmp'
+#
-unless(ENV['BUILD_ID'])
+# Property file writer
-  #local mode set up fake environment 
+#
-  test_directory = 'tools/hudson.model.JDK/'
+class PropertyWriter
-  unless(File.exist?(test_directory))
+  attr_reader :working_directory
    puts "running local mode, setting up running environment"
    puts "properties are written to file prop.txt"
    system("mkdir -p %sJDK{6,7}"%test_directory)
  end
  working_directory = ENV['PWD']
 end
 # jenkins sets pwd prior to execution
 jdk_selector = JDKSelector.new(File.join(ENV['PWD'],'tools','hudson.model.JDK'))
 environment_matrix = get_env_matrix(jdk_selector.get_jdk.select_one)
-generate_property_file(working_directory, environment_matrix)
+  def initialize(mworking_directory)
    @working_directory = mworking_directory
  end
  # # pick first element out of array of hashes, generate write java property file
  def generate_property_file(data)
    directory = working_directory
    #array transformation
    content = data.to_a.map do |x|
      x.join('=')
    end.sort
    file_name = (ENV['BUILD_ID'] + ENV['BUILD_NUMBER']) || 'prop' rescue 'prop'
    file_name = file_name.split(File::SEPARATOR).first + '.txt'
    L.debug "Property file name is %s" % file_name
    File.open(File.join(directory, file_name), 'w') do |file|
      file.write(content.join("\n"))
    end
  end
 end
 #
 # Execute randomization logics
 #
 # Ties the pieces together: randomizes the configured choices, combines them
 # with the chosen JDK and hands the result to the property writer.
 class RandomizedRunner
  attr_reader :random_choices, :jdk, :p_writer
  # mrandom_choices - randomization table (same shape as RANDOM_CHOICES)
  # mjdk            - the selected JDK home directory
  # mwriter         - object responding to generate_property_file(hash)
  def initialize(mrandom_choices, mjdk, mwriter)
    @random_choices = mrandom_choices
    @jdk = mjdk
    @p_writer = mwriter
  end
  # Runs every configured randomization and returns a hash of
  # property name => array of selected values.
  # NOTE: the picks are stored under :selections inside the configuration
  # hashes themselves, i.e. the table passed to the constructor is mutated.
  def generate_selections
    configuration = random_choices
    L.debug "Enter %s" % __method__
    L.debug "Configuration %s" % YAML.dump(configuration)
    generated = {}
    # first pass: fill in :selections for every descriptor that names a :method
    configuration.each do |k, v|
      if(v.kind_of?(Hash))
        if(v.has_key?(:method))
          randomizer = Randomizer.new(v[:choices])
          v[:selections] = randomizer.__send__(v[:method])
        end
      else
        # anything that is not a Hash is treated as a list of descriptors
        v.each do |x|
          if(x.has_key?(:method))
            randomizer = Randomizer.new(x[:choices])
            x[:selections] = randomizer.__send__(x[:method])
          end
        end
      end
    # second pass (each returns the hash it iterated over): collect the selections
    end.each do |k, v|
      if(v.kind_of?(Array))
        # nil picks are not filtered out for descriptor lists
        selections = v.inject([]) do |sum, current_hash|
          sum.push(current_hash[:selections])
        end
      else
        # a nil pick leaves selections unset so the property is omitted below
        selections = [v[:selections]] unless v[:selections].nil?
      end
      generated[k] = selections unless (selections.nil? || selections.size == 0)
    end
    L.debug "Generated selections %s" % YAML.dump(generated)
    generated
  end
  # Builds the final property hash.
  # jdk_selection - JDK home directory passed to JDKSelector.generate_jdk_hash
  # selections    - output of generate_selections
  # Returns the JDK hash merged with the flattened selections and a
  # :BUILD_DESC summary entry.
  def get_env_matrix(jdk_selection, selections)
    L.debug "Enter %s" % __method__
    #normalization
    s = {}
    selections.each do |k, v|
      if(v.size > 1)
        s[k] = v.join(' ') #this should be dependent on class of v[0] and perform reduce operation instead... good enough for now
      else
        s[k] = v.first
      end
    end
    j = JDKSelector.generate_jdk_hash(jdk_selection)
    # create build description line
    desc = {}
    # TODO: better error handling
    # NOTE: 'tests.jvm.argline' must be present in the selections; gsub is called on it unconditionally
    desc[:BUILD_DESC] = "%s,%s,%s%s%s%s" % [
                                            File.basename(j[:JAVA_HOME]),
                                            s['es.node.mode'],
                                            s['tests.nightly'] ? 'nightly,':'',
                                            s['tests.jvm.argline'].gsub(/-XX:/,''),
                                            s.has_key?('tests.assertion.disabled')? ',assert off' : '',
                                            s['tests.security.manager'] ? ',sec manager on' : ''
                                           ]
    # later hashes win on key clashes: JDK entries, then selections, then the description
    result = j.merge(s).merge(desc)
    L.debug(YAML.dump(result))
    result
  end
  # Generates the selections, builds the environment matrix for the configured
  # JDK and passes it to the property writer.
  def run!
    p_writer.generate_property_file(get_env_matrix(jdk, generate_selections))
  end
 end
 #
 # Main
 #
 # Entry point: unless unit tests were requested (-t), pick a working
 # directory and a JDK selector for the current platform, write the property
 # file and exit. In test mode only test/unit is loaded so that the test
 # classes defined further down in this file can run.
 unless(C[:test])
  # Check to see if this is running locally
  unless(C[:local])
    L.debug("Normal Mode")
    working_directory = ENV.fetch('WORKSPACE', (Gem.win_platform? ? Dir.pwd : '/var/tmp'))
  else
    L.debug("Local Mode")
    # FileUtils is not loaded by any of the requires at the top of the file,
    # so pull it in before creating the fake Jenkins tool directories.
    require 'fileutils'
    test_directory = 'tools/hudson.model.JDK/'
    unless(File.exist?(test_directory))
      L.info "running local mode, setting up running environment"
      L.info "properties are written to file prop.txt"
      FileUtils.mkpath "%sJDK6" % test_directory
      FileUtils.mkpath "%sJDK7" % test_directory
    end
    working_directory = Dir.pwd
  end
  # script support both window and linux
  # TODO: refactor into platform/machine dependent class structure
  jdk = if(Gem.win_platform?)
          #window mode jdk directories are fixed
          #TODO: better logic
          L.debug("Window Mode")
          if(File.directory?('y:\jdk7\7u55'))   #old window system under ec2
             FixedJDKSelector.new('y:\jdk7\7u55')
          else  #new metal window system
             FixedJDKSelector.new(['c:\PROGRA~1\JAVA\jdk1.8.0_05', 'c:\PROGRA~1\JAVA\jdk1.7.0_55'])
          end
        else
          #Jenkins sets pwd prior to execution
          L.debug("Linux Mode")
          JDKSelector.new(File.join(ENV['PWD'],'tools','hudson.model.JDK'))
        end
  runner = RandomizedRunner.new(RANDOM_CHOICES,
                               jdk.get_jdk.select_one,
                               PropertyWriter.new(working_directory))
  environment_matrix = runner.run!
  # exit before the Test::Unit classes below are defined
  exit 0
 else
  require "test/unit"
 end
 #
 # Test
 #
 # Unit test for the JDK environment hash built by JDKSelector.
 class TestJDKSelector < Test::Unit::TestCase
  # class-local logger, always at DEBUG
  L = Logger.new('test')
  L.outputters = Outputter.stdout
  L.level = DEBUG
  # Both :PATH and :JAVA_HOME must mention the chosen JDK directory.
  def test_hash_generator
    home = '/dummy/jdk7'
    env = JDKSelector.generate_jdk_hash(home)
    L.debug("Generated %s" % env)
    assert(env[:PATH].include?(home), "PATH doesn't included choice")
    assert(env[:JAVA_HOME].include?(home), "JAVA home doesn't include choice")
  end
 end
 # Unit tests for FixedJDKSelector.
 class TestFixJDKSelector < Test::Unit::TestCase
  # class-local logger, always at DEBUG
  L = Logger.new('test')
  L.outputters = Outputter.stdout
  L.level = DEBUG
  # A single path and a list of paths must both end up as an array in #directory.
  def test_initialize
    inputs = ['/home/dummy', ['/JDK7', '/home2'], ['home/dummy']]
    inputs.each do |given|
      selector = FixedJDKSelector.new(given)
      assert_kind_of(Array, selector.directory)
      assert_equal([*given], selector.directory)
    end
  end
  # The selected JDK must be one of the configured candidates.
  def test_select_one
    candidates = ['one', 'two', 'three']
    selector = FixedJDKSelector.new(candidates)
    picked = selector.get_jdk.select_one
    assert(candidates.include?(picked))
  end
  # Both :PATH and :JAVA_HOME must mention the chosen JDK directory.
  def test_hash_generator
    home = '/dummy/jdk7'
    env = FixedJDKSelector.generate_jdk_hash(home)
    L.debug("Generated %s" % env)
    assert(env[:PATH].include?(home), "PATH doesn't included choice")
    assert(env[:JAVA_HOME].include?(home), "JAVA home doesn't include choice")
  end
 end
 # Unit tests for PropertyWriter.
 class TestPropertyWriter < Test::Unit::TestCase
  L = Logger.new 'test'
  L.outputters = Outputter.stdout
  L.level = DEBUG
  # The working directory given to the constructor is exposed unchanged.
  def test_initialize
    ['/home/dummy','/tmp'].each do |x|
      test_object = PropertyWriter.new(x)
      assert_kind_of String, test_object.working_directory
      assert_equal x, test_object.working_directory
    end
  end
  # Writes a one-entry property file and checks its content.
  # NOTE(review): expects the writer to fall back to the name 'prop.txt',
  # which presumably requires BUILD_ID / BUILD_NUMBER to be unset - confirm.
  def test_generate_property
    test_file = '/tmp/prop.txt'
    File.delete(test_file) if File.exist?(test_file)
    test_object = PropertyWriter.new(File.dirname(test_file))
    begin
      # default prop.txt
      test_object.generate_property_file({:hi => 'there'})
      assert(File.exist?(test_file))
      File.open(test_file, 'r') do |properties_file|
        properties_file.read.each_line do |line|
          line.strip!
          assert_equal 'hi=there', line, "content %s is not hi=there" % line
        end
      end
    ensure
      # remove the file even when an assertion above fails
      File.delete(test_file) if File.exist?(test_file)
    end
  end
 end
 # PropertyWriter stand-in for tests: logs what would be written instead of
 # creating a file.
 class DummyPropertyWriter < PropertyWriter
  def generate_property_file(data)
    dumped = YAML.dump(data)
    L.debug("generating property file for %s" % dumped)
    L.debug("on directory %s" % working_directory)
  end
 end
 # Unit tests for RandomizedRunner.
 class TestRandomizedRunner < Test::Unit::TestCase
  # Constructor arguments are exposed through the readers.
  def test_initialize
    writer = PropertyWriter.new('/tmp')
    runner = RandomizedRunner.new(RANDOM_CHOICES, '/tmp/dummy/jdk', writer)
    assert_equal(RANDOM_CHOICES, runner.random_choices)
    assert_equal('/tmp/dummy/jdk', runner.jdk)
    assert_equal(writer, runner.p_writer)
  end
  # A preset :selections value is passed through when no :method is given.
  def test_generate_selection_no_method
    choices = {'tests.one' => {:selections => false }}
    runner = RandomizedRunner.new(choices, '/tmp/dummy/jdk', DummyPropertyWriter.new('/tmp'))
    picked = runner.generate_selections
    assert_equal(false, picked['tests.one'].first, 'randomization without selection method fails')
  end
  # A descriptor with a :method yields one of its :choices.
  def test_generate_with_method
    choices = {'es.node.mode' => {:choices => ['local', 'network'], :method => 'get_random_one'}}
    runner = RandomizedRunner.new(choices, '/tmp/dummy/jdk', DummyPropertyWriter.new('/tmp'))
    picked = runner.generate_selections
    assert(['local', 'network'].include?(picked['es.node.mode'].first), 'selection choice is not correct')
  end
  # The environment matrix carries the JDK directory as :JAVA_HOME.
  def test_get_env_matrix
    runner = RandomizedRunner.new(RANDOM_CHOICES, '/tmp/dummy/jdk', DummyPropertyWriter.new('/tmp'))
    picked = runner.generate_selections
    matrix = runner.get_env_matrix('/tmp/dummy/jdk', picked)
    puts(YAML.dump(matrix))
    assert_equal('/tmp/dummy/jdk', matrix[:JAVA_HOME])
  end
 end
--- a/dev-tools/build_release.py
+++ b/dev-tools/build_release.py
@ -234,7 +234,7 @@ def run_mvn(*cmd):
  for c in cmd:
    run('%s; %s %s' % (java_exe(), MVN, c))
-def build_release(run_tests=False, dry_run=True, cpus=1):
+def build_release(run_tests=False, dry_run=True, cpus=1, bwc_version=None):
  target = 'deploy'
  if dry_run:
    target = 'package'
@ -242,6 +242,9 @@ def build_release(run_tests=False, dry_run=True, cpus=1):
    run_mvn('clean',
            'test -Dtests.jvms=%s -Des.node.mode=local' % (cpus),
            'test -Dtests.jvms=%s -Des.node.mode=network' % (cpus))
  if bwc_version:
      print('Running Backwards compatibilty tests against version [%s]' % (bwc_version))
      run_mvn('clean', 'test -Dtests.filter=@backwards -Dtests.bwc.version=%s -Dtests.bwc=true -Dtests.jvms=1' % bwc_version)
  run_mvn('clean test-compile -Dforbidden.test.signatures="org.apache.lucene.util.LuceneTestCase\$AwaitsFix @ Please fix all bugs before release"')
  run_mvn('clean %s -DskipTests' % (target))
  success = False
@ -345,7 +348,7 @@ def generate_checksums(files):
    directory = os.path.dirname(release_file)
    file = os.path.basename(release_file)
    checksum_file = '%s.sha1.txt' % file
-    
+
    if os.system('cd %s; shasum %s > %s' % (directory, file, checksum_file)):
      raise RuntimeError('Failed to generate checksum for file %s' % release_file)
    res = res + [os.path.join(directory, checksum_file), release_file]
@ -379,12 +382,12 @@ def smoke_test_release(release, files, expected_hash, plugins):
      raise RuntimeError('Smoketest failed missing file %s' % (release_file))
    tmp_dir = tempfile.mkdtemp()
    if release_file.endswith('tar.gz'):
-      run('tar -xzf %s -C %s' % (release_file, tmp_dir)) 
+      run('tar -xzf %s -C %s' % (release_file, tmp_dir))
    elif release_file.endswith('zip'):
-      run('unzip %s -d %s' % (release_file, tmp_dir)) 
+      run('unzip %s -d %s' % (release_file, tmp_dir))
    else:
      log('Skip SmokeTest for [%s]' % release_file)
-      continue # nothing to do here 
+      continue # nothing to do here
    es_run_path = os.path.join(tmp_dir, 'elasticsearch-%s' % (release), 'bin/elasticsearch')
    print('  Smoke testing package [%s]' % release_file)
    es_plugin_path = os.path.join(tmp_dir, 'elasticsearch-%s' % (release),'bin/plugin')
@ -472,7 +475,7 @@ def print_sonartype_notice():
       for line in settings_file:
         if line.strip() == '<id>sonatype-nexus-snapshots</id>':
           # moving out - we found the indicator no need to print the warning
-           return 
+           return
  print("""
    NOTE: No sonartype settings detected, make sure you have configured
    your sonartype credentials in '~/.m2/settings.xml':
@ -499,12 +502,29 @@ def check_s3_credentials():
  if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None):
    raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3')
-VERSION_FILE = 'src/main/java/org/elasticsearch/Version.java'    
+VERSION_FILE = 'src/main/java/org/elasticsearch/Version.java'
 POM_FILE = 'pom.xml'
-# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml 
+# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml
 print_sonartype_notice()
 # finds the highest available bwc version to test against
 def find_bwc_version(release_version, bwc_dir='backwards'):
  """Return the highest version found in bwc_dir that is lower than release_version.

  release_version -- dotted numeric version string of the release being built
  bwc_dir         -- directory holding unpacked 'elasticsearch-x.y.z' releases

  Returns the version string (without the 'elasticsearch-' prefix), or None
  when bwc_dir is missing, is not a directory, or holds no smaller version.
  Sub-directories whose version suffix is not purely numeric are ignored
  instead of aborting the release with a ValueError.
  """
  prefix = 'elasticsearch-'
  log('  Lookup bwc version in directory [%s]' % bwc_dir)
  bwc_version = None
  if os.path.exists(bwc_dir) and os.path.isdir(bwc_dir):
    max_version = [int(x) for x in release_version.split('.')]
    best = None  # parsed form of bwc_version, kept to avoid re-parsing on every comparison
    for entry in os.listdir(bwc_dir):
      if not (os.path.isdir(os.path.join(bwc_dir, entry)) and entry.startswith(prefix)):
        continue
      candidate = entry[len(prefix):]
      try:
        version = [int(x) for x in candidate.split('.')]
      except ValueError:
        # not a plain numeric release directory - skip it
        continue
      # bwc tests only against smaller versions; keep the largest of those
      if version < max_version and (best is None or version > best):
        best = version
        bwc_version = candidate
    log('  Using bwc version [%s]' % bwc_version)
  else:
    log('  bwc directory [%s] does not exists or is not a directory - skipping' % bwc_dir)
  return bwc_version
 if __name__ == '__main__':
  parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Release')
  parser.add_argument('--branch', '-b', metavar='master', default=get_current_branch(),
@ -520,11 +540,13 @@ if __name__ == '__main__':
                      help='Publishes the release. Disable by default.')
  parser.add_argument('--smoke', '-s', dest='smoke', default='',
                      help='Smoke tests the given release')
  parser.add_argument('--bwc', '-w', dest='bwc', metavar='backwards', default='backwards',
                      help='Backwards compatibility version path to use to run compatibility tests against')
  parser.set_defaults(dryrun=True)
  parser.set_defaults(smoke=None)
  args = parser.parse_args()
-
+  bwc_path = args.bwc
  src_branch = args.branch
  remote = args.remote
  run_tests = args.tests
@ -534,7 +556,7 @@ if __name__ == '__main__':
  smoke_test_version = args.smoke
  if not dry_run:
    check_s3_credentials()
-    print('WARNING: dryrun is set to "false" - this will push and publish the release') 
+    print('WARNING: dryrun is set to "false" - this will push and publish the release')
    input('Press Enter to continue...')
  print(''.join(['-' for _ in range(80)]))
@ -574,7 +596,7 @@ if __name__ == '__main__':
        print('  Running maven builds now and publish to sonartype - run-tests [%s]' % run_tests)
      else:
        print('  Running maven builds now run-tests [%s]' % run_tests)
-      build_release(run_tests=run_tests, dry_run=dry_run, cpus=cpus)
+      build_release(run_tests=run_tests, dry_run=dry_run, cpus=cpus, bwc_version=find_bwc_version(release_version, bwc_path))
      artifacts = get_artifacts(release_version)
      artifacts_and_checksum = generate_checksums(artifacts)
      smoke_test_release(release_version, artifacts, get_head_hash(), PLUGINS)
--- a/dev-tools/es_release_notes.pl
+++ b/dev-tools/es_release_notes.pl
@ -20,6 +20,7 @@ use warnings;
 use HTTP::Tiny;
 use IO::Socket::SSL 1.52;
 use utf8;
 my $Base_URL  = 'https://api.github.com/repos/';
 my $User_Repo = 'elasticsearch/elasticsearch/';
@ -85,6 +86,9 @@ sub dump_issues {
            }
            for my $issue (@$header_issues) {
                my $title = $issue->{title};
                $title=~s{`([^`]+)`}{<code>$1</code>}g
                    if $format eq 'html';
                if ( $issue->{state} eq 'open' ) {
                    $title .= " [OPEN]";
                }
--- a/dev-tools/tests.policy
+++ b/dev-tools/tests.policy
@ -27,10 +27,11 @@ grant {
  permission java.io.FilePermission "${junit4.childvm.cwd}", "read,execute,write";
  permission java.io.FilePermission "${junit4.childvm.cwd}${/}-", "read,execute,write,delete";
  permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete";
-  
+  permission groovy.security.GroovyCodeSourcePermission "/groovy/script";
  // Allow connecting to the internet anywhere
  permission java.net.SocketPermission "*", "accept,listen,connect,resolve";
-  
+
  // Basic permissions needed for Lucene / Elasticsearch to work:
  permission java.util.PropertyPermission "*", "read,write";
  permission java.lang.reflect.ReflectPermission "*";
--- a/dev-tools/upgrade-tests.py
+++ b/dev-tools/upgrade-tests.py
@ -0,0 +1,321 @@
 # Licensed to Elasticsearch under one or more contributor
 # license agreements. See the NOTICE file distributed with
 # this work for additional information regarding copyright
 # ownership. Elasticsearch licenses this file to you under
 # the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance  with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on
 # an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 # either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
 import random
 import os
 import tempfile
 import shutil
 import subprocess
 import time
 import argparse
 import logging
 import sys
 import re
 from datetime import datetime
 try:
  from elasticsearch import Elasticsearch
  from elasticsearch.exceptions import ConnectionError
  from elasticsearch.exceptions import TransportError
 except ImportError as e:
  print('Can\'t import elasticsearch please install `sudo pip install elasticsearch`')
  raise e
 '''This file executes a basic upgrade test by running a full cluster restart.
 The upgrade test starts 2 or more nodes of an old elasticsearch version, indexes
 a random number of documents into the running nodes and executes a full cluster restart.
 After the nodes are recovered a small set of basic checks are executed to ensure all
 documents are still searchable and field data can be loaded etc.
 NOTE: This script requires the elasticsearch python client `elasticsearch-py` run the following command to install:
  `sudo pip install elasticsearch`
 if you are running python3 you need to install the client using pip3. On OSX `pip3` will be included in the Python 3.4
 release available on `https://www.python.org/download/`:
  `sudo pip3 install elasticsearch`
 See `https://github.com/elasticsearch/elasticsearch-py` for details
 In order to run this test two different versions of elasticsearch are required. Both need to be unpacked into
 the same directory:
 ```
   $ cd /path/to/elasticsearch/clone
   $ mkdir backwards && cd backwards
   $ wget  https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.3.1.tar.gz
   $ wget  https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.tar.gz
   $ tar -zxvf elasticsearch-1.3.1.tar.gz && tar -zxvf elasticsearch-0.90.13.tar.gz
   $ cd ..
   $ python dev-tools/upgrade-tests.py --version.backwards 0.90.13 --version.current 1.3.1
 ```
 '''
 # Versions that must not take part in an upgrade test, keyed by version string.
 # Each entry carries a human readable 'reason' and a link to the related 'issue';
 # both are printed by assert_versions when a blacklisted version is requested.
 BLACK_LIST = {'1.2.0' : { 'reason': 'Contains a major bug where routing hashes are not consistent with previous version',
                          'issue': 'https://github.com/elasticsearch/elasticsearch/pull/6393'},
              '1.3.0' : { 'reason': 'Lucene Related bug prevents upgrades from 0.90.7 and some earlier versions ',
                          'issue' : 'https://github.com/elasticsearch/elasticsearch/pull/7055'}}
 # sometimes returns True
 def rarely():
  """Returns True when a random draw from 0..10 (inclusive) comes up 0."""
  roll = random.randint(0, 10)
  return roll == 0
 # usually returns True
 def frequently():
  """Returns True whenever rarely() does not."""
  if rarely():
    return False
  return True
 # asserts the correctness of the given hits given they are sorted asc
 def assert_sort(hits):
  """Asserts that a search response is non-empty and sorted ascending.

  `hits` is a search response; the 'sort' entry of every element of
  hits['hits']['hits'] is compared with the one before it.

  Raises AssertionError on an empty result or on the first value that is
  smaller than its predecessor.
  """
  values = [hit['sort'] for hit in hits['hits']['hits']]
  assert len(values) > 0, 'expected non empty result'
  # pairwise comparison: every value must be >= the one before it
  for prev, cur in zip(values, values[1:]):
    assert cur >= prev, 'sort values are not ascending: %s < %s' % (cur, prev)
 # asserts that the cluster health didn't timeout etc.
 def assert_health(cluster_health, num_shards, num_replicas):
  """Fails with AssertionError if the cluster health response reports a timeout.

  num_shards and num_replicas are accepted but not inspected.
  """
  timed_out = cluster_health['timed_out']
  assert timed_out == False, 'cluster health timed out %s' % cluster_health
 # Starts a new elasticsearch node from a released & untarred version.
 # This node uses unicast discovery with the provided unicast host list and starts
 # the nodes with the given data directory. This allows shutting down and starting up
 # nodes on the same data dir simulating a full cluster restart.
 def start_node(version, data_dir, node_dir, unicast_host_list, tcp_port, http_port):
  """Launches one elasticsearch node as a child process and returns its Popen handle.

  The launcher is `<node_dir>/elasticsearch-<version>/bin/elasticsearch`. The
  node is configured through -D settings: data path, the fixed cluster name
  'upgrade_test', unicast discovery against `unicast_host_list` with
  multicast disabled, dynamic scripting disabled and the given ports.
  stdout and stderr of the child are attached to pipes.
  """
  launcher = os.path.join(node_dir, 'elasticsearch-%s' % (version), 'bin/elasticsearch')
  # 0.90.x starts in background automatically, so it gets -f; other versions get an empty argument
  foreground = '-f' if version.startswith('0.90.') else ''
  command = [launcher]
  command.append('-Des.path.data=%s' % data_dir)
  command.append('-Des.cluster.name=upgrade_test')
  command.append('-Des.discovery.zen.ping.unicast.hosts=%s' % unicast_host_list)
  command.append('-Des.discovery.zen.ping.multicast.enabled=false')
  command.append('-Des.script.disable_dynamic=true')
  command.append('-Des.transport.tcp.port=%s' % tcp_port)
  command.append('-Des.http.port=%s' % http_port)
  command.append(foreground)
  return subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 # Indexes the given number of document into the given index
 # and randomly runs refresh, optimize and flush commands
 def index_documents(es, index_name, type, num_docs):
  """Indexes `num_docs` random documents with ids 0..num_docs-1 into `index_name`.

  Every document gets a random 'string', 'long_sort' and 'double_sort' field.
  After each document a refresh and/or a flush may be issued at random; once
  the whole batch is written an optimize may be issued, and a final refresh
  is always executed.
  """
  logging.info('Indexing %s docs' % num_docs)
  for doc_id in range(num_docs):
    # draw the field values in the same order as they appear in the document
    string_value = str(random.randint(0, 100))
    long_value = random.randint(0, 100)
    double_value = float(random.randint(0, 100))
    document = {'string': string_value,
                'long_sort': long_value,
                'double_sort' : double_value}
    es.index(index=index_name, doc_type=type, id=doc_id, body=document)
    if rarely():
      es.indices.refresh(index=index_name)
    if rarely():
      force_flush = frequently()
      es.indices.flush(index=index_name, force=force_flush)
  # at most one optimize, after the whole batch has been indexed
  if rarely():
    es.indices.optimize(index=index_name)
  es.indices.refresh(index=index_name)
 # Runs a basic number of assertions including:
 #  - document counts
 #  - match all search with sort on double / long
 #  - Realtime GET operations
 # TODO(simonw): we should add stuff like:
 #  - dates including sorting
 #  - string sorting
 #  - docvalues if available
 #  - global ordinal if available
 def run_basic_asserts(es, index_name, type, num_docs):
  """Runs the basic post-indexing checks against `index_name`.

  Verifies that the document count equals `num_docs`, fetches `num_docs`
  randomly chosen ids via GET and checks that a match-all search sorted
  ascending on 'double_sort' and then on 'long_sort' is really sorted.
  Raises AssertionError on the first failed check.
  """
  found = es.count(index=index_name)['count']
  assert found == num_docs, 'Expected %r but got %r documents' % (num_docs, found)
  for _ in range(num_docs):
    random_doc_id = random.randint(0, num_docs-1)
    doc = es.get(index=index_name, doc_type=type, id=random_doc_id)
    assert doc, 'Expected document for id %s but got %s' % (random_doc_id, doc)
  # same query shape for both numeric fields, double first
  for sort_field in ('double_sort', 'long_sort'):
    query = {'sort': [{sort_field: {'order': 'asc'}}]}
    assert_sort(es.search(index=index_name, body=query))
 # picks a random version or an entire random version tuple from the directory
 # to run the backwards tests against.
 def pick_random_upgrade_version(directory, lower_version=None, upper_version=None):
  """Chooses the (from, to) version pair for an upgrade test.

  If both versions are given they are returned unchanged. Otherwise
  `directory` is scanned for entries named `elasticsearch-X.Y.Z`, versions
  listed in BLACK_LIST are dropped, and the missing side(s) are picked at
  random:
   - only `lower_version` given: a random strictly higher version is chosen
   - only `upper_version` given: a random strictly lower version is chosen
   - neither given: two distinct random versions, returned in ascending order

  Returns a tuple of two version strings (lower, upper).
  Raises AssertionError if `directory` is not a directory or does not hold
  enough candidate versions.
  """
  if lower_version and upper_version:
    return lower_version, upper_version
  assert os.path.isdir(directory), 'No such directory %s' % directory
  prefix = 'elasticsearch-'
  versions = []
  for name in os.listdir(directory):
    if not re.match(r'^elasticsearch-\d+[.]\d+[.]\d+$', name):
      continue
    version = name[len(prefix):]
    if not version in BLACK_LIST:
      versions.append(build_tuple(version))
  versions.sort()
  # NOTE: the candidates are materialized as lists on purpose - filter() is lazy
  # on Python 3 and supports neither len() nor random.shuffle()
  if lower_version: # lower version is set - picking a higher one
    lower = build_tuple(lower_version)
    versions = [x for x in versions if x > lower]
    assert len(versions) >= 1, 'Expected at least 1 higher version than %s version in %s ' % (lower_version, directory)
    random.shuffle(versions)
    return lower_version, build_version(versions[0])
  if upper_version:
    upper = build_tuple(upper_version)
    versions = [x for x in versions if x < upper]
    assert len(versions) >= 1, 'Expected at least 1 lower version than %s version in %s ' % (upper_version, directory)
    random.shuffle(versions)
    return build_version(versions[0]), upper_version
  assert len(versions) >= 2, 'Expected at least 2 different version in %s but found %s' % (directory, len(versions))
  random.shuffle(versions)
  versions = versions[0:2]
  versions.sort()
  return build_version(versions[0]), build_version(versions[1])
 def build_version(version_tuple):
  """Joins the components of a version sequence with dots, e.g. [1, 3, 1] -> '1.3.1'."""
  parts = map(str, version_tuple)
  return '.'.join(parts)
 def build_tuple(version_string):
  """Splits a dotted version string into a list of ints, e.g. '1.3.1' -> [1, 3, 1]."""
  components = version_string.split('.')
  return list(map(int, components))
 # returns a new elasticsearch client and ensures that all nodes have joined the cluster
 # this method waits at most 30 seconds for all nodes to join
 def new_es_instance(num_nodes, http_port, timeout = 30):
  """Creates a client for the local nodes and waits until the cluster is usable.

  The client is pointed at 127.0.0.1 on ports http_port .. http_port+num_nodes-1.
  Up to `timeout` attempts are made, one second apart; an attempt succeeds
  once the cluster health call (waiting for `num_nodes` nodes) and a count
  request both complete without a ConnectionError / TransportError.

  Returns the connected client; raises AssertionError when all attempts fail.
  """
  logging.info('Waiting for %s nodes to join the cluster' % num_nodes)
  for _attempt in range(timeout):
    # TODO(simonw): ask Honza if there is a better way to do this?
    hosts = []
    for offset in range(num_nodes):
      hosts.append({'host': '127.0.0.1', 'port': http_port + offset})
    try:
      client = Elasticsearch(hosts)
      client.cluster.health(wait_for_nodes=num_nodes)
      client.count() # can we actually search or do we get a 503? -- anyway retry
    except (ConnectionError, TransportError):
      time.sleep(1)
    else:
      return client
  assert False, 'Timed out waiting for %s nodes for %s seconds' % (num_nodes, timeout)
 def assert_versions(bwc_version, current_version, node_dir):
  """Validates the version pair of an upgrade test before any node is started.

  Checks that
   - `bwc_version` is strictly lower than `current_version`,
   - neither version is listed in BLACK_LIST,
   - `node_dir` contains an `elasticsearch-<version>` directory for BOTH
     versions.

  Raises AssertionError describing the first violated condition.
  """
  assert [int(x) for x in bwc_version.split('.')] < [int(x) for x in current_version.split('.')],\
      '[%s] must be < than [%s]' % (bwc_version, current_version)
  for version in [bwc_version, current_version]:
    assert not version in BLACK_LIST, 'Version %s is blacklisted - %s, see %s' \
                                          % (version, BLACK_LIST[version]['reason'],
                                             BLACK_LIST[version]['issue'])
    # look up the install directory of the version being checked in this iteration,
    # otherwise a missing bwc install would go unnoticed until the node fails to start
    install_dir = os.path.join(node_dir, 'elasticsearch-%s' % version)
    assert os.path.isdir(install_dir), 'Expected elasticsearch-%s install directory does not exists: %s' % (version, install_dir)
 def full_cluster_restart(node_dir, current_version, bwc_version, tcp_port, http_port):
  """Runs one full-cluster-restart upgrade test from `bwc_version` to `current_version`.

  Steps:
   1. validate the version pair and start 2-3 nodes of `bwc_version` on a
      fresh temporary data directory
   2. create 'test_index' with a random number of shards / replicas, index a
      random number of documents and run the basic assertions
   3. terminate the old nodes and start one node more of `current_version`
      on the same data directory, using the next free port range
   4. wait for green, re-run the assertions, index the documents again and
      assert once more

  All started processes are terminated and the temporary data directory is
  removed in any case. Assertion failures and client errors propagate to the
  caller.

  NOTE: the log line below reads the module level `seed`, which is only
  assigned in the `__main__` block of this script.
  """
  assert_versions(bwc_version, current_version, node_dir)
  num_nodes = random.randint(2, 3)
  nodes = []
  data_dir = tempfile.mkdtemp()
  logging.info('Running upgrade test from [%s] to [%s] seed: [%s] es.path.data: [%s] es.http.port [%s] es.tcp.port [%s]'
        % (bwc_version, current_version, seed, data_dir, http_port, tcp_port))
  try:
    logging.info('Starting %s BWC nodes of version %s' % (num_nodes, bwc_version))
    # every node gets its own consecutive tcp / http port starting at the given base ports
    unicast_addresses = ','.join(['127.0.0.1:%s' % (tcp_port+x) for x in range(0, num_nodes)])
    for id in range(0, num_nodes):
      nodes.append(start_node(bwc_version, data_dir, node_dir, unicast_addresses, tcp_port+id, http_port+id))
    es = new_es_instance(num_nodes, http_port)
    # start from a clean slate; a missing index (404) is not an error
    es.indices.delete(index='test_index', ignore=404)
    num_shards = random.randint(1, 10)
    num_replicas = random.randint(0, 1)
    logging.info('Create index with [%s] shards and [%s] replicas' % (num_shards, num_replicas))
    es.indices.create(index='test_index', body={
        # TODO(simonw): can we do more here in terms of randomization - seems hard due to all the different version
        'settings': {
            'number_of_shards': num_shards,
            'number_of_replicas': num_replicas
        }
    })
    logging.info('Nodes joined, waiting for green status')
    health = es.cluster.health(wait_for_status='green', wait_for_relocating_shards=0)
    assert_health(health, num_shards, num_replicas)
    num_docs = random.randint(10, 100)
    index_documents(es, 'test_index', 'test_type', num_docs)
    logging.info('Run basic asserts before full cluster restart')
    run_basic_asserts(es, 'test_index', 'test_type', num_docs)
    logging.info('kill bwc nodes -- prepare upgrade')
    for node in nodes:
      node.terminate()
    # now upgrade the nodes and rerun the checks
    tcp_port = tcp_port + len(nodes) # bump up port to make sure we can claim them
    http_port = http_port + len(nodes)
    logging.info('Full Cluster restart starts upgrading to version [elasticsearch-%s] es.http.port [%s] es.tcp.port [%s]'
                 % (current_version, http_port, tcp_port))
    nodes = []
    # NOTE(review): the unicast list covers num_nodes addresses while num_nodes+1 nodes are started below
    unicast_addresses = ','.join(['127.0.0.1:%s' % (tcp_port+x) for x in range(0, num_nodes)])
    for id in range(0, num_nodes+1): # one more to trigger relocation
      nodes.append(start_node(current_version, data_dir, node_dir, unicast_addresses, tcp_port+id, http_port+id))
    es = new_es_instance(num_nodes+1, http_port)
    logging.info('Nodes joined, waiting for green status')
    health = es.cluster.health(wait_for_status='green', wait_for_relocating_shards=0)
    assert_health(health, num_shards, num_replicas)
    run_basic_asserts(es, 'test_index', 'test_type', num_docs)
    # by running the indexing again we try to catch possible mapping problems after the upgrade
    index_documents(es, 'test_index', 'test_type', num_docs)
    run_basic_asserts(es, 'test_index', 'test_type', num_docs)
    logging.info("[SUCCESS] - all test passed upgrading from version [%s] to version [%s]" % (bwc_version, current_version))
  finally:
    # `nodes` holds whichever generation of processes was started last
    for node in nodes:
      node.terminate()
    time.sleep(1) # wait a second until removing the data dirs to give the nodes a chance to shutdown
    shutil.rmtree(data_dir) # remove the temp data dir
 if __name__ == '__main__':
  # Script entry point: parses the command line, seeds the random generator,
  # resolves the version pair and runs a single full cluster restart test.
  logging.basicConfig(format='[%(levelname)s] [%(asctime)s] %(message)s', level=logging.INFO,
                      datefmt='%Y-%m-%d %I:%M:%S %p')
  # keep the client libraries quiet - the connection retries while nodes start up are expected
  logging.getLogger('elasticsearch').setLevel(logging.ERROR)
  logging.getLogger('urllib3').setLevel(logging.WARN)
  parser = argparse.ArgumentParser(description='Tests Full Cluster Restarts across major version')
  parser.add_argument('--version.backwards', '-b', dest='backwards_version', metavar='V',
                      help='The elasticsearch version to upgrade from')
  parser.add_argument('--version.current', '-c', dest='current_version', metavar='V',
                      help='The elasticsearch version to upgrade to')
  parser.add_argument('--seed', '-s', dest='seed', metavar='N', type=int,
                      help='The random seed to use')
  parser.add_argument('--backwards.dir', '-d', dest='bwc_directory', default='backwards', metavar='dir',
                      help='The directory to the backwards compatibility sources')
  parser.add_argument('--tcp.port', '-p', dest='tcp_port', default=9300, metavar='port', type=int,
                      help='The port to use as the minimum port for TCP communication')
  parser.add_argument('--http.port', '-t', dest='http_port', default=9200, metavar='port', type=int,
                      help='The port to use as the minimum port for HTTP communication')
  parser.set_defaults(bwc_directory='backwards')
  # without an explicit --seed the current time is used; it is logged so a run can be repeated
  parser.set_defaults(seed=int(time.time()))
  args = parser.parse_args()
  node_dir = args.bwc_directory
  current_version = args.current_version
  bwc_version = args.backwards_version
  seed = args.seed # module level on purpose: full_cluster_restart logs it
  random.seed(seed)
  # fills in whichever version was not given on the command line
  bwc_version, current_version = pick_random_upgrade_version(node_dir, bwc_version, current_version)
  tcp_port = args.tcp_port
  http_port = args.http_port
  try:
    full_cluster_restart(node_dir, current_version, bwc_version, tcp_port, http_port)
  except:
    # print everything needed to repeat exactly this run, then let the failure propagate;
    # logging.warning is used because logging.warn is deprecated
    logging.warning('REPRODUCE WITH: \n\t`python %s --version.backwards %s --version.current %s --seed %s --backwards.dir %s --tcp.port %s --http.port %s`'
                   % (sys.argv[0], bwc_version, current_version, seed, node_dir, tcp_port, http_port))
    raise
--- a/docs/community/clients.asciidoc
+++ b/docs/community/clients.asciidoc
@ -70,7 +70,7 @@ See the {client}/php-api/current/index.html[official Elasticsearch PHP client].
 * https://github.com/searchbox-io/Jest[Jest]:
  Java Rest client.
-* There is of course the [native ES Java client](http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/index.html)
+* There is of course the http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/index.html[native ES Java client]
 [[community-javascript]]
 === JavaScript
@ -90,14 +90,13 @@ See the {client}/javascript-api/current/index.html[official Elasticsearch JavaSc
 [[community-dotnet]]
-=== .Net
+=== .NET
 See the {client}/net-api/current/index.html[official Elasticsearch .NET client].
 * https://github.com/Yegoroff/PlainElastic.Net[PlainElastic.Net]:
  .NET client.
 * https://github.com/Mpdreamz/NEST[NEST]:
  .NET client.
 * https://github.com/medcl/ElasticSearch.Net[ElasticSearch.NET]:
  .NET client.
--- a/docs/community/frontends.asciidoc
+++ b/docs/community/frontends.asciidoc
@ -1,9 +1,6 @@
 [[front-ends]]
 == Front Ends
 * https://chrome.google.com/webstore/detail/sense/doinijnbnggojdlcjifpdckfokbbfpbo[Sense]:
  Chrome curl-like plugin for running requests against an Elasticsearch node
 * https://github.com/mobz/elasticsearch-head[elasticsearch-head]: 
  A web front end for an Elasticsearch cluster.
@ -15,3 +12,6 @@
 * http://elastichammer.exploringelasticsearch.com/[Hammer]: 
  Web front-end for elasticsearch
 * https://github.com/romansanchez/Calaca[Calaca]: 
  Simple search client for Elasticsearch
--- a/docs/community/integrations.asciidoc
+++ b/docs/community/integrations.asciidoc
@ -35,8 +35,8 @@
 * https://drupal.org/project/elasticsearch_connector[Drupal]:
  Drupal Elasticsearch integration (1.0.0 and later).
-* http://drupal.org/project/elasticsearch[Drupal]:
+* http://drupal.org/project/search_api_elasticsearch[Drupal]:
-  Drupal Elasticsearch integration (0.90 and earlier).
+  Drupal Elasticsearch integration via Search API (1.0.0 and earlier).
 * https://github.com/refuge/couch_es[couch_es]:
  elasticsearch helper for couchdb based products (apache couchdb, bigcouch & refuge)
@ -88,3 +88,7 @@
 * https://github.com/twitter/storehaus[Twitter Storehaus]:
  Thin asynchronous scala client for storehaus.
 * https://doc.tiki.org/Elasticsearch[Tiki Wiki CMS Groupware]:
  Tiki has native support for Elasticsearch. This provides faster & better search (facets, etc), along with some Natural Language Processing features (ex.: More like this)
--- a/docs/community/misc.asciidoc
+++ b/docs/community/misc.asciidoc
@ -7,6 +7,9 @@
 * http://github.com/elasticsearch/cookbook-elasticsearch[Chef]:
  Chef cookbook for Elasticsearch
 * https://github.com/medcl/salt-elasticsearch[SaltStack]:
  SaltStack Module for Elasticsearch
 * http://www.github.com/neogenix/daikon[daikon]:
  Daikon Elasticsearch CLI
--- a/docs/java-api/client.asciidoc
+++ b/docs/java-api/client.asciidoc
@ -54,12 +54,12 @@ different clusters by simply setting the `cluster.name` setting, or
 explicitly using the `clusterName` method on the builder.
 You can define `cluster.name` in the `/src/main/resources/elasticsearch.yml`
-dir in your project. As long as `elasticsearch.yml` is present in the
+file in your project. As long as `elasticsearch.yml` is present in the
 classpath, it will be used when you start your node.
-[source,java]
+[source,yaml]
 --------------------------------------------------
-cluster.name=yourclustername
+cluster.name: yourclustername
 --------------------------------------------------
 Or in Java:
--- a/docs/java-api/index_.asciidoc
+++ b/docs/java-api/index_.asciidoc
@ -8,7 +8,7 @@ index and make it searchable.
 [[generate]]
 === Generate JSON document
-There are different way of generating a JSON document:
+There are several different ways of generating a JSON document:
 * Manually (aka do it yourself) using native `byte[]` or as a `String`
--- a/docs/java-api/indexed-scripts.asciidoc
+++ b/docs/java-api/indexed-scripts.asciidoc
@ -0,0 +1,36 @@
 [[indexed-scripts]]
 == Indexed Scripts API
 The indexed script API allows one to interact with scripts and templates
 stored in an elasticsearch index. It can be used to create, update, get, 
 and delete indexed scripts and templates.
 [source,java]
 --------------------------------------------------
 PutIndexedScriptResponse putResponse = client.preparePutIndexedScript()
 			 .setScriptLang("groovy")		
 			 .setId("script1") 
 			 .setSource("_score * doc['my_numeric_field'].value")
 			 .execute()
 			 .actionGet();
 GetIndexedScriptResponse getResponse = client.prepareGetIndexedScript()
 			    .setScriptLang("groovy")
 			    .setId("script1")
 			    .execute()
 			    .actionGet();
 DeleteIndexedScriptResponse deleteResponse = client.prepareDeleteIndexedScript()
 			    .setScriptLang("groovy")
 			    .setId("script1")
 			    .execute()
 			    .actionGet();
 --------------------------------------------------
 To store templates simply use "mustache" for the scriptLang.
 === Script Language
 The API allows one to set the language of the indexed script being 
 interacted with. If one is not provided the default scripting language
 will be used.
--- a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
+++ b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
@ -23,30 +23,53 @@ following types are supported:
 <<hindi-analyzer,`hindi`>>,
 <<hungarian-analyzer,`hungarian`>>,
 <<indonesian-analyzer,`indonesian`>>,
 <<irish-analyzer,`irish`>>,
 <<italian-analyzer,`italian`>>,
 <<norwegian-analyzer,`norwegian`>>,
 <<persian-analyzer,`persian`>>,
 <<portuguese-analyzer,`portuguese`>>,
 <<romanian-analyzer,`romanian`>>,
 <<russian-analyzer,`russian`>>,
 <<sorani-analyzer,`sorani`>>,
 <<spanish-analyzer,`spanish`>>,
 <<swedish-analyzer,`swedish`>>,
 <<turkish-analyzer,`turkish`>>,
 <<thai-analyzer,`thai`>>.
 ==== Configuring language analyzers
 ===== Stopwords
 All analyzers support setting custom `stopwords` either internally in
 the config, or by using an external stopwords file by setting
 `stopwords_path`. Check <<analysis-stop-analyzer,Stop Analyzer>> for
 more details.
 ===== Excluding words from stemming
 The `stem_exclusion` parameter allows you to specify an array
 of lowercase words that should not be stemmed.  Internally, this
 functionality is implemented by adding the
 <<analysis-keyword-marker-tokenfilter,`keyword_marker` token filter>>
 with the `keywords` set to the value of the `stem_exclusion` parameter.
 The following analyzers support setting custom `stem_exclusion` list:
 `arabic`, `armenian`, `basque`, `bulgarian`, `catalan`,
 `czech`, `dutch`, `english`, `finnish`, `french`, `galician`,
-`german`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
+`german`, `irish`, `hindi`, `hungarian`, `indonesian`, `italian`, `norwegian`,
-`portuguese`, `romanian`, `russian`, `spanish`, `swedish`, `turkish`.
+`portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `turkish`.
 ==== Reimplementing language analyzers
 The built-in language analyzers can be reimplemented as `custom` analyzers
 (as described below) in order to customize their behaviour.
 NOTE: If you do not intend to exclude words from being stemmed (the
 equivalent of the `stem_exclusion` parameter above), then you should remove
 the `keyword_marker` token filter from the custom analyzer configuration.
 [[arabic-analyzer]]
-==== `arabic` analyzer
+===== `arabic` analyzer
 The `arabic` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -87,11 +110,11 @@ The `arabic` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[armenian-analyzer]]
-==== `armenian` analyzer
+===== `armenian` analyzer
 The `armenian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -131,11 +154,11 @@ The `armenian` analyzer could be reimplemented as a `custom` analyzer as follows
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[basque-analyzer]]
-==== `basque` analyzer
+===== `basque` analyzer
 The `basque` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -175,11 +198,11 @@ The `basque` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[brazilian-analyzer]]
-==== `brazilian` analyzer
+===== `brazilian` analyzer
 The `brazilian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -219,11 +242,11 @@ The `brazilian` analyzer could be reimplemented as a `custom` analyzer as follow
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[bulgarian-analyzer]]
-==== `bulgarian` analyzer
+===== `bulgarian` analyzer
 The `bulgarian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -263,11 +286,11 @@ The `bulgarian` analyzer could be reimplemented as a `custom` analyzer as follow
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[catalan-analyzer]]
-==== `catalan` analyzer
+===== `catalan` analyzer
 The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -312,11 +335,11 @@ The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[chinese-analyzer]]
-==== `chinese` analyzer
+===== `chinese` analyzer
 The `chinese` analyzer cannot be reimplemented as a `custom` analyzer
 because it depends on the ChineseTokenizer and ChineseFilter classes,
@ -325,7 +348,7 @@ deprecated in Lucene 4 and the `chinese` analyzer will be replaced
 with the <<analysis-standard-analyzer>> in Lucene 5.
 [[cjk-analyzer]]
-==== `cjk` analyzer
+===== `cjk` analyzer
 The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -359,7 +382,7 @@ The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows:
    or `stopwords_path` parameters.
 [[czech-analyzer]]
-==== `czech` analyzer
+===== `czech` analyzer
 The `czech` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -399,11 +422,11 @@ The `czech` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[danish-analyzer]]
-==== `danish` analyzer
+===== `danish` analyzer
 The `danish` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -443,11 +466,11 @@ The `danish` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[dutch-analyzer]]
-==== `dutch` analyzer
+===== `dutch` analyzer
 The `dutch` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -497,11 +520,11 @@ The `dutch` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[english-analyzer]]
-==== `english` analyzer
+===== `english` analyzer
 The `english` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -546,11 +569,11 @@ The `english` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[finnish-analyzer]]
-==== `finnish` analyzer
+===== `finnish` analyzer
 The `finnish` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -590,11 +613,11 @@ The `finnish` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[french-analyzer]]
-==== `french` analyzer
+===== `french` analyzer
 The `french` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -642,11 +665,11 @@ The `french` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[galician-analyzer]]
-==== `galician` analyzer
+===== `galician` analyzer
 The `galician` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -686,11 +709,11 @@ The `galician` analyzer could be reimplemented as a `custom` analyzer as follows
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[german-analyzer]]
-==== `german` analyzer
+===== `german` analyzer
 The `german` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -720,7 +743,7 @@ The `german` analyzer could be reimplemented as a `custom` analyzer as follows:
            "lowercase",
            "german_stop",
            "german_keywords",
-            "ascii_folding", <3>
+            "german_normalization",
            "german_stemmer"
          ]
        }
@ -731,14 +754,11 @@ The `german` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
-<3> The `german` analyzer actually uses the GermanNormalizationFilter,
-    which isn't exposed in Elasticsearch.  The `ascii_folding` filter
-    does a similar job but is more extensive.
 [[greek-analyzer]]
-==== `greek` analyzer
+===== `greek` analyzer
 The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -752,6 +772,10 @@ The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
          "type":       "stop",
          "stopwords":  "_greek_" <1>
        },
        "greek_lowercase": {
          "type":       "lowercase",
          "language":   "greek"
        },
        "greek_keywords": {
          "type":       "keyword_marker",
          "keywords":   [] <2>
@ -765,7 +789,7 @@ The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
        "greek": {
          "tokenizer":  "standard",
          "filter": [
-            "lowercase",
+            "greek_lowercase",
            "greek_stop",
            "greek_keywords",
            "greek_stemmer"
@ -778,18 +802,57 @@ The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[hindi-analyzer]]
-==== `hindi` analyzer
+===== `hindi` analyzer
-The `hindi` analyzer cannot currently be implemented as a `custom` analyzer
+The `hindi` analyzer could be reimplemented as a `custom` analyzer as follows:
-as it depends on the IndicNormalizationFilter and HindiNormalizationFilter
+
-which are not yet exposed by Elasticsearch. Instead, see the <<analysis-icu-plugin>>.
+[source,js]
 ----------------------------------------------------
 {
  "settings": {
    "analysis": {
      "filter": {
        "hindi_stop": {
          "type":       "stop",
          "stopwords":  "_hindi_" <1>
        },
        "hindi_keywords": {
          "type":       "keyword_marker",
          "keywords":   [] <2>
        },
        "hindi_stemmer": {
          "type":       "stemmer",
          "language":   "hindi"
        }
      },
      "analyzer": {
        "hindi": {
          "tokenizer":  "standard",
          "filter": [
            "lowercase",
            "indic_normalization",
            "hindi_normalization",
            "hindi_stop",
            "hindi_keywords",
            "hindi_stemmer"
          ]
        }
      }
    }
  }
 }
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.
 [[hungarian-analyzer]]
-==== `hungarian` analyzer
+===== `hungarian` analyzer
 The `hungarian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -829,12 +892,12 @@ The `hungarian` analyzer could be reimplemented as a `custom` analyzer as follow
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[indonesian-analyzer]]
-==== `indonesian` analyzer
+===== `indonesian` analyzer
 The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -874,11 +937,64 @@ The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follo
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[irish-analyzer]]
 ===== `irish` analyzer
 The `irish` analyzer could be reimplemented as a `custom` analyzer as follows:
 [source,js]
 ----------------------------------------------------
 {
  "settings": {
    "analysis": {
      "filter": {
        "irish_elision": {
          "type":       "elision",
          "articles": [ "h", "n", "t" ]
        },
        "irish_stop": {
          "type":       "stop",
          "stopwords":  "_irish_" <1>
        },
        "irish_lowercase": {
          "type":       "lowercase",
          "language":   "irish"
        },
        "irish_keywords": {
          "type":       "keyword_marker",
          "keywords":   [] <2>
        },
        "irish_stemmer": {
          "type":       "stemmer",
          "language":   "irish"
        }
      },
      "analyzer": {
        "irish": {
          "tokenizer":  "standard",
          "filter": [
            "irish_stop",
            "irish_elision",
            "irish_lowercase",
            "irish_keywords",
            "irish_stemmer"
          ]
        }
      }
    }
  }
 }
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.
 [[italian-analyzer]]
-==== `italian` analyzer
+===== `italian` analyzer
 The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -928,11 +1044,11 @@ The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[norwegian-analyzer]]
-==== `norwegian` analyzer
+===== `norwegian` analyzer
 The `norwegian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -972,11 +1088,11 @@ The `norwegian` analyzer could be reimplemented as a `custom` analyzer as follow
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[persian-analyzer]]
-==== `persian` analyzer
+===== `persian` analyzer
 The `persian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -1018,7 +1134,7 @@ The `persian` analyzer could be reimplemented as a `custom` analyzer as follows:
    or `stopwords_path` parameters.
 [[portuguese-analyzer]]
-==== `portuguese` analyzer
+===== `portuguese` analyzer
 The `portuguese` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -1058,11 +1174,11 @@ The `portuguese` analyzer could be reimplemented as a `custom` analyzer as follo
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[romanian-analyzer]]
-==== `romanian` analyzer
+===== `romanian` analyzer
 The `romanian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -1102,12 +1218,12 @@ The `romanian` analyzer could be reimplemented as a `custom` analyzer as follows
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[russian-analyzer]]
-==== `russian` analyzer
+===== `russian` analyzer
 The `russian` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -1147,11 +1263,56 @@ The `russian` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[sorani-analyzer]]
 ===== `sorani` analyzer
 The `sorani` analyzer could be reimplemented as a `custom` analyzer as follows:
 [source,js]
 ----------------------------------------------------
 {
  "settings": {
    "analysis": {
      "filter": {
        "sorani_stop": {
          "type":       "stop",
          "stopwords":  "_sorani_" <1>
        },
        "sorani_keywords": {
          "type":       "keyword_marker",
          "keywords":   [] <2>
        },
        "sorani_stemmer": {
          "type":       "stemmer",
          "language":   "sorani"
        }
      },
      "analyzer": {
        "sorani": {
          "tokenizer":  "standard",
          "filter": [
            "sorani_normalization",
            "lowercase",
            "sorani_stop",
            "sorani_keywords",
            "sorani_stemmer"
          ]
        }
      }
    }
  }
 }
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.
 [[spanish-analyzer]]
-==== `spanish` analyzer
+===== `spanish` analyzer
 The `spanish` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -1191,11 +1352,11 @@ The `spanish` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[swedish-analyzer]]
-==== `swedish` analyzer
+===== `swedish` analyzer
 The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows:
@ -1235,20 +1396,86 @@ The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows:
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
-<2> Words can be excluded from stemming with the `stem_exclusion`
+<2> This filter should be removed unless there are words which should
-    parameter.
+    be excluded from stemming.
 [[turkish-analyzer]]
-==== `turkish` analyzer
+===== `turkish` analyzer
-The `turkish` analyzer cannot currently be implemented as a `custom` analyzer
+The `turkish` analyzer could be reimplemented as a `custom` analyzer as follows:
-because it depends on the TurkishLowerCaseFilter and the ApostropheFilter
+
-which are not exposed in Elasticsearch. Instead, see the <<analysis-icu-plugin>>.
+[source,js]
 ----------------------------------------------------
 {
  "settings": {
    "analysis": {
      "filter": {
        "turkish_stop": {
          "type":       "stop",
          "stopwords":  "_turkish_" <1>
        },
        "turkish_lowercase": {
          "type":       "lowercase",
          "language":   "turkish"
        },
        "turkish_keywords": {
          "type":       "keyword_marker",
          "keywords":   [] <2>
        },
        "turkish_stemmer": {
          "type":       "stemmer",
          "language":   "turkish"
        }
      },
      "analyzer": {
        "turkish": {
          "tokenizer":  "standard",
          "filter": [
            "apostrophe",
            "turkish_lowercase",
            "turkish_stop",
            "turkish_keywords",
            "turkish_stemmer"
          ]
        }
      }
    }
  }
 }
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
    be excluded from stemming.
 [[thai-analyzer]]
-==== `thai` analyzer
+===== `thai` analyzer
-The `thai` analyzer cannot currently be implemented as a `custom` analyzer
+The `thai` analyzer could be reimplemented as a `custom` analyzer as follows:
-because it depends on the ThaiTokenizer which is not exposed in Elasticsearch.
-Instead, see the <<analysis-icu-plugin>>.
 [source,js]
 ----------------------------------------------------
 {
  "settings": {
    "analysis": {
      "filter": {
        "thai_stop": {
          "type":       "stop",
          "stopwords":  "_thai_" <1>
        }
      },
      "analyzer": {
        "thai": {
          "tokenizer":  "thai",
          "filter": [
            "lowercase",
            "thai_stop"
          ]
        }
      }
    }
  }
 }
 ----------------------------------------------------
 <1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
--- a/docs/reference/analysis/icu-plugin.asciidoc
+++ b/docs/reference/analysis/icu-plugin.asciidoc
@ -218,3 +218,29 @@ Breaks text into words according to UAX #29: Unicode Text Segmentation ((http://
 }
 --------------------------------------------------
 [float]
 === ICU Normalization CharFilter
 Normalizes characters as explained http://userguide.icu-project.org/transforms/normalization[here].
 It registers itself by default under `icu_normalizer` or `icuNormalizer` using the default settings.
 The `name` parameter accepts one of the following values: `nfc`, `nfkc`, and `nfkc_cf`.
 The `mode` parameter accepts one of the following values: `compose` and `decompose`.
 Use `decompose` with `nfc` or `nfkc`, to get `nfd` or `nfkd`, respectively.
 Here are some sample settings:
 [source,js]
 --------------------------------------------------
 {
    "index" : {
        "analysis" : {
            "analyzer" : {
                "collation" : {
                    "tokenizer" : "keyword",
                    "char_filter" : ["icu_normalizer"]
                }
            }
        }
    }
 }
 --------------------------------------------------
--- a/docs/reference/analysis/tokenfilters.asciidoc
+++ b/docs/reference/analysis/tokenfilters.asciidoc
@ -78,3 +78,7 @@ include::tokenfilters/cjk-bigram-tokenfilter.asciidoc[]
 include::tokenfilters/delimited-payload-tokenfilter.asciidoc[]
 include::tokenfilters/keep-words-tokenfilter.asciidoc[]
 include::tokenfilters/classic-tokenfilter.asciidoc[]
 include::tokenfilters/apostrophe-tokenfilter.asciidoc[]
--- a/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc
@ -0,0 +1,7 @@
 [[analysis-apostrophe-tokenfilter]]
 === Apostrophe Token Filter
 added[1.3.0]
 The `apostrophe` token filter strips all characters after an apostrophe,
 including the apostrophe itself.
--- a/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc
@ -0,0 +1,11 @@
 [[analysis-classic-tokenfilter]]
 === Classic Token Filter
 added[1.3.0]
 The `classic` token filter does optional post-processing of
 terms that are generated by the <<analysis-classic-tokenizer,`classic` tokenizer>>.
 This filter removes the English possessive from the end of words, and
 it removes dots from acronyms.
--- a/docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc
@ -5,10 +5,10 @@ Basic support for hunspell stemming. Hunspell dictionaries will be
 picked up from a dedicated hunspell directory on the filesystem
 (defaults to `<path.conf>/hunspell`). Each dictionary is expected to
 have its own directory named after its associated locale (language).
-This dictionary directory is expected to hold both the `*.aff` and `*.dic`
+This dictionary directory is expected to hold a single `*.aff` and
-files (all of which will automatically be picked up). For example,
+one or more `*.dic` files (all of which will automatically be picked up).
-assuming the default hunspell location is used, the following directory
+For example, assuming the default hunspell location is used, the
-layout will define the `en_US` dictionary:
+following directory layout will define the `en_US` dictionary:
 [source,js]
 --------------------------------------------------
@ -25,7 +25,7 @@ _elasticsearch.yml_.
 Each dictionary can be configured with one setting:
-`ignore_case`:: 
+`ignore_case`::
    If true, dictionary matching will be case insensitive
    (defaults to `false`)
@ -67,20 +67,20 @@ settings:
 The hunspell token filter accepts four options:
-`locale`:: 
+`locale`::
    A locale for this filter. If this is unset, the `lang` or
    `language` are used instead - so one of these has to be set.
-`dictionary`:: 
+`dictionary`::
    The name of a dictionary. The path to your hunspell
    dictionaries should be configured via
    `indices.analysis.hunspell.dictionary.location` before.
-`dedup`:: 
+`dedup`::
    If only unique terms should be returned, this needs to be
    set to `true`. Defaults to `true`.
-`longest_only`:: 
+`longest_only`::
    If only the longest term should be returned, set this to `true`.
    Defaults to `false`: all possible stems are returned.
@ -88,6 +88,16 @@ NOTE: As opposed to the snowball stemmers (which are algorithm based)
 this is a dictionary lookup based stemmer and therefore the quality of
 the stemming is determined by the quality of the dictionary.
 [float]
 ==== Dictionary loading
 By default, the configured (`indices.analysis.hunspell.dictionary.location`)
 or default Hunspell directory (`config/hunspell/`) is checked for dictionaries
 when the node starts up, and any dictionaries are automatically loaded.
 Dictionary loading can be deferred until the dictionaries are actually used by setting
 `indices.analysis.hunspell.dictionary.lazy` to `true` in the config file.
 [float]
 ==== References
--- a/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc
@ -4,7 +4,7 @@
 A token filter of type `lowercase` that normalizes token text to lower
 case.
-Lowercase token filter supports Greek and Turkish lowercase token
+Lowercase token filter supports Greek, Irish added[1.3.0], and Turkish lowercase token
 filters through the `language` parameter. Below is a usage example in a
 custom analyzer
--- a/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc
@ -4,12 +4,33 @@
 There are several token filters available which try to normalize special
 characters of a certain language.
-You can currently choose between `arabic_normalization` and
+[horizontal]
-`persian_normalization` normalization in your token filter
+Arabic::
-configuration. For more information check the
+
-http://lucene.apache.org/core/4_3_1/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizer.html[ArabicNormalizer]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizer.html[`arabic_normalization`]
-or the
+
-http://lucene.apache.org/core/4_3_1/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizer.html[PersianNormalizer]
+German::
-documentation.
+
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/de/GermanNormalizationFilter.html[`german_normalization`] added[1.3.0]
 Hindi::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/hi/HindiNormalizer.html[`hindi_normalization`] added[1.3.0]
 Indic::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/in/IndicNormalizer.html[`indic_normalization`] added[1.3.0]
 Kurdish (Sorani)::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormalizer.html[`sorani_normalization`] added[1.3.0]
 Persian::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizer.html[`persian_normalization`]
 Scandinavian::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianNormalizationFilter.html[`scandinavian_normalization`] added[1.3.0],
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianFoldingFilter.html[`scandinavian_folding`] added[1.3.0]
 *Note:* These filters are available since `0.90.2`
--- a/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc
@ -5,7 +5,7 @@ Overrides stemming algorithms, by applying a custom mapping, then
 protecting these terms from being modified by stemmers. Must be placed
 before any stemming filters.
-Rules are separated by "=>"
+Rules are separated by `=>`
 [cols="<,<",options="header",]
 |=======================================================================
--- a/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc
@ -32,7 +32,7 @@ available values (the preferred filters are marked in *bold*):
 [horizontal]
 Arabic::
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Far%2FArabicStemmer.html[*`arabic`*]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ar/ArabicStemmer.html[*`arabic`*]
 Armenian::
@ -44,7 +44,7 @@ http://snowball.tartarus.org/algorithms/basque/stemmer.html[*`basque`*]
 Brazilian Portuguese::
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Fbr%2FBrazilianStemmer.html[*`brazilian`*]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/br/BrazilianStemmer.html[*`brazilian`*]
 Bulgarian::
@ -65,15 +65,15 @@ http://snowball.tartarus.org/algorithms/danish/stemmer.html[*`danish`*]
 Dutch::
 http://snowball.tartarus.org/algorithms/dutch/stemmer.html[*`dutch`*],
-http://snowball.tartarus.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] coming[1.3.0,Renamed from `kp`]
+http://snowball.tartarus.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] added[1.3.0,Renamed from `kp`]
 English::
-http://snowball.tartarus.org/algorithms/porter/stemmer.html[*`english`*] coming[1.3.0,Returns the <<analysis-porterstem-tokenfilter,`porter_stem`>> instead of the <<analysis-snowball-tokenfilter,`english` Snowball token filter>>],
+http://snowball.tartarus.org/algorithms/porter/stemmer.html[*`english`*] added[1.3.0,Returns the <<analysis-porterstem-tokenfilter,`porter_stem`>> instead of the <<analysis-snowball-tokenfilter,`english` Snowball token filter>>],
-http://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`] coming[1.3.0,Returns the <<analysis-kstem-tokenfilter,`kstem` token filter>>],
+http://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`] added[1.3.0,Returns the <<analysis-kstem-tokenfilter,`kstem` token filter>>],
 http://www.medialab.tfe.umu.se/courses/mdm0506a/material/fulltext_ID%3D10049387%26PLACEBO%3DIE.pdf[`minimal_english`],
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Fen%2FEnglishPossessiveFilter.html[`possessive_english`],
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/en/EnglishPossessiveFilter.html[`possessive_english`],
-http://snowball.tartarus.org/algorithms/english/stemmer.html[`porter2`] coming[1.3.0,Returns the <<analysis-snowball-tokenfilter,`english` Snowball token filter>> instead of the <<analysis-snowball-tokenfilter,`porter` Snowball token filter>>],
+http://snowball.tartarus.org/algorithms/english/stemmer.html[`porter2`] added[1.3.0,Returns the <<analysis-snowball-tokenfilter,`english` Snowball token filter>> instead of the <<analysis-snowball-tokenfilter,`porter` Snowball token filter>>],
 http://snowball.tartarus.org/algorithms/lovins/stemmer.html[`lovins`]
 Finnish::
@ -87,6 +87,11 @@ http://snowball.tartarus.org/algorithms/french/stemmer.html[`french`],
 http://dl.acm.org/citation.cfm?id=1141523[*`light_french`*],
 http://dl.acm.org/citation.cfm?id=318984[`minimal_french`]
 Galician::
 http://bvg.udc.es/recursos_lingua/stemming.jsp[*`galician`*] added[1.3.0],
 http://bvg.udc.es/recursos_lingua/stemming.jsp[`minimal_galician`] (Plural step only) added[1.3.0]
 German::
 http://snowball.tartarus.org/algorithms/german/stemmer.html[`german`],
@ -111,27 +116,40 @@ Indonesian::
 http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf[*`indonesian`*]
 Irish::
 http://snowball.tartarus.org/otherapps/oregan/intro.html[*`irish`*]
 Italian::
 http://snowball.tartarus.org/algorithms/italian/stemmer.html[`italian`],
 http://www.ercim.eu/publication/ws-proceedings/CLEF2/savoy.pdf[*`light_italian`*]
 Kurdish (Sorani)::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/ckb/SoraniStemmer.html[*`sorani`*] added[1.3.0]
 Latvian::
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Flv%2FLatvianStemmer.html[*`latvian`*]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/lv/LatvianStemmer.html[*`latvian`*]
-Norwegian::
+Norwegian (Bokmål)::
 http://snowball.tartarus.org/algorithms/norwegian/stemmer.html[*`norwegian`*],
-http://lucene.apache.org/core/4_3_0/analyzers-common/index.html?org%2Fapache%2Flucene%2Fanalysis%2Fno%2FNorwegianMinimalStemFilter.html[`minimal_norwegian`]
+http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianLightStemmer.html[*`light_norwegian`*] added[1.3.0],
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianMinimalStemmer.html[`minimal_norwegian`]
 Norwegian (Nynorsk)::
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianLightStemmer.html[*`light_nynorsk`*] added[1.3.0],
 http://lucene.apache.org/core/4_9_0/analyzers-common/org/apache/lucene/analysis/no/NorwegianMinimalStemmer.html[`minimal_nynorsk`] added[1.3.0]
 Portuguese::
 http://snowball.tartarus.org/algorithms/portuguese/stemmer.html[`portuguese`],
 http://dl.acm.org/citation.cfm?id=1141523&dl=ACM&coll=DL&CFID=179095584&CFTOKEN=80067181[*`light_portuguese`*],
 http://www.inf.ufrgs.br/\~buriol/papers/Orengo_CLEF07.pdf[`minimal_portuguese`],
-http://www.inf.ufrgs.br/\~viviane/rslp/index.htm[`portuguese_rslp`] coming[1.3.0]
+http://www.inf.ufrgs.br/\~viviane/rslp/index.htm[`portuguese_rslp`] added[1.3.0]
 Romanian::
--- a/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc
@ -7,29 +7,72 @@ streams.
 The following are settings that can be set for a `stop` token filter
 type:
-[cols="<,<",options="header",]
+[horizontal]
-|=======================================================================
+`stopwords`::
 |Setting |Description
 |`stopwords` |A list of stop words to use. Defaults to english stop
 words.
-|`stopwords_path` |A path (either relative to `config` location, or
+    A list of stop words to use. Defaults to `_english_` stop words.
 absolute) to a stopwords file configuration. Each stop word should be in
 its own "line" (separated by a line break). The file must be UTF-8
 encoded.
-|`ignore_case` |Set to `true` to lower case all words first. Defaults to
+`stopwords_path`::
 `false`.
-|`remove_trailing` |Set to `false` in order to not ignore the last term of
+    A path (either relative to `config` location, or absolute) to a stopwords
-a search if it is a stop word. This is very useful for the completion
+    file configuration. Each stop word should be in its own "line" (separated
-suggester as a query like `green a` can be extended to `green apple` even
+    by a line break). The file must be UTF-8 encoded.
 though you remove stop words in general. Defaults to `true`.
 |=======================================================================
-stopwords allow for custom language specific expansion of default
+`ignore_case`::
-stopwords. It follows the `_lang_` notation and supports: arabic,
+
-armenian, basque, brazilian, bulgarian, catalan, czech, danish, dutch,
+    Set to `true` to lower case all words first. Defaults to `false`.
-english, finnish, french, galician, german, greek, hindi, hungarian,
+
-indonesian, italian, norwegian, persian, portuguese, romanian, russian,
+`remove_trailing`::
-spanish, swedish, turkish.
+
    Set to `false` in order to not ignore the last term of a search if it is a
    stop word. This is very useful for the completion suggester as a query
    like `green a` can be extended to `green apple` even though you remove
    stop words in general. Defaults to `true`.
 The `stopwords` parameter accepts either an array of stopwords:
 [source,json]
 ------------------------------------
 PUT /my_index
 {
    "settings": {
        "analysis": {
            "filter": {
                "my_stop": {
                    "type":       "stop",
                    "stopwords": ["and", "is", "the"]
                }
            }
        }
    }
 }
 ------------------------------------
 or a predefined language-specific list:
 [source,json]
 ------------------------------------
 PUT /my_index
 {
    "settings": {
        "analysis": {
            "filter": {
                "my_stop": {
                    "type":       "stop",
                    "stopwords":  "_english_"
                }
            }
        }
    }
 }
 ------------------------------------
 Elasticsearch provides the following predefined list of languages:
 `_arabic_`, `_armenian_`, `_basque_`, `_brazilian_`, `_bulgarian_`,
 `_catalan_`, `_czech_`, `_danish_`, `_dutch_`, `_english_`, `_finnish_`,
 `_french_`, `_galician_`, `_german_`, `_greek_`, `_hindi_`, `_hungarian_`,
 `_indonesian_`, `_italian_`, `_norwegian_`, `_persian_`, `_portuguese_`,
 `_romanian_`, `_russian_`, `_spanish_`, `_swedish_`, `_turkish_`.
 For the empty stopwords list (to disable stopwords) use: `_none_`.
--- a/docs/reference/analysis/tokenizers.asciidoc
+++ b/docs/reference/analysis/tokenizers.asciidoc
@ -28,3 +28,7 @@ include::tokenizers/uaxurlemail-tokenizer.asciidoc[]
 include::tokenizers/pathhierarchy-tokenizer.asciidoc[]
 include::tokenizers/classic-tokenizer.asciidoc[]
 include::tokenizers/thai-tokenizer.asciidoc[]
--- a/docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc
+++ b/docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc
@ -0,0 +1,21 @@
 [[analysis-classic-tokenizer]]
 === Classic Tokenizer
 added[1.3.0]
 A tokenizer of type `classic` providing a grammar based tokenizer that is
 a good tokenizer for English language documents. This tokenizer has 
 heuristics for special treatment of acronyms, company names, email addresses,
 and internet host names. However, these rules don't always work, and 
 the tokenizer doesn't work well for most languages other than English.
 The following are settings that can be set for a `classic` tokenizer
 type:
 [cols="<,<",options="header",]
 |=======================================================================
 |Setting |Description
 |`max_token_length` |The maximum token length. If a token is seen that
 exceeds this length then it is discarded. Defaults to `255`.
 |=======================================================================
--- a/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc
+++ b/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc
@ -7,7 +7,7 @@ via a regular expression. Accepts the following settings:
 [cols="<,<",options="header",]
 |======================================================================
 |Setting |Description
-|`pattern` |The regular expression pattern, defaults to `\\W+`.
+|`pattern` |The regular expression pattern, defaults to `\W+`.
 |`flags` |The regular expression flags.
 |`group` |Which group to extract into tokens. Defaults to `-1` (split).
 |======================================================================
@ -15,15 +15,24 @@ via a regular expression. Accepts the following settings:
 *IMPORTANT*: The regular expression should match the *token separators*,
 not the tokens themselves.
 *********************************************
 Note that you may need to escape the `pattern` string literal according to
 your client language rules. For example, in many programming languages
 a string literal for `\W+` pattern is written as `"\\W+"`.
 There is nothing special about `pattern` (you may have to escape other
 string literals as well); escaping `pattern` is common just because it
 often contains characters that should be escaped.
 *********************************************
 `group` set to `-1` (the default) is equivalent to "split". Using group
 >= 0 selects the matching group as the token. For example, if you have:
 ------------------------
-pattern = \\'([^\']+)\\'
+pattern = '([^']+)'
 group   = 0
 input   = aaa 'bbb' 'ccc'
 ------------------------
-the output will be two tokens: 'bbb' and 'ccc' (including the ' marks).
+the output will be two tokens: `'bbb'` and `'ccc'` (including the `'`
-With the same input but using group=1, the output would be: bbb and ccc
+marks). With the same input but using group=1, the output would be:
-(no ' marks).
+`bbb` and `ccc` (no `'` marks).
--- a/docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc
+++ b/docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc
@ -0,0 +1,9 @@
 [[analysis-thai-tokenizer]]
 === Thai Tokenizer
 added[1.3.0]
 A tokenizer of type `thai` that segments Thai text into words. This tokenizer
 uses the built-in Thai segmentation algorithm included with Java to divide
 up Thai text. Text in other languages in general will be treated the same
 as `standard`.
--- a/docs/reference/api-conventions.asciidoc
+++ b/docs/reference/api-conventions.asciidoc
@ -241,8 +241,19 @@ document indexed.
 [float]
 === JSONP
-All REST APIs accept a `callback` parameter resulting in a
+By default JSONP responses are disabled. coming[1.3,Previously JSONP was enabled by default]
-http://en.wikipedia.org/wiki/JSONP[JSONP] result.
+
 When enabled, all REST APIs accept a `callback` parameter
 resulting in a http://en.wikipedia.org/wiki/JSONP[JSONP] result. You can enable
 this behavior by adding the following to `config/elasticsearch.yml`:
    http.jsonp.enable: true
 Please note, when enabled, due to the architecture of Elasticsearch, this may pose
 a security risk. Under some circumstances, an attacker may be able to exfiltrate
 data in your Elasticsearch server if they're able to force your browser to make a
 JSONP request on your behalf (e.g. by including a <script> tag on an untrusted site
 with a legitimate query against a local Elasticsearch server).
 [float]
 === Request body in query string
--- a/docs/reference/cat/nodes.asciidoc
+++ b/docs/reference/cat/nodes.asciidoc
@ -3,23 +3,23 @@
 The `nodes` command shows the cluster topology.
-[source,shell]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
 % curl 192.168.56.10:9200/_cat/nodes
-SP4H 4727 192.168.56.30 9300 1.0.1 1.6.0_27 72.1gb 35.4 93.9mb 79 239.1mb 0.45 3.4h d m Boneyard
+SP4H 4727 192.168.56.30 9300 {version} {jdk} 72.1gb 35.4 93.9mb 79 239.1mb 0.45 3.4h d m Boneyard
-_uhJ 5134 192.168.56.10 9300 1.0.1 1.6.0_27 72.1gb 33.3 93.9mb 85 239.1mb 0.06 3.4h d * Athena
+_uhJ 5134 192.168.56.10 9300 {version} {jdk} 72.1gb 33.3 93.9mb 85 239.1mb 0.06 3.4h d * Athena
-HfDp 4562 192.168.56.20 9300 1.0.1 1.6.0_27 72.2gb 74.5 93.9mb 83 239.1mb 0.12 3.4h d m Zarek
+HfDp 4562 192.168.56.20 9300 {version} {jdk} 72.2gb 74.5 93.9mb 83 239.1mb 0.12 3.4h d m Zarek
 --------------------------------------------------
 The first few columns tell you where your nodes live.  For sanity it
 also tells you what version of ES and the JVM each one runs.
-[source,shell]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
 nodeId pid  ip            port version jdk
-u2PZ   4234 192.168.56.30 9300 1.0.1   1.6.0_27
+u2PZ   4234 192.168.56.30 9300 {version}   {jdk}
-URzf   5443 192.168.56.10 9300 1.0.1   1.6.0_27
+URzf   5443 192.168.56.10 9300 {version}   {jdk}
-ActN   3806 192.168.56.20 9300 1.0.1   1.6.0_27
+ActN   3806 192.168.56.20 9300 {version}   {jdk}
 --------------------------------------------------
@ -65,20 +65,20 @@ by default.  To have the headers appear in the output, use verbose
 mode (`v`). The header name will match the supplied value (e.g.,
 `pid` versus `p`).  For example:
-[source,shell]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
 % curl 192.168.56.10:9200/_cat/nodes?v\&h=id,ip,port,v,m
 id   ip            port version m
-pLSN 192.168.56.30 9300 1.0.1   m
+pLSN 192.168.56.30 9300 {version}   m
-k0zy 192.168.56.10 9300 1.0.1   m
+k0zy 192.168.56.10 9300 {version}   m
-6Tyi 192.168.56.20 9300 1.0.1   *
+6Tyi 192.168.56.20 9300 {version}   *
 % curl 192.168.56.10:9200/_cat/nodes?h=id,ip,port,v,m
-pLSN 192.168.56.30 9300 1.0.1 m
+pLSN 192.168.56.30 9300 {version} m
-k0zy 192.168.56.10 9300 1.0.1 m
+k0zy 192.168.56.10 9300 {version} m
-6Tyi 192.168.56.20 9300 1.0.1 *
+6Tyi 192.168.56.20 9300 {version} *
 --------------------------------------------------
-[cols="<,<,<,<,<",options="header",]
+[cols="<,<,<,<,<",options="header",subs="normal"]
 |=======================================================================
 |Header |Alias |Appear by Default |Description |Example
 |`id` |`nodeId` |No |Unique node ID |k0zy
@ -86,7 +86,7 @@ k0zy 192.168.56.10 9300 1.0.1 m
 |`host` |`h` |Yes |Host name |n1
 |`ip` |`i` |Yes |IP address |127.0.1.1
 |`port` |`po` |No |Bound transport port |9300
-|`version` |`v` |No |Elasticsearch version |1.0.1
+|`version` |`v` |No |Elasticsearch version |{version}
 |`build` |`b` |No |Elasticsearch Build hash |5c03844
 |`jdk` |`j` |No |Running Java version |1.8.0
 |`disk.avail` |`d`, `disk`, `diskAvail` |No |Available disk space |1.8gb
@ -179,4 +179,8 @@ operations |9
 |`segments.count` |`sc`, `segmentsCount` |No |Number of segments |4
 |`segments.memory` |`sm`, `segmentsMemory` |No |Memory used by
 segments |1.4kb
 |`segments.index_writer_memory` |`siwm`, `segmentsIndexWriterMemory` |No
 |Memory used by index writer |1.2kb
 |`segments.version_map_memory` |`svmm`, `segmentsVersionMapMemory` |No
 |Memory used by version map |1.0kb
 |=======================================================================
--- a/docs/reference/cluster/health.asciidoc
+++ b/docs/reference/cluster/health.asciidoc
@ -60,12 +60,13 @@ The cluster health API accepts the following request parameters:
 `wait_for_status`::
    One of `green`, `yellow` or `red`. Will wait (until
    the timeout provided) until the status of the cluster changes to the one
-    provided. By default, will not wait for any status.
+    provided or better, i.e. `green` > `yellow` > `red`. By default, will not 
    wait for any status.
 `wait_for_relocating_shards`::
    A number controlling how many relocating
    shards to wait for. Usually will be `0` to indicate to wait till all
-    relocation have happened. Defaults to not to wait.
+    relocations have happened. Defaults to not wait.
 `wait_for_nodes`::
    The request waits until the specified number `N` of
--- a/docs/reference/cluster/stats.asciidoc
+++ b/docs/reference/cluster/stats.asciidoc
@ -12,7 +12,7 @@ curl -XGET 'http://localhost:9200/_cluster/stats?human&pretty'
 --------------------------------------------------
 Will return, for example:
-[source,js]
+["source","js",subs="attributes,callouts"]
 --------------------------------------------------
 {
   "cluster_name": "elasticsearch",
@ -82,7 +82,7 @@ Will return, for example:
         "client": 0
      },
      "versions": [
-         "0.90.8"
+         "{version}"
      ],
      "os": {
         "available_processors": 4,
--- a/docs/reference/cluster/update-settings.asciidoc
+++ b/docs/reference/cluster/update-settings.asciidoc
@ -220,3 +220,13 @@ All the disable allocation settings have been deprecated in favour for
 Logger values can also be updated by setting `logger.` prefix. More
 settings will be allowed to be updated.
 [float]
 === Field data circuit breaker
 `indices.fielddata.breaker.limit`::
     See <<index-modules-fielddata>>
 `indices.fielddata.breaker.overhead`::
     See <<index-modules-fielddata>>
--- a/docs/reference/docs/bulk.asciidoc
+++ b/docs/reference/docs/bulk.asciidoc
@ -2,8 +2,26 @@
 == Bulk API
 The bulk API makes it possible to perform many index/delete operations
-in a single API call. This can greatly increase the indexing speed. The
+in a single API call. This can greatly increase the indexing speed.
-REST API endpoint is `/_bulk`, and it expects the following JSON
+
 .Client support for bulk requests
 *********************************************
 Some of the officially supported clients provide helpers to assist with
 bulk requests and reindexing of documents from one index to another:
 Perl::
    See https://metacpan.org/pod/Search::Elasticsearch::Bulk[Search::Elasticsearch::Bulk]
    and https://metacpan.org/pod/Search::Elasticsearch::Scroll[Search::Elasticsearch::Scroll]
 Python::
    See http://elasticsearch-py.readthedocs.org/en/master/helpers.html[elasticsearch.helpers.*]
 *********************************************
 The REST API endpoint is `/_bulk`, and it expects the following JSON
 structure:
 [source,js]
@ -19,7 +37,7 @@ optional_source\n
 *NOTE*: the final line of data must end with a newline character `\n`.
-The possible actions are `index`, `create`, `delete` and `update`. 
+The possible actions are `index`, `create`, `delete` and `update`.
 `index` and `create` expect a source on the next
 line, and have the same semantics as the `op_type` parameter to the
 standard index API (i.e. create will fail if a document with the same
--- a/docs/reference/docs/index_.asciidoc
+++ b/docs/reference/docs/index_.asciidoc
@ -272,7 +272,7 @@ parameter. For example:
 --------------------------------------------------
 $ curl -XPUT localhost:9200/twitter/tweet/1?timestamp=2009-11-15T14%3A12%3A12 -d '{
    "user" : "kimchy",
-    "message" : "trying out Elasticsearch",
+    "message" : "trying out Elasticsearch"
 }'
 --------------------------------------------------
--- a/docs/reference/docs/multi-get.asciidoc
+++ b/docs/reference/docs/multi-get.asciidoc
@ -70,6 +70,45 @@ curl 'localhost:9200/test/type/_mget' -d '{
 }'
 --------------------------------------------------
 [float]
 [[mget-type]]
 === Optional Type
 The mget API allows for `_type` to be optional. Set it to `_all` or leave it empty in order
 to fetch the first document matching the id across all types.
 If you don't set the type and have many documents sharing the same `_id`, you will end up
 getting only the first matching document.
 For example, if you have a document 1 within typeA and typeB then the following request
 will give you back the same document twice:
 [source,js]
 --------------------------------------------------
 curl 'localhost:9200/test/_mget' -d '{
    "ids" : ["1", "1"]
 }'
 --------------------------------------------------
 You need in that case to explicitly set the `_type`:
 [source,js]
 --------------------------------------------------
 GET /test/_mget/
 {
  "docs" : [
        {
            "_type":"typeA",
            "_id" : "1"
        },
        {
            "_type":"typeB",
            "_id" : "1"
        }
    ]
 }
 --------------------------------------------------
 [float]
 [[mget-source-filtering]]
 === Source filtering
--- a/docs/reference/docs/termvectors.asciidoc
+++ b/docs/reference/docs/termvectors.asciidoc
@ -19,8 +19,9 @@ retrieved either with a parameter in the url
 curl -XGET 'http://localhost:9200/twitter/tweet/1/_termvector?fields=text,...'
 --------------------------------------------------
-or adding by adding the requested fields in the request body (see
+or by adding the requested fields in the request body (see
-example below).
+example below). Fields can also be specified with wildcards
 in a similar way to the <<query-dsl-multi-match-query,multi match query>> added[1.4.0].
 [float]
 === Return values
@ -38,9 +39,11 @@ statistics are returned for all fields but no term statistics.
 * term payloads (`payloads` : true), as base64 encoded bytes
 If the requested information wasn't stored in the index, it will be
-omitted without further warning. See <<mapping-types,type mapping>>
+computed on the fly if possible. See <<mapping-types,type mapping>>
 for how to configure your index to store term vectors.
 added[1.4.0,The ability to compute term vectors on the fly is only available from 1.4.0 onwards (see below)]
 [WARNING]
 ======
 Start and end offsets assume UTF-16 encoding is being used. If you want to use
@ -84,7 +87,7 @@ are therefore only useful as relative measures whereas the absolute
 numbers have no meaning in this context.
 [float]
-=== Example
+=== Example 1
 First, we create an index that stores term vectors, payloads etc. :
@ -222,3 +225,22 @@ Response:
    }
 }
 --------------------------------------------------
 [float]
 === Example 2 added[1.4.0]
 Additionally, term vectors which are not explicitly stored in the index are automatically
 computed on the fly. The following request returns all information and statistics for the
 fields in document `1`, even though the terms haven't been explicitly stored in the index.
 Note that for the field `text`, the terms are not re-generated.
 [source,js]
 --------------------------------------------------
 curl -XGET 'http://localhost:9200/twitter/tweet/1/_termvector?pretty=true' -d '{
  "fields" : ["text", "some_field_without_term_vectors"],
  "offsets" : true,
  "positions" : true,
  "term_statistics" : true,
  "field_statistics" : true
 }'
 --------------------------------------------------
--- a/docs/reference/docs/update.asciidoc
+++ b/docs/reference/docs/update.asciidoc
@ -109,6 +109,23 @@ curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
 If both `doc` and `script` are specified, then `doc` is ignored. Best is
 to put your field pairs of the partial document in the script itself.
 By default if `doc` is specified then the document is always updated even
 if the merging process doesn't cause any changes.  Specifying `detect_noop`
 as `true` will cause Elasticsearch to check if there are changes and, if
 there aren't, turn the update request into a noop. For example:
 [source,js]
 --------------------------------------------------
 curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
    "doc" : {
        "name" : "new_name"
    },
    "detect_noop": true
 }'
 --------------------------------------------------
 If `name` was `new_name` before the request was sent then the entire update
 request is ignored.
 There is also support for `upsert`. If the document does
 not already exist, the content of the `upsert` element will be used to
 index the fresh doc:
--- a/docs/reference/getting-started.asciidoc
+++ b/docs/reference/getting-started.asciidoc
@ -66,7 +66,7 @@ Within an index/type, you can store as many documents as you want. Note that alt
 An index can potentially store a large amount of data that can exceed the hardware limits of a single node. For example, a single index of a billion documents taking up 1TB of disk space may not fit on the disk of a single node or may be too slow to serve search requests from a single node alone.
-To solve this problem, Elasticsearch provides the ability to subdivide your index into multiple pieces called shards. When you create an index, you can simply define the number of shards that you want. Each shard is in itself a fully-functional and independent "index" that can be hosted on any node in the cluster. 
+To solve this problem, Elasticsearch provides the ability to subdivide your index into multiple pieces called shards. When you create an index, you can simply define the number of shards that you want. Each shard is in itself a fully-functional and independent "index" that can be hosted on any node in the cluster.
 Sharding is important for two primary reasons:
@ -76,7 +76,7 @@ Sharding is important for two primary reasons:
 The mechanics of how a shard is distributed and also how its documents are aggregated back into search requests are completely managed by Elasticsearch and are transparent to you as the user.
-In a network/cloud environment where failures can be expected anytime, it is very useful and highly recommended to have a failover mechanism in case a shard/node somehow goes offline or disappears for whatever reason. To this end, Elasticsearch allows you to make one or more copies of your index's shards into what are called replica shards, or replicas for short. 
+In a network/cloud environment where failures can be expected anytime, it is very useful and highly recommended to have a failover mechanism in case a shard/node somehow goes offline or disappears for whatever reason. To this end, Elasticsearch allows you to make one or more copies of your index's shards into what are called replica shards, or replicas for short.
 Replication is important for two primary reasons:
@ -93,7 +93,7 @@ With that out of the way, let's get started with the fun part...
 == Installation
-Elasticsearch requires Java 7. Specifically as of this writing, it is recommended that you use the Oracle JDK version 1.7.0_55. Java installation varies from platform to platform so we won't go into those details here. Suffice to say, before you install Elasticsearch, please check your Java version first by running (and then install/upgrade accordingly if needed):
+Elasticsearch requires Java 7. Specifically as of this writing, it is recommended that you use the Oracle JDK version {jdk}. Java installation varies from platform to platform so we won't go into those details here. Suffice to say, before you install Elasticsearch, please check your Java version first by running (and then install/upgrade accordingly if needed):
 [source,sh]
 --------------------------------------------------
@ -103,25 +103,25 @@ echo $JAVA_HOME
 Once we have Java set up, we can then download and run Elasticsearch. The binaries are available from http://www.elasticsearch.org/download[`www.elasticsearch.org/download`] along with all the releases that have been made in the past. For each release, you have a choice among a zip, tar, DEB, or RPM package. For simplicity, let's use the tar package.
-Let's download the Elasticsearch 1.1.1 tar as follows (Windows users should download the zip package):
+Let's download the Elasticsearch {version} tar as follows (Windows users should download the zip package):
-[source,sh]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
-curl -L -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.1.1.tar.gz
+curl -L -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-{version}.tar.gz
 --------------------------------------------------
 Then extract it as follows (Windows users should unzip the zip package):
-[source,sh]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
-tar -xvf elasticsearch-1.1.1.tar.gz
+tar -xvf elasticsearch-{version}.tar.gz
 --------------------------------------------------
 It will then create a bunch of files and folders in your current directory. We then go into the bin directory as follows:
-[source,sh]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
-cd elasticsearch-1.1.1/bin
+cd elasticsearch-{version}/bin
 --------------------------------------------------
 And now we are ready to start our node and single cluster (Windows users should run the elasticsearch.bat file):
@ -133,10 +133,10 @@ And now we are ready to start our node and single cluster (Windows users should
 If everything goes well, you should see a bunch of messages that look like below:
-[source,sh]
+["source","sh",subs="attributes,callouts"]
 --------------------------------------------------
 ./elasticsearch
-[2014-03-13 13:42:17,218][INFO ][node           ] [New Goblin] version[1.1.1], pid[2085], build[5c03844/2014-02-25T15:52:53Z]
+[2014-03-13 13:42:17,218][INFO ][node           ] [New Goblin] version[{version}], pid[2085], build[5c03844/2014-02-25T15:52:53Z]
 [2014-03-13 13:42:17,219][INFO ][node           ] [New Goblin] initializing ...
 [2014-03-13 13:42:17,223][INFO ][plugins        ] [New Goblin] loaded [], sites []
 [2014-03-13 13:42:19,831][INFO ][node           ] [New Goblin] initialized
@ -166,7 +166,7 @@ Also note the line marked http with information about the HTTP address (`192.168
 === The REST API
 Now that we have our node (and cluster) up and running, the next step is to understand how to communicate with it. Fortunately, Elasticsearch provides a very comprehensive and powerful REST API that you can use to interact with your cluster. Among the things that can be done with the API are the following:
- 
+
 * Check your cluster, node, and index health, status, and statistics
 * Administer your cluster, node, and index data and metadata
 * Perform CRUD (Create, Read, Update, and Delete) and search operations against your indexes
@ -174,15 +174,15 @@ Now that we have our node (and cluster) up and running, the next step is to unde
 === Cluster Health
-Let's start with a basic health check, which we can use to see how our cluster is doing. We'll be using curl to do this but you can use any tool that allows you to make HTTP/REST calls. Let's assume that we are still on the same node where we started Elasticsearch on and open another command shell window. 
+Let's start with a basic health check, which we can use to see how our cluster is doing. We'll be using curl to do this but you can use any tool that allows you to make HTTP/REST calls. Let's assume that we are still on the same node where we started Elasticsearch and open another command shell window.
 To check the cluster health, we will be using the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/cat.html[`_cat` API]. Remember previously that our node HTTP endpoint is available at port `9200`:
- 
+
 [source,sh]
 --------------------------------------------------
 curl 'localhost:9200/_cat/health?v'
 --------------------------------------------------
- 
+
 And the response:
 [source,sh]
@ -191,19 +191,19 @@ epoch      timestamp cluster       status node.total node.data shards pri relo i
 1394735289 14:28:09  elasticsearch green           1         1      0   0    0    0        0
 --------------------------------------------------
-We can see that our cluster named "elasticsearch" is up with a green status. 
+We can see that our cluster named "elasticsearch" is up with a green status.
 Whenever we ask for the cluster health, we either get green, yellow, or red. Green means everything is good (cluster is fully functional), yellow means all data is available but some replicas are not yet allocated (cluster is fully functional), and red means some data is not available for whatever reason. Note that even if a cluster is red, it still is partially functional (i.e. it will continue to serve search requests from the available shards) but you will likely need to fix it ASAP since you have missing data.
- 
+
 Also from the above response, we can see a total of 1 node and that we have 0 shards since we have no data in it yet. Note that since we are using the default cluster name (elasticsearch) and since Elasticsearch uses multicast network discovery by default to find other nodes, it is possible that you could accidentally start up more than one node in your network and have them all join a single cluster. In this scenario, you may see more than 1 node in the above response.
- 
+
 We can also get a list of nodes in our cluster as follows:
- 
+
 [source,sh]
 --------------------------------------------------
 curl 'localhost:9200/_cat/nodes?v'
 --------------------------------------------------
- 
+
 And the response:
 [source,sh]
@ -214,16 +214,16 @@ mwubuntu1    127.0.1.1            8           4 0.00 d         *      New Goblin
 --------------------------------------------------
 Here, we can see our one node named "New Goblin", which is the single node that is currently in our cluster.
- 
+
 === List All Indexes
 Now let's take a peek at our indexes:
- 
+
 [source,sh]
 --------------------------------------------------
 curl 'localhost:9200/_cat/indices?v'
 --------------------------------------------------
- 
+
 And the response:
 [source,sh]
@ -237,15 +237,15 @@ Which simply means we have no indexes yet in the cluster.
 === Create an Index
 Now let's create an index named "customer" and then list all the indexes again:
- 
+
 [source,sh]
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer?pretty'
 curl 'localhost:9200/_cat/indices?v'
 --------------------------------------------------
- 
+
 The first command creates the index named "customer" using the PUT verb. We simply append `pretty` to the end of the call to tell it to pretty-print the JSON response (if any).
- 
+
 And the response:
 [source,sh]
@ -261,7 +261,7 @@ yellow customer   5   1          0            0       495b           495b
 --------------------------------------------------
 The result of the second command tells us that we now have 1 index named customer and it has 5 primary shards and 1 replica (the defaults) and it contains 0 documents.
- 
+
 You might also notice that the customer index has a yellow health tagged to it. Recall from our previous discussion that yellow means that some replicas are not (yet) allocated. The reason this happens for this index is because Elasticsearch by default created one replica for this index. Since we only have one node running at the moment, that one replica cannot yet be allocated (for high availability) until a later point in time when another node joins the cluster. Once that replica gets allocated onto a second node, the health status for this index will turn to green.
 === Index and Query a Document
@ -275,8 +275,8 @@ Our JSON document: { "name": "John Doe" }
 [source,sh]
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer/external/1?pretty' -d '
-{ 
+{
-  "name": "John Doe" 
+  "name": "John Doe"
 }'
 --------------------------------------------------
@ -285,8 +285,8 @@ And the response:
 [source,sh]
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer/external/1?pretty' -d '
-{ 
+{
-  "name": "John Doe" 
+  "name": "John Doe"
 }'
 {
  "_index" : "customer",
@ -300,14 +300,14 @@ curl -XPUT 'localhost:9200/customer/external/1?pretty' -d '
 From the above, we can see that a new customer document was successfully created inside the customer index and the external type. The document also has an internal id of 1 which we specified at index time.
 It is important to note that Elasticsearch does not require you to explicitly create an index first before you can index documents into it. In the previous example, Elasticsearch will automatically create the customer index if it didn't already exist beforehand.
- 
+
 Let's now retrieve that document that we just indexed:
- 
+
 [source,sh]
 --------------------------------------------------
 curl -XGET 'localhost:9200/customer/external/1?pretty'
 --------------------------------------------------
- 
+
 And the response:
 [source,sh]
@ -323,17 +323,17 @@ curl -XGET 'localhost:9200/customer/external/1?pretty'
 --------------------------------------------------
 Nothing out of the ordinary here other than a field, `found`, stating that we found a document with the requested ID 1 and another field, `_source`, which returns the full JSON document that we indexed from the previous step.
- 
+
 === Delete an Index
 Now let's delete the index that we just created and then list all the indexes again:
- 
+
 [source,sh]
 --------------------------------------------------
 curl -XDELETE 'localhost:9200/customer?pretty'
 curl 'localhost:9200/_cat/indices?v'
 --------------------------------------------------
- 
+
 And the response:
 [source,sh]
@ -354,20 +354,20 @@ Before we move on, let's take a closer look again at some of the API commands th
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer'
 curl -XPUT 'localhost:9200/customer/external/1' -d '
-{ 
+{
-  "name": "John Doe" 
+  "name": "John Doe"
 }'
 curl 'localhost:9200/customer/external/1'
 curl -XDELETE 'localhost:9200/customer'
 --------------------------------------------------
- 
+
 If we study the above commands carefully, we can actually see a pattern of how we access data in Elasticsearch. That pattern can be summarized as follows:
- 
+
 [source,sh]
 --------------------------------------------------
 curl -<REST Verb> <Node>:<Port>/<Index>/<Type>/<ID>
 --------------------------------------------------
- 
+
 This REST access pattern is so pervasive throughout all the API commands that if you can simply remember it, you will have a good head start at mastering Elasticsearch.
 == Modifying Your Data
@ -382,8 +382,8 @@ We've previously seen how we can index a single document. Let's recall that comm
 [source,sh]
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer/external/1?pretty' -d '
-{ 
+{
-  "name": "John Doe" 
+  "name": "John Doe"
 }'
 --------------------------------------------------
@ -392,8 +392,8 @@ Again, the above will index the specified document into the customer index, exte
 [source,sh]
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer/external/1?pretty' -d '
-{ 
+{
-  "name": "Jane Doe" 
+  "name": "Jane Doe"
 }'
 --------------------------------------------------
@ -402,8 +402,8 @@ The above changes the name of the document with the ID of 1 from "John Doe" to "
 [source,sh]
 --------------------------------------------------
 curl -XPUT 'localhost:9200/customer/external/2?pretty' -d '
-{ 
+{
-  "name": "Jane Doe" 
+  "name": "Jane Doe"
 }'
 --------------------------------------------------
@ -416,8 +416,8 @@ This example shows how to index a document without an explicit ID:
 [source,sh]
 --------------------------------------------------
 curl -XPOST 'localhost:9200/customer/external?pretty' -d '
-{ 
+{
-  "name": "Jane Doe" 
+  "name": "Jane Doe"
 }'
 --------------------------------------------------
--- a/docs/reference/images/Exponential.png
+++ b/docs/reference/images/Exponential.png
--- a/docs/reference/images/Gaussian.png
+++ b/docs/reference/images/Gaussian.png
--- a/docs/reference/images/Linear.png
+++ b/docs/reference/images/Linear.png
--- a/docs/reference/images/lambda.png
+++ b/docs/reference/images/lambda.png
--- a/docs/reference/images/lambda_calc.png
+++ b/docs/reference/images/lambda_calc.png
--- a/docs/reference/images/s_calc.png
+++ b/docs/reference/images/s_calc.png
--- a/docs/reference/images/sigma.png
+++ b/docs/reference/images/sigma.png
--- a/docs/reference/images/sigma_calc.png
+++ b/docs/reference/images/sigma_calc.png
--- a/docs/reference/index-modules/allocation.asciidoc
+++ b/docs/reference/index-modules/allocation.asciidoc
@ -100,7 +100,7 @@ settings API.
 [[disk]]
 === Disk-based Shard Allocation
-coming[1.3.0] disk based shard allocation is enabled from version 1.3.0 onward
+added[1.3.0] disk based shard allocation is enabled from version 1.3.0 onward
 Elasticsearch can be configured to prevent shard
 allocation on nodes depending on disk usage for the node. This
--- a/docs/reference/index-modules/cache.asciidoc
+++ b/docs/reference/index-modules/cache.asciidoc
@ -31,25 +31,3 @@ configured in the node configuration).
 `indices.cache.filter.size` can accept either a percentage value, like
 `30%`, or an exact value, like `512mb`.
 [float]
 [[index-filter]]
 ==== Index Filter Cache
 A filter cache that exists on the index level (on each node). Generally,
 not recommended for use since its memory usage depends on which shards
 are allocated on each node and its hard to predict it. The types are:
 `resident`, `soft` and `weak`.
 All types support the following settings:
 [cols="<,<",options="header",]
 |=======================================================================
 |Setting |Description
 |`index.cache.filter.max_size` |The max size (count, not byte size) of
 the cache (per search segment in a shard). Defaults to not set (`-1`),
 which is usually fine with `soft` cache and proper cacheable filters.
 |`index.cache.filter.expire` |A time based setting that expires filters
 after a certain time of inactivity. Defaults to `-1`. For example, can
 be set to `5m` for a 5 minute expiry.
 |=======================================================================
--- a/docs/reference/index-modules/codec.asciidoc
+++ b/docs/reference/index-modules/codec.asciidoc
@ -144,21 +144,21 @@ Type name: `bloom`
 [TIP]
 ==================================================
-It can sometime make sense to disable bloom filters. For instance, if you are
+As of 1.4, the bloom filters are no longer loaded at search time by
-logging into an index per day, and you have thousands of indices, the bloom
+default: they consume RAM in proportion to the number of unique terms,
-filters can take up a sizable amount of memory. For most queries you are only
+which can quickly add up for certain use cases, and separate
-interested in recent indices, so you don't mind CRUD operations on older
+performance improvements have made the performance gains with bloom
-indices taking slightly longer.
+filters very small.
-In these cases you can disable loading of the bloom filter on  a per-index
+You can enable loading of the bloom filter at search time on a
-basis by updating the index settings:
+per-index basis by updating the index settings:
 [source,js]
 --------------------------------------------------
-PUT /old_index/_settings?index.codec.bloom.load=false
+PUT /old_index/_settings?index.codec.bloom.load=true
 --------------------------------------------------
-This setting, which defaults to `true`, can be updated on a live index. Note,
+This setting, which defaults to `false`, can be updated on a live index. Note,
 however, that changing the value will cause the index to be reopened, which
 will invalidate any existing caches.
--- a/docs/reference/index-modules/fielddata.asciidoc
+++ b/docs/reference/index-modules/fielddata.asciidoc
@ -24,28 +24,63 @@ field data after a certain time of inactivity. Defaults to `-1`. For
 example, can be set to `5m` for a 5 minute expiry.
 |=======================================================================
 [float]
 [[circuit-breaker]]
 === Circuit Breaker
 coming[1.4.0,Prior to 1.4.0 there was only a single circuit breaker for fielddata]
 Elasticsearch contains multiple circuit breakers used to prevent operations from
 causing an OutOfMemoryError. Each breaker specifies a limit for how much memory
 it can use. Additionally, there is a parent-level breaker that specifies the
 total amount of memory that can be used across all breakers.
 The parent-level breaker can be configured with the following setting:
 `indices.breaker.total.limit`::
    Starting limit for overall parent breaker, defaults to 70% of JVM heap
 All circuit breaker settings can be changed dynamically using the cluster update
 settings API.
 [float]
 [[fielddata-circuit-breaker]]
-=== Field data circuit breaker
+==== Field data circuit breaker
 The field data circuit breaker allows Elasticsearch to estimate the amount of
 memory a field will require to be loaded into memory. It can then prevent the
 field data loading by raising an exception. By default the limit is configured
 to 60% of the maximum JVM heap. It can be configured with the following
 parameters:
-[cols="<,<",options="header",]
+`indices.breaker.fielddata.limit`::
-|=======================================================================
+    Limit for fielddata breaker, defaults to 60% of JVM heap
 |Setting |Description
 |`indices.fielddata.breaker.limit` |Maximum size of estimated field data
 to allow loading. Defaults to 60% of the maximum JVM heap.
 |`indices.fielddata.breaker.overhead` |A constant that all field data
 estimations are multiplied with to determine a final estimation. Defaults to
 1.03
 |=======================================================================
-Both the `indices.fielddata.breaker.limit` and
+`indices.breaker.fielddata.overhead`::
-`indices.fielddata.breaker.overhead` can be changed dynamically using the
+    A constant that all field data estimations are multiplied with to determine a
-cluster update settings API.
+    final estimation. Defaults to 1.03
 `indices.fielddata.breaker.limit`::
    deprecated[1.4.0,Replaced by `indices.breaker.fielddata.limit`]
 `indices.fielddata.breaker.overhead`::
    deprecated[1.4.0,Replaced by `indices.breaker.fielddata.overhead`]
 [float]
 [[request-circuit-breaker]]
 ==== Request circuit breaker
 coming[1.4.0]
 The request circuit breaker allows Elasticsearch to prevent per-request data
 structures (for example, memory used for calculating aggregations during a
 request) from exceeding a certain amount of memory.
 `indices.breaker.request.limit`::
    Limit for request breaker, defaults to 40% of JVM heap
 `indices.breaker.request.overhead`::
    A constant that all request estimations are multiplied with to determine a
    final estimation. Defaults to 1
 [float]
 [[fielddata-monitoring]]
@ -73,10 +108,10 @@ data format.
 [source,js]
 --------------------------------------------------
 {
-    tag: {
+    "tag": {
-        type:      "string",
+        "type":      "string",
-        fielddata: {
+        "fielddata": {
-            format: "fst"
+            "format": "fst"
        }
    }
 }
@ -173,10 +208,10 @@ It is possible to force field data to be loaded and cached eagerly through the
 [source,js]
 --------------------------------------------------
 {
-    category: {
+    "category": {
-        type:      "string",
+        "type":      "string",
-        fielddata: {
+        "fielddata": {
-            loading: "eager"
+            "loading": "eager"
        }
    }
 }
@ -187,10 +222,10 @@ Global ordinals can also be eagerly loaded:
 [source,js]
 --------------------------------------------------
 {
-    category: {
+    "category": {
-        type:      "string",
+        "type":      "string",
-        fielddata: {
+        "fielddata": {
-            loading: "eager_global_ordinals"
+            "loading": "eager_global_ordinals"
        }
    }
 }
@ -212,10 +247,10 @@ will return an error.
 [source,js]
 --------------------------------------------------
 {
-    text: {
+    "text": {
-        type:      "string",
+        "type":      "string",
-        fielddata: {
+        "fielddata": {
-            format: "disabled"
+            "format": "disabled"
        }
    }
 }
@ -253,14 +288,14 @@ number of docs that the segment should contain with `min_segment_size`:
 [source,js]
 --------------------------------------------------
 {
-    tag: {
+    "tag": {
-        type:      "string",
+        "type":      "string",
-        fielddata: {
+        "fielddata": {
-            filter: {
+            "filter": {
-                frequency: {
+                "frequency": {
-                    min:              0.001,
+                    "min":              0.001,
-                    max:              0.1,
+                    "max":              0.1,
-                    min_segment_size: 500
+                    "min_segment_size": 500
                }
            }
        }
@ -280,13 +315,13 @@ expression which matches terms beginning with `#`:
 [source,js]
 --------------------------------------------------
 {
-    tweet: {
+    "tweet": {
-        type:      "string",
+        "type":      "string",
-        analyzer:  "whitespace"
+        "analyzer":  "whitespace",
-        fielddata: {
+        "fielddata": {
-            filter: {
+            "filter": {
-                regex: {
+                "regex": {
-                    pattern: "^#.*"
+                    "pattern": "^#.*"
                }
            }
        }
@ -302,18 +337,18 @@ The `frequency` and `regex` filters can be combined:
 [source,js]
 --------------------------------------------------
 {
-    tweet: {
+    "tweet": {
-        type:      "string",
+        "type":      "string",
-        analyzer:  "whitespace"
+        "analyzer":  "whitespace",
-        fielddata: {
+        "fielddata": {
-            filter: {
+            "filter": {
-                regex: {
+                "regex": {
-                    pattern:          "^#.*",
+                    "pattern":          "^#.*"
                },
-                frequency: {
+                "frequency": {
-                    min:              0.001,
+                    "min":              0.001,
-                    max:              0.1,
+                    "max":              0.1,
-                    min_segment_size: 500
+                    "min_segment_size": 500
                }
            }
        }
--- a/docs/reference/index-modules/merge.asciidoc
+++ b/docs/reference/index-modules/merge.asciidoc
@ -193,25 +193,14 @@ scheduler supports this setting:
 `index.merge.scheduler.max_thread_count`::
-The maximum number of concurrent merge threads that may run at once. Defaults
+The maximum number of threads that may be merging at once. Defaults to
-to `1` which works best with spinning-magnets disks.  If you are using
+`Math.max(1, Math.min(3, Runtime.getRuntime().availableProcessors() / 2))`
-a good solid-state disk (SSD) instead then try setting this to `3`.
+which works well for a good solid-state-disk (SSD).  If your index is on
 spinning platter drives instead, decrease this to 1.
 [float]
 ==== SerialMergeScheduler
-A merge scheduler that simply does each merge sequentially using the
+This is accepted for backwards compatibility, but just uses
-calling thread (blocking the operations that triggered the merge or the
+ConcurrentMergeScheduler with index.merge.scheduler.max_thread_count
-index operation). This merge scheduler has a merge thread pool that
+set to 1 so that only 1 merge may run at a time.
 explicitly schedules merges, and it makes sure that merges are serial
 within a shard, yet concurrent across multiple shards.
 The scheduler supports the following settings:
 `index.merge.scheduler.max_merge_at_once`::
 The maximum number of merges a single merge run performs. This setting prevents
 executing an unlimited number of merges in a loop until another shard has a
 chance to get a merge thread from the pool. If this limit is reached the
 merge thread returns to the pool and continues once the call to a single
 shard is executed. The default is `5`
--- a/docs/reference/index-modules/store.asciidoc
+++ b/docs/reference/index-modules/store.asciidoc
@ -57,8 +57,8 @@ using the index update settings API dynamically.
 File system based storage is the default storage used. There are
 different implementations or _storage types_. The best one for the
 operating environment will be automatically chosen: `mmapfs` on
-Solaris/Linux/Windows 64bit, `simplefs` on Windows 32bit, and
+Windows 64bit, `simplefs` on Windows 32bit, and `default` 
-`niofs` for the rest.
+(hybrid `niofs` and `mmapfs`) for the rest.
 This can be overridden for all indices by adding this to the
 `config/elasticsearch.yml` file:
@ -72,12 +72,11 @@ It can also be set on a per-index basis at index creation time:
 [source,json]
 ---------------------------------
-curl -XPUT localhost:9200/my_index
+curl -XPUT localhost:9200/my_index -d '
 {
    "settings": {
        "index.store.type": "niofs"
    }
-}
+}';
 ---------------------------------
 The following sections list all the different storage types supported.
@ -112,6 +111,17 @@ process equal to the size of the file being mapped. Before using this
 class, be sure you have plenty of virtual address space.
 See <<vm-max-map-count>>
 [[default_fs]]
 [float]
 ==== Hybrid MMap / NIO FS added[1.3.0]
 The `default` type stores the shard index on the file system depending on
 the file type by mapping a file into memory (mmap) or using Java NIO. Currently
 only the Lucene term dictionary and doc values files are memory mapped to reduce
 the impact on the operating system. All other files are opened using Lucene `NIOFSDirectory`.
 Address space settings (<<vm-max-map-count>>) might also apply if your term
 dictionaries are large.
 [float]
 [[store-memory]]
 === Memory
--- a/docs/reference/index.asciidoc
+++ b/docs/reference/index.asciidoc
@ -1,6 +1,10 @@
 [[elasticsearch-reference]]
 = Reference
 :version: 1.2.0
 :branch:  1.2
 :jdk:     1.7.0_60
 include::getting-started.asciidoc[]
 include::setup.asciidoc[]
--- a/docs/reference/indices/flush.asciidoc
+++ b/docs/reference/indices/flush.asciidoc
@ -14,6 +14,30 @@ $ curl -XPOST 'http://localhost:9200/twitter/_flush'
 --------------------------------------------------
 [float]
 [[flush-parameters]]
 === Request Parameters
 The flush API accepts the following request parameters:
 [horizontal]
 `wait_if_ongoing`::  If set to `true` the flush operation will block until the
 flush can be executed if another flush operation is already executing.
 The default is `false` and will cause an exception to be thrown on 
 the shard level if another flush operation is already running. coming[1.4.0]
 `full`:: If set to `true` a new index writer is created and settings that have
 been changed related to the index writer will be refreshed. Note: if a full flush
 is required for a setting to take effect this will be part of the settings update
 process and is not required to be executed by the user.
 (This setting can be considered as internal)
 `force`:: Whether a flush should be forced even if it is not necessarily needed ie.
 if no changes will be committed to the index. This is useful if transaction log IDs
 should be incremented even if no uncommitted changes are present.
 (This setting can be considered as internal)
 [float]
 [[flush-multi-index]]
 === Multi Index
 The flush API can be applied to more than one index with a single call,
--- a/docs/reference/mapping.asciidoc
+++ b/docs/reference/mapping.asciidoc
@ -75,3 +75,4 @@ include::mapping/conf-mappings.asciidoc[]
 include::mapping/meta.asciidoc[]
 include::mapping/transform.asciidoc[]
--- a/docs/reference/mapping/fields.asciidoc
+++ b/docs/reference/mapping/fields.asciidoc
@ -21,6 +21,8 @@ include::fields/boost-field.asciidoc[]
 include::fields/parent-field.asciidoc[]
 include::fields/field-names-field.asciidoc[]
 include::fields/routing-field.asciidoc[]
 include::fields/index-field.asciidoc[]
--- a/docs/reference/mapping/fields/boost-field.asciidoc
+++ b/docs/reference/mapping/fields/boost-field.asciidoc
@ -68,3 +68,5 @@ any field the document:
 <1> The original query, now wrapped in a `function_score` query.
 <2> This function returns the value in `my_boost_field`, which is then
    multiplied by the query `_score` for each document.
 Note, that `field_value_factor` is a 1.2.x feature.
--- a/docs/reference/mapping/fields/field-names-field.asciidoc
+++ b/docs/reference/mapping/fields/field-names-field.asciidoc
@ -0,0 +1,11 @@
 [[mapping-field-names-field]]
 === `_field_names`
 added[1.3.0]
 The `_field_names` field indexes the field names of a document, which can later
 be used to search for documents based on the fields that they contain typically
 using the `exists` and `missing` filters.
 `_field_names` is indexed by default for indices that have been created after
 Elasticsearch 1.3.0.
--- a/docs/reference/mapping/fields/timestamp-field.asciidoc
+++ b/docs/reference/mapping/fields/timestamp-field.asciidoc
@ -56,7 +56,7 @@ Will cause `2009-11-15T14:12:12` to be used as the timestamp value for:
 }
 --------------------------------------------------
-Note, using `path` without explicit timestamp value provided require an
+Note, using `path` without explicit timestamp value provided requires an
 additional (though quite fast) parsing phase.
 [float]
--- a/docs/reference/mapping/transform.asciidoc
+++ b/docs/reference/mapping/transform.asciidoc
@ -0,0 +1,61 @@
 [[mapping-transform]]
 == Transform
 added[1.3.0]
 The document can be transformed before it is indexed by registering a
 script in the `transform` element of the mapping.  The result of the
 transform is indexed but the original source is stored in the `_source`
 field.  Example:
 [source,js]
 --------------------------------------------------
 {
    "example" : {
        "transform" : {
            "script" : "if (ctx._source['title']?.startsWith('t')) ctx._source['suggest'] = ctx._source['content']",
            "params" : {
                "variable" : "not used but an example anyway"
            },
            "lang": "groovy"
        },
        "properties": {
           "title": { "type": "string" },
           "content": { "type": "string" },
           "suggest": { "type": "string" }
        }
    }
 }
 --------------------------------------------------
 It's also possible to specify multiple transforms:
 [source,js]
 --------------------------------------------------
 {
    "example" : {
        "transform" : [
            {"script": "ctx._source['suggest'] = ctx._source['content']"},
            {"script": "ctx._source['foo'] = ctx._source['bar'];"}
        ]
    }
 }
 --------------------------------------------------
 Because the result isn't stored in the source it can't normally be fetched by
 source filtering.  It can be highlighted if it is marked as stored.
 === Get Transformed
 The get endpoint will retransform the source if the `_source_transform`
 parameter is set.  Example:
 [source,bash]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/test/example/3?pretty&_source_transform"
 --------------------------------------------------
 The transform is performed before any source filtering but it is mostly
 designed to make it easy to see what was passed to the index for debugging.
 === Immutable Transformation
 Once configured the transform script cannot be modified.  This is not
 because that is technically impossible but instead because madness lies
 down that road.
--- a/docs/reference/mapping/types/nested-type.asciidoc
+++ b/docs/reference/mapping/types/nested-type.asciidoc
@ -1,46 +1,87 @@
 [[mapping-nested-type]]
 === Nested Type
-Nested objects/documents allow to map certain sections in the document
+The `nested` type works like the <<mapping-object-type,`object` type>> except
-indexed as nested allowing to query them as if they are separate docs
+that an array of `objects` is flattened, while an array of `nested`  objects
-joining with the parent owning doc.
+allows each object to be queried independently.  To explain, consider this
-
+document:
 One of the problems when indexing inner objects that occur several times
 in a doc is that "cross object" search match will occur, for example:
 [source,js]
 --------------------------------------------------
 {
-    "obj1" : [
+    "group" : "fans",
    "user" : [
        {
-            "name" : "blue",
+            "first" : "John",
-            "count" : 4
+            "last" :  "Smith"
        },
        {
-            "name" : "green",
+            "first" : "Alice",
-            "count" : 6
+            "last" :  "White"
-        }
+        }
    ]
 }
 --------------------------------------------------
-Searching for name set to blue and count higher than 5 will match the
+If the `user` field is of type `object`, this document would be indexed
-doc, because in the first element the name matches blue, and in the
+internally something like this:
 second element, count matches "higher than 5".
-Nested mapping allows mapping certain inner objects (usually multi
+[source,js]
-instance ones), for example:
+--------------------------------------------------
 {
    "group" :        "fans",
    "user.first" : [ "alice", "john" ],
    "user.last" :  [ "smith", "white" ]
 }
 --------------------------------------------------
 The `first` and `last` fields are flattened, and the association between
 `alice` and `white` is lost.  This document would incorrectly match a query
 for `alice AND smith`.
 If the `user` field is of type `nested`, each object is indexed as a separate
 document, something like this:
 [source,js]
 --------------------------------------------------
 { <1>
    "user.first" : "alice",
    "user.last" :  "white"
 }
 { <1>
    "user.first" : "john",
    "user.last" :  "smith"
 }
 { <2>
    "group" :       "fans"
 }
 --------------------------------------------------
 <1> Hidden nested documents.
 <2> Visible ``parent'' document.
 By keeping each nested object separate, the association between the
 `user.first` and `user.last` fields is maintained. The query for `alice AND
 smith` would *not* match this document.
 Searching on nested docs can be done using either the
 <<query-dsl-nested-query,nested query>> or
 <<query-dsl-nested-filter,nested filter>>.
 ==== Mapping
 The mapping for `nested` fields is the same as `object` fields, except that it
 uses type `nested`:
 [source,js]
 --------------------------------------------------
 {
    "type1" : {
        "properties" : {
-            "obj1" : {
+            "users" : {
                "type" : "nested",
                "properties": {
-                    "name" : {"type": "string", "index": "not_analyzed"},
+                    "first" : {"type": "string" },
-                    "count" : {"type": "integer"}
+                    "last"  : {"type": "string" }
                }
            }
        }
@ -48,26 +89,60 @@ instance ones), for example:
 }
 --------------------------------------------------
-The above will cause all `obj1` to be indexed as a nested doc. The
+NOTE: changing an `object` type to `nested` type requires reindexing.
 mapping is similar in nature to setting `type` to `object`, except that
 it's `nested`. Nested object fields can be defined explicitly as in the
 example above or added dynamically in the same way as for the root object.
-Note: changing an object type to nested type requires reindexing.
+You may want to index inner objects both as `nested` fields *and*  as flattened
 `object` fields, eg for highlighting.  This can be achieved by setting
 `include_in_parent` to `true`:
-The `nested` object fields can also be automatically added to the
+[source,js]
-immediate parent by setting `include_in_parent` to true, and also
+--------------------------------------------------
-included in the root object by setting `include_in_root` to true.
+{
    "type1" : {
        "properties" : {
            "users" : {
                "type" : "nested",
                "include_in_parent": true,
                "properties": {
                    "first" : {"type": "string" },
                    "last"  : {"type": "string" }
                }
            }
        }
    }
 }
 --------------------------------------------------
-Nested docs will also automatically use the root doc `_all` field.
+The result of indexing our example document would be something like this:
-Searching on nested docs can be done using either the
+[source,js]
-<<query-dsl-nested-query,nested query>> or
+--------------------------------------------------
-<<query-dsl-nested-filter,nested filter>>.
+{ <1>
    "user.first" : "alice",
    "user.last" :  "white"
 }
 { <1>
    "user.first" : "john",
    "user.last" :  "smith"
 }
 { <2>
    "group" :        "fans",
    "user.first" : [ "alice", "john" ],
    "user.last" :  [ "smith", "white" ]
 }
 --------------------------------------------------
 <1> Hidden nested documents.
 <2> Visible ``parent'' document.
 [float]
 ==== Internal Implementation
 Nested fields may contain other nested fields.  The `include_in_parent` parameter
 refers to the direct parent of the field, while the `include_in_root`
 parameter refers only to the topmost ``root'' object or document.
 Nested docs will automatically use the root doc `_all` field only.
 .Internal Implementation
 *********************************************
 Internally, nested objects are indexed as additional documents, but,
 since they can be guaranteed to be indexed within the same "block", it
 allows for extremely fast joining with parent docs.
@ -84,3 +159,4 @@ the `nested` query scope.
 The `_source` field is always associated with the parent document and,
 because of that, field values for nested objects can be fetched via the source.
 *********************************************
--- a/docs/reference/mapping/types/root-object-type.asciidoc
+++ b/docs/reference/mapping/types/root-object-type.asciidoc
@ -1,16 +1,13 @@
 [[mapping-root-object-type]]
 === Root Object Type
-The root object mapping is an
+The root object mapping is an <<mapping-object-type,object type mapping>> that
-<<mapping-object-type,object type mapping>> that
+maps the root object (the type itself). It supports all of the different
-maps the root object (the type itself). On top of all the different
+mappings that can be set using the <<mapping-object-type,object type mapping>>.
 mappings that can be set using the
 <<mapping-object-type,object type mapping>>, it
 allows for additional, type level mapping definitions.
-The root object mapping allows to index a JSON document that either
+The root object mapping allows you to index a JSON document that only contains its
-starts with the actual mapping type, or only contains its fields. For
+fields. For example, the following `tweet` JSON can be indexed without
-example, the following `tweet` JSON can be indexed:
+specifying the `tweet` type in the document itself:
 [source,js]
 --------------------------------------------------
@ -19,20 +16,6 @@ example, the following `tweet` JSON can be indexed:
 }
 --------------------------------------------------
 But, also the following JSON can be indexed:
 [source,js]
 --------------------------------------------------
 {
    "tweet" : {
        "message" : "This is a tweet!"
    }
 }
 --------------------------------------------------
 Out of the two, it is preferable to use the document *without* the type
 explicitly set.
 [float]
 ==== Index / Search Analyzers
--- a/docs/reference/modules/advanced-scripting.asciidoc
+++ b/docs/reference/modules/advanced-scripting.asciidoc
@ -37,12 +37,12 @@ depending on the shard the current document resides in.
 `_index.numDocs()`::
-    Number of documents in shard. 
+    Number of documents in shard.
-    
+
 `_index.maxDoc()`::
    Maximal document number in shard.
-    
+
 `_index.numDeletedDocs()`::
    Number of deleted documents in shard.
@ -62,7 +62,7 @@ Field statistics can be accessed with a subscript operator like this:
 `_index['FIELD'].sumttf()`::
    Sum of `ttf` over all terms that appear in field `FIELD` in all documents.
-    
+
 `_index['FIELD'].sumdf()`::
    The sum of `df` s over all terms that appear in field `FIELD` in all
@ -77,7 +77,7 @@ The number of terms in a field cannot be accessed using the `_index` variable. S
 === Term statistics:
 Term statistics for a field can be accessed with a subscript operator like
-this: `_index['FIELD']['TERM']`. This will never return null, even if term or field does not exist. 
+this: `_index['FIELD']['TERM']`. This will never return null, even if term or field does not exist.
 If you do not need the term frequency, call `_index['FIELD'].get('TERM', 0)`
 to avoid unnecessary initialization of the frequencies. The flag will only have
 an effect if you set the `index_options` to `docs` (see <<mapping-core-types, mapping documentation>>).
@ -162,11 +162,11 @@ Positions can be accessed with an iterator that returns an object
 Example: sums up all payloads for the term `foo`.
-[source,mvel]
+[source,groovy]
 ---------------------------------------------------------
 termInfo = _index['my_field'].get('foo',_PAYLOADS);
 score = 0;
-for (pos : termInfo) {
+for (pos in termInfo) {
    score = score + pos.payloadAsInt(0);
 }
 return score;
@ -181,4 +181,3 @@ The `_index` variable can only be used to gather statistics for single terms. If
 https://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/Fields.html[Fields]
 instance. This object can then be used as described in https://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/Fields.html[lucene doc] to iterate over fields and then for each field iterate over each term in the field.
 The method will return null if the term vectors were not stored.
--- a/docs/reference/modules/discovery/zen.asciidoc
+++ b/docs/reference/modules/discovery/zen.asciidoc
@ -75,7 +75,7 @@ configure the election to handle cases of slow or congested networks
 (higher values assure less chance of failure). Once a node joins, it
 will send a join request to the master (`discovery.zen.join_timeout`)
 with a timeout defaulting at 20 times the ping timeout.
-coming[1.3.0,Previously defaulted to 10 times the ping timeout].
+added[1.3.0,Previously defaulted to 10 times the ping timeout].
 Nodes can be excluded from becoming a master by setting `node.master` to
 `false`. Note, once a node is a client node (`node.client` set to
--- a/docs/reference/modules/gateway.asciidoc
+++ b/docs/reference/modules/gateway.asciidoc
@ -42,14 +42,14 @@ once all `gateway.recover_after...nodes` conditions are met.
 The `gateway.expected_nodes` allows to set how many data and master
 eligible nodes are expected to be in the cluster, and once met, the
-`recover_after_time` is ignored and recovery starts. The
+`gateway.recover_after_time` is ignored and recovery starts.
-`gateway.expected_data_nodes` and `gateway.expected_master_nodes`
+Setting `gateway.expected_nodes` also defaults `gateway.recover_after_time` to `5m` added[1.3.0, before `expected_nodes`
 required `recover_after_time` to be set]. The `gateway.expected_data_nodes` and `gateway.expected_master_nodes`
 settings are also supported. For example setting:
 [source,js]
 --------------------------------------------------
 gateway:
    recover_after_nodes: 1
    recover_after_time: 5m
    expected_nodes: 2
 --------------------------------------------------
--- a/docs/reference/modules/gateway/local.asciidoc
+++ b/docs/reference/modules/gateway/local.asciidoc
@ -18,9 +18,8 @@ For example:
 [source,js]
 --------------------------------------------------
 gateway:
-    recover_after_nodes: 1
+    recover_after_nodes: 3
-    recover_after_time: 5m
+    expected_nodes: 5
    expected_nodes: 2
 --------------------------------------------------
 [float]
--- a/docs/reference/modules/http.asciidoc
+++ b/docs/reference/modules/http.asciidoc
@ -42,7 +42,10 @@ i.e. whether a browser on another origin can do requests to
 Elasticsearch. Defaults to `true`.
 |`http.cors.allow-origin` |Which origins to allow. Defaults to `*`,
-i.e. any origin.
+i.e. any origin. If you prepend and append a `/` to the value, this will
 be treated as a regular expression, allowing you to support HTTP and HTTPS.
 For example, using `/https?:\/\/localhost(:[0-9]+)?/` would return the
 request header appropriately in both cases.
 |`http.cors.max-age` |Browsers send a "preflight" OPTIONS-request to
 determine CORS settings. `max-age` defines how long the result should
--- a/docs/reference/modules/plugins.asciidoc
+++ b/docs/reference/modules/plugins.asciidoc
@ -191,6 +191,9 @@ You can disable that check using `plugins.check_lucene: false`.
 * https://github.com/elasticsearch/elasticsearch-cloud-azure[Azure Cloud Plugin] - Azure discovery
 * https://github.com/elasticsearch/elasticsearch-cloud-gce[Google Compute Engine Cloud Plugin] - GCE discovery
 .Supported by the community
 * https://github.com/shikhar/eskka[eskka Discovery Plugin] (by Shikhar Bhushan)
 [float]
 [[river]]
 ==== River Plugins
@ -225,6 +228,8 @@ You can disable that check using `plugins.check_lucene: false`.
 * https://github.com/plombard/SubversionRiver[Subversion River Plugin] (by Pascal Lombard)
 * https://github.com/kzwang/elasticsearch-river-dynamodb[DynamoDB River Plugin] (by Kevin Wang)
 * https://github.com/salyh/elasticsearch-river-imap[IMAP/POP3 Email River Plugin] (by Hendrik Saly)
 * https://github.com/codelibs/elasticsearch-river-web[Web River Plugin] (by CodeLibs Project)
 * https://github.com/eea/eea.elasticsearch.river.rdf[EEA ElasticSearch RDF River Plugin] (by the European Environment Agency)
 [float]
 [[transport]]
@ -298,4 +303,6 @@ You can disable that check using `plugins.check_lucene: false`.
 * https://github.com/kzwang/elasticsearch-image[Elasticsearch Image Plugin] (by Kevin Wang)
 * https://github.com/wikimedia/search-highlighter[Elasticsearch Experimental Highlighter] (by Wikimedia Foundation/Nik Everett)
 * https://github.com/salyh/elasticsearch-security-plugin[Elasticsearch Security Plugin] (by Hendrik Saly)
 * https://github.com/codelibs/elasticsearch-taste[Elasticsearch Taste Plugin] (by CodeLibs Project)
 * http://siren.solutions/siren/downloads/[Elasticsearch SIREn Plugin]: Nested data search (by SIREn Solutions)
--- a/docs/reference/modules/scripting.asciidoc
+++ b/docs/reference/modules/scripting.asciidoc
@ -6,28 +6,32 @@ expressions. For example, scripts can be used to return "script fields"
 as part of a search request, or can be used to evaluate a custom score
 for a query and so on.
-The scripting module uses by default http://mvel.codehaus.org/[mvel] as
+deprecated[1.3.0,Mvel has been deprecated and will be removed in 1.4.0]
-the scripting language with some extensions. mvel is used since it is
+
-extremely fast and very simple to use, and in most cases, simple
+added[1.3.0,Groovy scripting support]
-expressions are needed (for example, mathematical equations).
+
 The scripting module uses by default http://groovy.codehaus.org/[groovy]
 (previously http://mvel.codehaus.org/[mvel] in 1.3.x and earlier) as the
 scripting language with some extensions. Groovy is used since it is extremely
 fast and very simple to use.
 Additional `lang` plugins are provided to allow to execute scripts in
 different languages. Currently supported plugins are `lang-javascript`
-for JavaScript, `lang-groovy` for Groovy, and `lang-python` for Python.
+for JavaScript, `lang-mvel` for Mvel, and `lang-python` for Python.
 All places where a `script` parameter can be used, a `lang` parameter
 (on the same level) can be provided to define the language of the
-script. The `lang` options are `mvel`, `js`, `groovy`, `python`, and
+script. The `lang` options are `groovy`, `js`, `mvel`, `python`,
-`native`.
+`expression` and `native`.
-added[1.2.0, Dynamic scripting is disabled by default since version 1.2.0]
+added[1.2.0, Dynamic scripting is disabled for non-sandboxed languages by default since version 1.2.0]
-To increase security, Elasticsearch does not allow you to specify scripts with a
+To increase security, Elasticsearch does not allow you to specify scripts for
-request. Instead, scripts must be placed in the `scripts` directory inside the
+non-sandboxed languages with a request. Instead, scripts must be placed in the
-configuration directory (the directory where elasticsearch.yml is). Scripts
+`scripts` directory inside the configuration directory (the directory where
-placed into this directory will automatically be picked up and be available to
+elasticsearch.yml is). Scripts placed into this directory will automatically be
-be used. Once a script has been placed in this directory, it can be referenced
+picked up and be available to be used. Once a script has been placed in this
-by name. For example, a script called `calculate-score.mvel` can be referenced
+directory, it can be referenced by name. For example, a script called
-in a request like this:
+`calculate-score.groovy` can be referenced in a request like this:
 [source,sh]
 --------------------------------------------------
@ -36,13 +40,13 @@ config
 ├── elasticsearch.yml
 ├── logging.yml
 └── scripts
-    └── calculate-score.mvel
+    └── calculate-score.groovy
 --------------------------------------------------
 [source,sh]
 --------------------------------------------------
-$ cat config/scripts/calculate-score.mvel
+$ cat config/scripts/calculate-score.groovy
-Math.log(_score * 2) + my_modifier
+log(_score * 2) + my_modifier
 --------------------------------------------------
 [source,js]
@ -76,20 +80,92 @@ a script placed under `config/scripts/group1/group2/test.py` will be
 named `group1_group2_test`.
 [float]
-=== Default Scripting Language
+=== Indexed Scripts
 If dynamic scripting is enabled, Elasticsearch allows you to store scripts
 in an internal index known as `.scripts` and reference them by id. There are
 REST endpoints to manage indexed scripts as follows:
 Requests to the scripts endpoint look like :
 [source,js]
 -----------------------------------
 /_scripts/{lang}/{id}
 -----------------------------------
 Where the `lang` part is the language the script is in and the `id` part is the id
 of the script. In the `.scripts` index the type of the document will be set to the `lang`.
 [source,js]
 -----------------------------------
 curl -XPOST localhost:9200/_scripts/groovy/indexedCalculateScore -d '{
     "script": "log(_score * 2) + my_modifier"
 }'
 -----------------------------------
 This will create a document with id: `indexedCalculateScore` and type: `groovy` in the
 `.scripts` index. The type of the document is the language used by the script.
 This script can be accessed at query time by appending `_id` to
 the script parameter and passing the script id. So `script` becomes `script_id`:
 [source,js]
 --------------------------------------------------
 curl -XPOST localhost:9200/_search -d '{
  "query": {
    "function_score": {
      "query": {
        "match": {
          "body": "foo"
        }
      },
      "functions": [
        {
          "script_score": {
            "script_id": "indexedCalculateScore",
            "lang" : "groovy",
            "params": {
              "my_modifier": 8
            }
          }
        }
      ]
    }
  }
 }'
 --------------------------------------------------
 Note that you must have dynamic scripting enabled to use indexed scripts
 at query time.
 The script can be viewed by:
 [source,js]
 -----------------------------------
 curl -XGET localhost:9200/_scripts/groovy/indexedCalculateScore
 -----------------------------------
 This is rendered as:
 [source,js]
 -----------------------------------
 '{
     "script": "log(_score * 2) + my_modifier"
 }'
 -----------------------------------
 Indexed scripts can be deleted by:
 [source,js]
 -----------------------------------
 curl -XDELETE localhost:9200/_scripts/groovy/indexedCalculateScore
 -----------------------------------
 The default scripting language (assuming no `lang` parameter is
 provided) is `mvel`. In order to change it set the `script.default_lang`
 to the appropriate language.
 [float]
 === Enabling dynamic scripting
-We recommend running Elasticsearch behind an application or proxy,
+We recommend running Elasticsearch behind an application or proxy, which
-which protects Elasticsearch from the outside world. If users are
+protects Elasticsearch from the outside world. If users are allowed to run
-allowed to run dynamic scripts (even in a search request), then they
+dynamic scripts (even in a search request), then they have the same access to
-have the same access to your box as the user that Elasticsearch is
+your box as the user that Elasticsearch is running as. For this reason dynamic
-running as. For this reason dynamic scripting is disabled by default.
+scripting is allowed only for sandboxed languages by default.
 First, you should not run Elasticsearch as the `root` user, as this would allow
 a script to access or do *anything* on your server, without limitations. Second,
@ -109,6 +185,54 @@ _native_ Java scripts registered through plugins, it also allows users to run
 arbitrary scripts via the API. Instead of sending the name of the file as the
 script, the body of the script can be sent instead.
 There are three possible configuration values for the `script.disable_dynamic`
 setting, the default value is `sandbox`:
 [cols="<,<",options="header",]
 |=======================================================================
 |Value |Description
 | `true` |all dynamic scripting is disabled, scripts must be placed in the `config/scripts` directory.
 | `false` |all dynamic scripting is enabled, scripts may be sent as strings in requests.
 | `sandbox` |scripts may be sent as strings for languages that are sandboxed.
 |=======================================================================
 [float]
 === Default Scripting Language
 The default scripting language (assuming no `lang` parameter is provided) is
 `groovy`. In order to change it, set the `script.default_lang` to the
 appropriate language.
 [float]
 === Groovy Sandboxing
 Elasticsearch sandboxes Groovy scripts that are compiled and executed in order
 to ensure they don't perform unwanted actions. There are a number of options
 that can be used for configuring this sandbox:
 `script.groovy.sandbox.receiver_whitelist`::
    Comma-separated list of string classes for objects that may have methods
    invoked.
 `script.groovy.sandbox.package_whitelist`::
    Comma-separated list of packages under which new objects may be constructed.
 `script.groovy.sandbox.class_whitelist`::
    Comma-separated list of classes that are allowed to be constructed.
 `script.groovy.sandbox.method_blacklist`::
    Comma-separated list of methods that are never allowed to be invoked,
    regardless of target object.
 `script.groovy.sandbox.enabled`::
    Flag to disable the sandbox (defaults to `true` meaning the sandbox is
    enabled).
 [float]
 === Automatic Script Reloading
@ -119,10 +243,11 @@ using `watcher.interval` setting, which defaults to `60s`.
 To disable script reloading completely set `script.auto_reload_enabled`
 to `false`.
 [[native-java-scripts]]
 [float]
 === Native (Java) Scripts
-Even though `mvel` is pretty fast, this allows to register native Java based
+Even though `groovy` is pretty fast, this allows to register native Java based
 scripts for faster execution.
 In order to allow for scripts, the `NativeScriptFactory` needs to be
@ -142,14 +267,43 @@ the name of the script as the `script`.
 Note, the scripts need to be in the classpath of elasticsearch. One
 simple way to do it is to create a directory under plugins (choose a
-descriptive name), and place the jar / classes files there, they will be
+descriptive name), and place the jar / classes files there. They will be
 automatically loaded.
 [float]
 === Lucene Expressions Scripts
 [WARNING]
 ========================
 This feature is *experimental* and subject to change in future versions.
 ========================
 Lucene's expressions module provides a mechanism to compile a
 `javascript` expression to bytecode.  This allows very fast execution,
 as if you had written a `native` script.  Expression scripts can be
 used in `script_score`, `script_fields`, sort scripts and numeric aggregation scripts.
 See the link:http://lucene.apache.org/core/4_9_0/expressions/index.html?org/apache/lucene/expressions/js/package-summary.html[expressions module documentation]
 for details on what operators and functions are available.
 Variables in `expression` scripts are available to access:
 * Single valued document fields, e.g. `doc['myfield'].value`
 * Parameters passed into the script, e.g. `mymodifier`
 * The current document's score, `_score` (only available when used in a `script_score`)
 There are a few limitations relative to other script languages:
 * Only numeric fields may be accessed
 * Stored fields are not available
 * If a field is sparse (only some documents contain a value), documents missing the field will have a value of `0`
 [float]
 === Score
-In all scripts that can be used in facets, allow to access the current
+In all scripts that can be used in facets, the current
-doc score using `doc.score`.
+document's score is accessible in `doc.score`.  When using a `script_score`,
 the current score is available in `_score`.
 [float]
 === Computing scores based on terms in scripts
@ -267,7 +421,7 @@ loaded for other purposes.
 [float]
-=== mvel Built In Functions
+=== Groovy Built In Functions
 There are several built in functions that can be used within scripts.
 They include:
@ -275,8 +429,6 @@ They include:
 [cols="<,<",options="header",]
 |=======================================================================
 |Function |Description
 |`time()` |The current time in milliseconds.
 |`sin(a)` |Returns the trigonometric sine of an angle.
 |`cos(a)` |Returns the trigonometric cosine of an angle.
@ -362,3 +514,4 @@ integer with the value of `8`, the result is `0` even though you were
 expecting it to be `0.125`. You may need to enforce precision by
 explicitly using a double like `1.0/num` in order to get the expected
 result.
--- a/docs/reference/modules/snapshots.asciidoc
+++ b/docs/reference/modules/snapshots.asciidoc
@ -132,9 +132,9 @@ Snapshotting process is executed in non-blocking fashion. All indexing and searc
 executed against the index that is being snapshotted. However, a snapshot represents the point-in-time view of the index
 at the moment when snapshot was created, so no records that were added to the index after snapshot process had started
 will be present in the snapshot. The snapshot process starts immediately for the primary shards that have been started
-and are not relocating at the moment. Before version 1.2.0 the snapshot operation fails if cluster has any relocating or
+and are not relocating at the moment. Before version 1.2.0, the snapshot operation fails if the cluster has any relocating or
 initializing primaries of indices participating in the snapshot. Starting with version 1.2.0, Elasticsearch waits for
-are relocating or initializing shards to start before snapshotting them.
+relocation or initialization of shards to complete before snapshotting them.
 Besides creating a copy of each index the snapshot process can also store global cluster metadata, which includes persistent
 cluster settings and templates. The transient settings and registered snapshot repositories are not stored as part of
@ -189,6 +189,7 @@ should be restored as well as prevent global cluster state from being restored b
 <<search-multi-index-type,multi index syntax>>. The `rename_pattern` and `rename_replacement` options can be also used to
 rename index on restore using regular expression that supports referencing the original text as explained
 http://docs.oracle.com/javase/6/docs/api/java/util/regex/Matcher.html#appendReplacement(java.lang.StringBuffer,%20java.lang.String)[here].
 Set `include_aliases` to `false` to prevent aliases from being restored together with associated indices added[1.3.0].
 [source,js]
 -----------------------------------
@ -207,6 +208,16 @@ didn't exist in the cluster. If cluster state is restored, the restored template
 cluster are added and existing templates with the same name are replaced by the restored templates. The restored
 persistent settings are added to the existing persistent settings.
 [float]
 === Partial restore
 added[1.3.0]
 By default, entire restore operation will fail if one or more indices participating in the operation don't have
 snapshots of all shards available. It can occur if some shards failed to snapshot for example. It is still possible to
 restore such indices by setting `partial` to `true`. Please note, that only successfully snapshotted shards will be
 restored in this case and all missing shards will be recreated empty.
 [float]
 === Snapshot status
--- a/docs/reference/query-dsl/filters/geohash-cell-filter.asciidoc
+++ b/docs/reference/query-dsl/filters/geohash-cell-filter.asciidoc
@ -64,7 +64,7 @@ next to the given cell.
 [float]
 ==== Caching
-coming[1.3.0]
+added[1.3.0]
 The result of the filter is not cached by default. The
 `_cache` parameter can be set to `true` to turn caching on.
--- a/docs/reference/query-dsl/filters/has-child-filter.asciidoc
+++ b/docs/reference/query-dsl/filters/has-child-filter.asciidoc
@ -45,7 +45,7 @@ The `has_child` filter also accepts a filter instead of a query:
 [float]
 ==== Min/Max Children
-coming[1.3.0]
+added[1.3.0]
 The `has_child` filter allows you to specify that a minimum and/or maximum
 number of children are required to match for the parent doc to be considered
@ -75,13 +75,24 @@ is specified.
 [float]
 ==== Memory Considerations
-With the current implementation, all `_parent` field values and all `_id`
+In order to support parent-child joins, all of the (string) parent IDs 
-field values of parent documents are loaded into memory (heap) via field data
+must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
-in order to support fast lookups, so make sure there is enough memory for it.
+Additionally, every child document is mapped to its parent using a long
 value (approximately). It is advisable to keep the string parent ID short
 in order to reduce memory usage.
 You can check how much memory is being used by the ID cache using the
 <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
 APIs, e.g.:
 [source,js]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
 --------------------------------------------------
 [float]
 ==== Caching
 The `has_child` filter cannot be cached in the filter cache. The `_cache`
 and `_cache_key` options are a no-op in this filter. Also any filter that
-wraps the `has_child` filter either directly or indirectly will not be cached.
+wraps the `has_child` filter either directly or indirectly will not be cached.
--- a/docs/reference/query-dsl/filters/has-parent-filter.asciidoc
+++ b/docs/reference/query-dsl/filters/has-parent-filter.asciidoc
@ -46,11 +46,22 @@ The `has_parent` filter also accepts a filter instead of a query:
 --------------------------------------------------
 [float]
-==== Memory considerations
+==== Memory Considerations
-With the current implementation, all `_parent` field values and all `_id`
+In order to support parent-child joins, all of the (string) parent IDs 
-field values of parent documents are loaded into memory (heap) via field data
+must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
-in order to support fast lookups, so make sure there is enough memory for it.
+Additionally, every child document is mapped to its parent using a long
 value (approximately). It is advisable to keep the string parent ID short
 in order to reduce memory usage.
 You can check how much memory is being used by the ID cache using the
 <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
 APIs, e.g.:
 [source,js]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
 --------------------------------------------------
 [float]
 ==== Caching
--- a/docs/reference/query-dsl/queries/filtered-query.asciidoc
+++ b/docs/reference/query-dsl/queries/filtered-query.asciidoc
@ -1,54 +1,159 @@
 [[query-dsl-filtered-query]]
 === Filtered Query
-A query that applies a filter to the results of another query. This
+The `filtered` query is used to combine another query with any
-query maps to Lucene `FilteredQuery`.
+<<query-dsl-filters,filter>>. Filters are usually faster than queries because:
 *   they don't have to calculate the relevance `_score` for each document --
    the answer is just a boolean ``Yes, the document matches the filter'' or
    ``No, the document does not match the filter''.
 *   the results from most filters can be cached in memory, making subsequent
    executions faster.
 TIP: Exclude as many documents as you can with a filter, then query just the
 documents that remain.
 [source,js]
 --------------------------------------------------
 {
-    "filtered" : {
+  "filtered": {
-        "query" : {
+    "query": {
-            "term" : { "tag" : "wow" }
+      "match": { "tweet": "full text search" }
-        },
+    },
-        "filter" : {
+    "filter": {
-            "range" : {
+      "range": { "created": { "gte": "now - 1d / d" }}
                "age" : { "from" : 10, "to" : 20 }
            }
        }
    }
  }
 }
 --------------------------------------------------
-The filter object can hold only filter elements, not queries. Filters
+The `filtered` query can be used wherever a `query` is expected, for instance,
-can be much faster compared to queries since they don't perform any
+to use the above example in search request:
-scoring, especially when they are cached.
+
 [source,js]
 --------------------------------------------------
 curl -XGET localhost:9200/_search -d '
 {
  "query": {
    "filtered": { <1>
      "query": {
        "match": { "tweet": "full text search" }
      },
      "filter": {
        "range": { "created": { "gte": "now - 1d / d" }}
      }
    }
  }
 }
 '
 --------------------------------------------------
 <1> The `filtered` query is passed as the value of the `query`
    parameter in the search request.
 ==== Filtering without a query
 If a `query` is not specified, it defaults to the
 <<query-dsl-match-all-query,`match_all` query>>.  This means that the
 `filtered` query can be used to wrap just a filter, so that it can be used
 wherever a query is expected.
 [source,js]
 --------------------------------------------------
 curl -XGET localhost:9200/_search -d '
 {
  "query": {
    "filtered": { <1>
      "filter": {
        "range": { "created": { "gte": "now - 1d / d" }}
      }
    }
  }
 }
 '
 --------------------------------------------------
 <1> No `query` has been specified, so this request applies just the filter,
   returning all documents created since yesterday.
 ==== Multiple filters
 Multiple filters can be applied by wrapping them in a
 <<query-dsl-bool-filter,`bool` filter>>, for example:
 [source,js]
 --------------------------------------------------
 {
  "filtered": {
    "query": { "match": { "tweet": "full text search" }},
    "filter": {
      "bool": {
        "must": { "range": { "created": { "gte": "now - 1d / d" }}},
        "should": [
          { "term": { "featured": true }},
          { "term": { "starred":  true }}
        ],
        "must_not": { "term": { "deleted": false }}
      }
    }
  }
 }
 --------------------------------------------------
 Similarly, multiple queries can be combined with a
 <<query-dsl-bool-query,`bool` query>>.
 ==== Filter strategy
-The filtered query allows to configure how to intersect the filter with the query:
+You can control how the filter and query are executed with the `strategy`
 parameter:
 [source,js]
 --------------------------------------------------
 {
    "filtered" : {
-        "query" : {
+        "query" :   { ... },
-            // query definition
+        "filter" :  { ... },
        },
        "filter" : {
            // filter definition
        },
        "strategy": "leap_frog"
    }
 }
 --------------------------------------------------
-[horizontal]
+IMPORTANT: This is an _expert-level_ setting.  Most users can simply ignore it.
 `leap_frog_query_first`::      Look for the first document matching the query, and then alternatively advance the query and the filter to find common matches.
 `leap_frog_filter_first`::     Look for the first document matching the filter, and then alternatively advance the query and the filter to find common matches.
 `leap_frog`::                  Same as `leap_frog_query_first`.
 `query_first`::                If the filter supports random access, then search for documents using the query, and then consult the filter to check whether there is a match. Otherwise fall back to `leap_frog_query_first`.
 `random_access_${threshold}`:: If the filter supports random access and if there is at least one matching document among the first `threshold` ones, then apply the filter first. Otherwise fall back to `leap_frog_query_first`. `${threshold}` must be greater than or equal to `1`.
 `random_access_always`::       Apply the filter first if it supports random access. Otherwise fall back to `leap_frog_query_first`.
-The default strategy is to use `query_first` on filters that are not advanceable such as geo filters and script filters, and `random_access_100` on other filters.
+The `strategy` parameter accepts the following options:
 [horizontal]
 `leap_frog_query_first`::
         Look for the first document matching the query, and then alternately
         advance the query and the filter to find common matches.
 `leap_frog_filter_first`::
     Look for the first document matching the filter, and then alternately
     advance the query and the filter to find common matches.
 `leap_frog`::
    Same as `leap_frog_query_first`.
 `query_first`::
    If the filter supports random access, then search for documents using the
    query, and then consult the filter to check whether there is a match.
    Otherwise fall back to `leap_frog_query_first`.
 `random_access_${threshold}`::
    If the filter supports random access and if there is at least one matching
    document among the first `threshold` ones, then apply the filter first.
    Otherwise fall back to `leap_frog_query_first`. `${threshold}` must be
    greater than or equal to `1`.
 `random_access_always`::
    Apply the filter first if it supports random access. Otherwise fall back
    to `leap_frog_query_first`.
 The default strategy is to use `query_first` on filters that are not
 advanceable such as geo filters and script filters, and `random_access_100` on
 other filters.
--- a/docs/reference/query-dsl/queries/function-score-query.asciidoc
+++ b/docs/reference/query-dsl/queries/function-score-query.asciidoc
@ -57,7 +57,7 @@ given filter:
 If no filter is given with a function this is equivalent to specifying
 `"match_all": {}`
-First, each document is scored by the defined functons. The parameter
+First, each document is scored by the defined functions. The parameter
 `score_mode` specifies how the computed scores are combined:
 [horizontal]
@ -151,6 +151,9 @@ that is initialized with a `seed`.
 --------------------------------------------------
 ===== Field Value factor
 added[1.2.0]
 The `field_value_factor` function allows you to use a field from a document to
 influence the score. It's similar to using the `script_score` function, however,
 it avoids the overhead of scripting. If used on a multi-valued field, only the
@ -270,18 +273,33 @@ Normal decay, computed as:
 +
 image:images/Gaussian.png[]
 where image:images/sigma.png[] is computed to assure that the score takes the value `decay` at distance `scale` from `origin`+-`offset`
 image:images/sigma_calc.png[]
 [horizontal]
 `exp`::
 Exponential decay, computed as:
 +
 image:images/Exponential.png[]
 where again the parameter image:images/lambda.png[] is computed to assure that the score takes the value `decay` at distance `scale` from `origin`+-`offset`
 image:images/lambda_calc.png[]
 [horizontal]
 `linear`::
 Linear decay, computed as:
 +
 image:images/Linear.png[].
-+
+
 where again the parameter `s` is computed to assure that the score takes the value `decay` at distance `scale` from `origin`+-`offset`
 image:images/s_calc.png[]
 In contrast to the normal and exponential decay, this function actually
 sets the score to 0 if the field value exceeds twice the user given
 scale value.
--- a/docs/reference/query-dsl/queries/has-child-query.asciidoc
+++ b/docs/reference/query-dsl/queries/has-child-query.asciidoc
@ -56,7 +56,7 @@ inside the `has_child` query:
 [float]
 ==== Min/Max Children
-coming[1.3.0]
+added[1.3.0]
 The `has_child` query allows you to specify that a minimum and/or maximum
 number of children are required to match for the parent doc to be considered
@ -86,6 +86,19 @@ the `score_mode` parameter.
 [float]
 ==== Memory Considerations
-With the current implementation, all `_parent` field values and all `_id`
+In order to support parent-child joins, all of the (string) parent IDs 
-field values of parent documents are loaded into memory (heap) via field data
+must be resident in memory (in the <<index-modules-fielddata,field data cache>>). 
-in order to support fast lookups, so make sure there is enough memory for it.
+Additionally, every child document is mapped to its parent using a long 
 value (approximately). It is advisable to keep the string parent ID short
 in order to reduce memory usage.
 You can check how much memory is being used by the ID cache using the
 <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
 APIs, e.g.:
 [source,js]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
 --------------------------------------------------
--- a/docs/reference/query-dsl/queries/has-parent-query.asciidoc
+++ b/docs/reference/query-dsl/queries/has-parent-query.asciidoc
@ -52,6 +52,19 @@ matching parent document. The score type can be specified with the
 [float]
 ==== Memory Considerations
-With the current implementation, all `_parent` field values and all `_id`
+In order to support parent-child joins, all of the (string) parent IDs 
-field values of parent documents are loaded into memory (heap) via field data
+must be resident in memory (in the <<index-modules-fielddata,field data cache>>). 
-in order to support fast lookups, so make sure there is enough memory for it.
+Additionally, every child document is mapped to its parent using a long 
 value (approximately). It is advisable to keep the string parent ID short
 in order to reduce memory usage.
 You can check how much memory is being used by the ID cache using the
 <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
 APIs, e.g.:
 [source,js]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
 --------------------------------------------------
--- a/docs/reference/query-dsl/queries/match-query.asciidoc
+++ b/docs/reference/query-dsl/queries/match-query.asciidoc
@ -98,13 +98,6 @@ The `cutoff_frequency` can either be relative to the number of documents
 in the index if in the range `[0..1)` or absolute if greater or equal to
 `1.0`.
 Note: If the `cutoff_frequency` is used and the operator is `and`
 _stacked tokens_ (tokens that are on the same position like `synonym` filter emits)
 are not handled gracefully as they are in a pure `and` query. For instance the query
 `fast fox` is analyzed into 3 terms `[fast, quick, fox]` where `quick` is a synonym
 for `fast` on the same token positions the query might require `fast` and `quick` to
 match if the operator is `and`.
 Here is an example showing a query composed of stopwords exclusively:
 [source,js]
--- a/docs/reference/query-dsl/queries/multi-match-query.asciidoc
+++ b/docs/reference/query-dsl/queries/multi-match-query.asciidoc
@ -25,7 +25,7 @@ Fields can be specified with wildcards, eg:
 --------------------------------------------------
 {
  "multi_match" : {
-    "query":    "Will Smith"
+    "query":    "Will Smith",
    "fields": [ "title", "*_name" ] <1>
  }
 }
--- a/docs/reference/query-dsl/queries/nested-query.asciidoc
+++ b/docs/reference/query-dsl/queries/nested-query.asciidoc
@ -52,7 +52,7 @@ fields referenced inside the query must use the complete path (fully
 qualified).
 The `score_mode` allows you to set how inner children matching affects
-scoring of parent. It defaults to `avg`, but can be `total`, `max` and
+scoring of parent. It defaults to `avg`, but can be `sum`, `max` and
 `none`.
 Multi level nesting is automatically supported, and detected, resulting
--- a/docs/reference/query-dsl/queries/query-string-syntax.asciidoc
+++ b/docs/reference/query-dsl/queries/query-string-syntax.asciidoc
@ -23,8 +23,10 @@ search terms, but it is possible to specify other fields in the query syntax:
    status:active
-* where the `title` field contains `quick` or `brown`
+* where the `title` field contains `quick` or `brown`.
  If you omit the OR operator the default operator will be used
    title:(quick OR brown)
    title:(quick brown)
 * where the `author` field contains the exact phrase `"john smith"`
@ -133,7 +135,7 @@ curly brackets `{min TO max}`.
 * All days in 2012:
-    date:[2012/01/01 TO 2012/12/31]
+    date:[2012-01-01 TO 2012-12-31]
 * Numbers 1..5
@ -149,7 +151,7 @@ curly brackets `{min TO max}`.
 * Dates before 2012
-    date:{* TO 2012/01/01}
+    date:{* TO 2012-01-01}
 Curly and square brackets can be combined:
--- a/docs/reference/query-dsl/queries/regexp-query.asciidoc
+++ b/docs/reference/query-dsl/queries/regexp-query.asciidoc
@ -3,6 +3,9 @@
 The `regexp` query allows you to use regular expression term queries.
 See <<regexp-syntax>> for details of the supported regular expression language.
 The "term queries" in that first sentence means that Elasticsearch will apply
 the regexp to the terms produced by the tokenizer for that field, and not
 to the original text of the field.
 *Note*: The performance of a `regexp` query heavily depends on the
 regular expression chosen. Matching everything like `.*` is very slow as
@ -49,7 +52,7 @@ You can also use special flags
 Possible flags are `ALL`, `ANYSTRING`, `AUTOMATON`, `COMPLEMENT`,
 `EMPTY`, `INTERSECTION`, `INTERVAL`, or `NONE`. Please check the
-http://lucene.apache.org/core/4_3_0/core/index.html?org%2Fapache%2Flucene%2Futil%2Fautomaton%2FRegExp.html[Lucene
+http://lucene.apache.org/core/4_9_0/core/org/apache/lucene/util/automaton/RegExp.html[Lucene
 documentation] for their meaning.
--- a/docs/reference/query-dsl/queries/template-query.asciidoc
+++ b/docs/reference/query-dsl/queries/template-query.asciidoc
@ -95,6 +95,46 @@ which is then turned into:
 }
 ------------------------------------------
 added[1.3.0]
 You can register a template by storing it in the elasticsearch index `.scripts` or by using the REST API. (See <<search-template>> for more details)
 In order to execute the stored template, reference it by name in the `query`
 parameter:
 [source,js]
 ------------------------------------------
 GET /_search
 {
    "query": {
        "template": {
            "query": "templateName", <1>
            "params" : {
                "template" : "all"
            }
        }
    }
 }
 ------------------------------------------
 <1> Name of the query template stored in the index.
 [source,js]
 ------------------------------------------
 GET /_search
 {
    "query": {
        "template": {
            "query": "storedTemplate", <1>
            "params" : {
                "template" : "all"
            }
        }
    }
 }
 ------------------------------------------
 There is also a dedicated `template` endpoint, which allows you to template an entire search request.
 Please see <<search-template>> for more details.
--- a/docs/reference/query-dsl/queries/top-children-query.asciidoc
+++ b/docs/reference/query-dsl/queries/top-children-query.asciidoc
@ -66,6 +66,19 @@ same scope name that will work against the child documents. For example:
 [float]
 ==== Memory Considerations
-With the current implementation, all `_parent` field values and all `_id`
+In order to support parent-child joins, all of the (string) parent IDs 
-field values of parent documents are loaded into memory (heap) via field data
+must be resident in memory (in the <<index-modules-fielddata,field data cache>>). 
-in order to support fast lookups, so make sure there is enough memory for it.
+Additionally, every child document is mapped to its parent using a long 
 value (approximately). It is advisable to keep the string parent ID short
 in order to reduce memory usage.
 You can check how much memory is being used by the ID cache using the
 <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
 APIs, e.g.:
 [source,js]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
 --------------------------------------------------
--- a/docs/reference/search.asciidoc
+++ b/docs/reference/search.asciidoc
@ -83,10 +83,12 @@ include::search/request-body.asciidoc[]
 include::search/search-template.asciidoc[]
-include::search/facets.asciidoc[]
+include::search/search-shards.asciidoc[]
 include::search/aggregations.asciidoc[]
 include::search/facets.asciidoc[]
 include::search/suggesters.asciidoc[]
 include::search/multi-search.asciidoc[]
--- a/docs/reference/search/aggregations/bucket/geodistance-aggregation.asciidoc
+++ b/docs/reference/search/aggregations/bucket/geodistance-aggregation.asciidoc
@ -82,7 +82,7 @@ By default, the distance unit is `km` but it can also accept: `mi` (miles), `in`
 <1> The distances will be computed as miles
-There are two distance calculation modes: `sloppy_arc` (the default), `arc` (most accurate) and `plane` (fastest). The `arc` calculation is the most accurate one but also the more expensive one in terms of performance. The `sloppy_arc` is faster but less accurate. The `plane` is the fastest but least accurate distance function. Consider using `plane` when your search context is "narrow" and spans smaller geographical areas (like cities or even countries). `plane` may return higher error mergins for searches across very large areas (e.g. cross continent search). The distance calculation type can be set using the `distance_type` parameter:
+There are three distance calculation modes: `sloppy_arc` (the default), `arc` (most accurate) and `plane` (fastest). The `arc` calculation is the most accurate one but also the most expensive one in terms of performance. The `sloppy_arc` is faster but less accurate. The `plane` is the fastest but least accurate distance function. Consider using `plane` when your search context is "narrow" and spans smaller geographical areas (like cities or even countries). `plane` may return higher error margins for searches across very large areas (e.g. cross continent search). The distance calculation type can be set using the `distance_type` parameter:
 [source,js]
 --------------------------------------------------
--- a/docs/reference/search/aggregations/bucket/histogram-aggregation.asciidoc
+++ b/docs/reference/search/aggregations/bucket/histogram-aggregation.asciidoc
@ -142,7 +142,7 @@ Example:
 --------------------------------------------------
 {
    "query" : {
-        "filtered" : { "range" : { "price" : { "to" : "500" } } }
+        "filtered" : { "filter": { "range" : { "price" : { "to" : "500" } } } }
    },
    "aggs" : {
        "prices" : {
--- a/docs/reference/search/aggregations/bucket/nested-aggregation.asciidoc
+++ b/docs/reference/search/aggregations/bucket/nested-aggregation.asciidoc
@ -34,7 +34,7 @@ The following aggregations will return the minimum price products can be purchas
 {
    "query" : {
        "match" : { "name" : "led tv" }
-    }
+    },
    "aggs" : {
        "resellers" : {
            "nested" : {
--- a/docs/reference/search/aggregations/bucket/significantterms-aggregation.asciidoc
+++ b/docs/reference/search/aggregations/bucket/significantterms-aggregation.asciidoc
@ -194,10 +194,7 @@ where a simple `terms` aggregation would typically show the very popular "consta
 .How are the scores calculated?
 **********************************
-The numbers returned for scores are primarily intended for ranking different suggestions sensibly rather than something easily understood by end users.
+The numbers returned for scores are primarily intended for ranking different suggestions sensibly rather than something easily understood by end users. The scores are derived from the doc frequencies in _foreground_ and _background_ sets. In brief, a term is considered significant if there is a noticeable difference in the frequency in which a term appears in the subset and in the background. The way the terms are ranked can be configured, see "Parameters" section.
 The scores are derived from the doc frequencies in _foreground_ and _background_ sets. The _absolute_ change in popularity (foregroundPercent - backgroundPercent) would favour
 common terms whereas the _relative_ change in popularity (foregroundPercent/ backgroundPercent) would favour rare terms.
 Rare vs common is essentially a precision vs recall balance and so the absolute and relative changes are multiplied to provide a sweet spot between precision and recall.
 **********************************
@ -282,7 +279,35 @@ However, the `size` and `shard size` settings covered in the next section provid
 ==== Parameters
 ===== JLH score
 The scores are derived from the doc frequencies in _foreground_ and _background_ sets. The _absolute_ change in popularity (foregroundPercent - backgroundPercent) would favor common terms whereas the _relative_ change in popularity (foregroundPercent/ backgroundPercent) would favor rare terms. Rare vs common is essentially a precision vs recall balance and so the absolute and relative changes are multiplied to provide a sweet spot between precision and recall.
 ===== mutual information
 added[1.3.0]
 Mutual information as described in "Information Retrieval", Manning et al., Chapter 13.5.1 can be used as significance score by adding the parameter
 [source,js]
 --------------------------------------------------
 	 "mutual_information": {
 	      "include_negatives": true
 	 }
 --------------------------------------------------
 Mutual information does not differentiate between terms that are descriptive for the subset or for documents outside the subset. The significant terms therefore can contain terms that appear more or less frequently in the subset than outside the subset. To filter out the terms that appear less often in the subset than in documents outside the subset, `include_negatives` can be set to `false`. 
 Per default, the assumption is that the documents in the bucket are also contained in the background. If instead you defined a custom background filter that represents a different set of documents that you want to compare to, set 
 [source,js]
 --------------------------------------------------
 "background_is_superset": false
 --------------------------------------------------
 ===== Size & Shard Size
 The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list. By
@ -338,7 +363,7 @@ Terms that score highly will be collected on a shard level and merged with the t
 added[1.2.0] `shard_min_doc_count` parameter
-The parameter `shard_min_doc_count` regulates the _certainty_ a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent words and you are not interested in these (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a resonable certainty not reach the required `min_doc_count` even after merging the local frequencies. `shard_min_doc_count` is set to `1` per default and has no effect unless you explicitly set it.
+The parameter `shard_min_doc_count` regulates the _certainty_ a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent words and you are not interested in these (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a reasonable certainty not reach the required `min_doc_count` even after merging the local frequencies. `shard_min_doc_count` is set to `1` per default and has no effect unless you explicitly set it.
--- a/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc
+++ b/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc
@ -43,7 +43,7 @@ Response:
 By default, the `terms` aggregation will return the buckets for the top ten terms ordered by the `doc_count`. One can
 change this default behaviour by setting the `size` parameter.
-==== Size & Shard Size
+==== Size
 The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list. By
 default, the node coordinating the search process will request each shard to provide its own top `size` term buckets
@ -52,6 +52,87 @@ This means that if the number of unique terms is greater than `size`, the return
 (it could be that the term counts are slightly off and it could even be that a term that should have been in the top
 size buckets was not returned). If set to `0`, the `size` will be set to `Integer.MAX_VALUE`.
 ==== Document counts are approximate
 As described above, the document counts (and the results of any sub aggregations) in the terms aggregation are not always 
 accurate.  This is because each shard provides its own view of what the ordered list of terms should be and these are 
 combined to give a final view. Consider the following scenario:
 A request is made to obtain the top 5 terms in the field product, ordered by descending document count from an index with 
 3 shards. In this case each shard is asked to give its top 5 terms. 
 [source,js]
 --------------------------------------------------
 {
    "aggs" : {
        "products" : {
            "terms" : { 
                "field" : "product",
                "size" : 5
            }
        }
    }
 }
 --------------------------------------------------
 The terms for each of the three shards are shown below with their 
 respective document counts in brackets:
 [width="100%",cols="^2,^2,^2,^2",options="header"]
 |=========================================================
 |    | Shard A        | Shard B        | Shard C
 | 1  | Product A (25) | Product A (30) | Product A (45) 
 | 2  | Product B (18) | Product B (25) | Product C (44) 
 | 3  | Product C (6)  | Product F (17) | Product Z (36) 
 | 4  | Product D (3)  | Product Z (16) | Product G (30) 
 | 5  | Product E (2)  | Product G (15) | Product E (29) 
 | 6  | Product F (2)  | Product H (14) | Product H (28)  
 | 7  | Product G (2)  | Product I (10) | Product Q (2)  
 | 8  | Product H (2)  | Product J (8)  | Product D (1)  
 | 9  | Product I (1)  | Product Q (6)  | 
 | 10 | Product J (1)  | Product C (4)  | 
 |=========================================================
 The shards will return their top 5 terms so the results from the shards will be:
 [width="100%",cols="^2,^2,^2,^2",options="header"]
 |=========================================================
 |    | Shard A        | Shard B        | Shard C
 | 1  | Product A (25) | Product A (30) | Product A (45) 
 | 2  | Product B (18) | Product B (25) | Product C (44) 
 | 3  | Product C (6)  | Product F (17) | Product Z (36) 
 | 4  | Product D (3)  | Product Z (16) | Product G (30) 
 | 5  | Product E (2)  | Product G (15) | Product E (29) 
 |=========================================================
 Taking the top 5 results from each of the shards (as requested) and combining them to make a final top 5 list produces 
 the following:
 [width="40%",cols="^2,^2"]
 |=========================================================
 | 1  | Product A (100) 
 | 2  | Product Z (52) 
 | 3  | Product C (50) 
 | 4  | Product G (45) 
 | 5  | Product B (43) 
 |=========================================================
 Because Product A was returned from all shards we know that its document count value is accurate. Product C was only 
 returned by shards A and C so its document count is shown as 50 but this is not an accurate count. Product C exists on 
 shard B, but its count of 4 was not high enough to put Product C into the top 5 list for that shard. Product Z was also 
 returned only by 2 shards but the third shard does not contain the term. There is no way of knowing, at the point of 
 combining the results to produce the final list of terms, that there is an error in the document count for Product C and 
 not for Product Z. Product H has a document count of 44 across all 3 shards but was not included in the final list of 
 terms because it did not make it into the top five terms on any of the shards.
 ==== Shard Size
 The higher the requested `size` is, the more accurate the results will be, but also, the more expensive it will be to
 compute the final results (both due to bigger priority queues that are managed on a shard level and due to bigger data
@ -70,6 +151,81 @@ NOTE:   `shard_size` cannot be smaller than `size` (as it doesn't make much sens
 added[1.1.0] It is possible to not limit the number of terms that are returned by setting `size` to `0`. Don't use this
 on high-cardinality fields as this will kill both your CPU since terms need to be returned sorted, and your network.
 ==== Calculating Document Count Error
 coming[1.4.0] 
 There are two error values which can be shown on the terms aggregation.  The first gives a value for the aggregation as 
 a whole which represents the maximum potential document count for a term which did not make it into the final list of 
 terms. This is calculated as the sum of the document counts from the last term returned from each shard. For the example 
 given above the value would be 46 (2 + 15 + 29). This means that in the worst case scenario a term which was not returned 
 could have the 4th highest document count.
 [source,js]
 --------------------------------------------------
 {
    ...
    "aggregations" : {
        "products" : {
            "doc_count_error_upper_bound" : 46,
            "buckets" : [
                {
                    "key" : "Product A",
                    "doc_count" : 100
                },
                {
                    "key" : "Product Z",
                    "doc_count" : 52
                },
                ...
            ]
        }
    }
 }
 --------------------------------------------------
 The second error value can be enabled by setting the `show_term_doc_count_error` parameter to true. This shows an error value 
 for each term returned by the aggregation which represents the 'worst case' error in the document count and can be useful when 
 deciding on a value for the `shard_size` parameter. This is calculated by summing the document counts for the last term returned 
 by all shards which did not return the term. In the example above the error in the document count for Product C would be 15 as 
 Shard B was the only shard not to return the term and the document count of the last term it did return was 15. The actual document 
 count of Product C was 54 so the document count was only actually off by 4 even though the worst case was that it would be off by 
 15.  Product A, however, has an error of 0 for its document count; since every shard returned it, we can be confident that the count 
 returned is accurate.
 [source,js]
 --------------------------------------------------
 {
    ...
    "aggregations" : {
        "products" : {
            "doc_count_error_upper_bound" : 46,
            "buckets" : [
                {
                    "key" : "Product A",
                    "doc_count" : 100,
                    "doc_count_error_upper_bound" : 0
                },
                {
                    "key" : "Product Z",
                    "doc_count" : 52,
                    "doc_count_error_upper_bound" : 2
                },
                ...
            ]
        }
    }
 }
 --------------------------------------------------
 These errors can only be calculated in this way when the terms are ordered by descending document count. When the aggregation is 
 ordered by the terms values themselves (either ascending or descending) there is no error in the document count since if a shard 
 does not return a particular term which appears in the results from another shard, it must not have that term in its index. When the 
 aggregation is either sorted by a sub aggregation or in order of ascending document count, the error in the document counts cannot be 
 determined and is given a value of -1 to indicate this.
 ==== Order
 The order of the buckets can be customized by setting the `order` parameter. By default, the buckets are ordered by
@ -322,7 +478,7 @@ http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES
 ==== Collect mode
-coming[1.3.0] Deferring calculation of child aggregations
+added[1.3.0] Deferring calculation of child aggregations
 For fields with many unique terms and a small number of required results it can be more efficient to delay the calculation
 of child aggregations until the top parent-level aggs have been pruned. Ordinarily, all branches of the aggregation tree
@ -395,15 +551,32 @@ this would typically be too costly in terms of RAM.
 ==== Execution hint
-added[1.2.0] The `global_ordinals` execution mode
+added[1.2.0] Added the `global_ordinals`, `global_ordinals_hash` and `global_ordinals_low_cardinality` execution modes
-There are three mechanisms by which terms aggregations can be executed: either by using field values directly in order to aggregate
+deprecated[1.3.0] Removed the `ordinals` execution mode
-data per-bucket (`map`), by using ordinals of the field values instead of the values themselves (`ordinals`) or by using global
+
-ordinals of the field (`global_ordinals`). The latter is faster, especially for fields with many unique
+There are different mechanisms by which terms aggregations can be executed:
-values. However it can be slower if only a few documents match, when for example a terms aggregator is nested in another
+
-aggregator, this applies for both `ordinals` and `global_ordinals` execution modes. Elasticsearch tries to have sensible
+ - by using field values directly in order to aggregate data per-bucket (`map`)
-defaults when it comes to the execution mode that should be used, but  in case you know that one execution mode may
+ - by using ordinals of the field and preemptively allocating one bucket per ordinal value (`global_ordinals`)
-perform better than the other one, you have the ability to "hint" it to Elasticsearch:
+ - by using ordinals of the field and dynamically allocating one bucket per ordinal value (`global_ordinals_hash`)
 - by using per-segment ordinals to compute counts and remap these counts to global counts using global ordinals (`global_ordinals_low_cardinality`)
 Elasticsearch tries to have sensible defaults so this is something that generally doesn't need to be configured.
 `map` should only be considered when very few documents match a query. Otherwise the ordinals-based execution modes
 are significantly faster. By default, `map` is only used when running an aggregation on scripts, since they don't have
 ordinals.
 `global_ordinals_low_cardinality` only works for leaf terms aggregations but is usually the fastest execution mode. Memory
 usage is linear with the number of unique values in the field, so it is only enabled by default on low-cardinality fields.
 `global_ordinals` is the second fastest option, but the fact that it preemptively allocates buckets can be memory-intensive,
 especially if you have one or more sub aggregations. It is used by default on top-level terms aggregations.
 `global_ordinals_hash`, in contrast to `global_ordinals` and `global_ordinals_low_cardinality`, allocates buckets dynamically
 so memory usage is linear to the number of values of the documents that are part of the aggregation scope. It is used by default
 in inner aggregations.
 [source,js]
 --------------------------------------------------
@ -419,6 +592,6 @@ perform better than the other one, you have the ability to "hint" it to Elastics
 }
 --------------------------------------------------
-<1> the possible values are `map`, `ordinals` and `global_ordinals`
+<1> the possible values are `map`, `global_ordinals`, `global_ordinals_hash` and `global_ordinals_low_cardinality`
-Please note that Elasticsearch will ignore this execution hint if it is not applicable.
+Please note that Elasticsearch will ignore this execution hint if it is not applicable and that there is no backward compatibility guarantee on these hints.
--- a/docs/reference/search/aggregations/metrics.asciidoc
+++ b/docs/reference/search/aggregations/metrics.asciidoc
@ -16,6 +16,8 @@ include::metrics/valuecount-aggregation.asciidoc[]
 include::metrics/percentile-aggregation.asciidoc[]
 include::metrics/percentile-rank-aggregation.asciidoc[]
 include::metrics/cardinality-aggregation.asciidoc[]
 include::metrics/geobounds-aggregation.asciidoc[]
--- a/Show More
+++ b/Show More
`@ -75,3 +75,4 @@ include::mapping/conf-mappings.asciidoc[]`

	`include::mapping/meta.asciidoc[]`	`include::mapping/meta.asciidoc[]`

		`include::mapping/transform.asciidoc[]`