Merge branch 'master' into require_units

Conflicts:
	src/main/java/org/elasticsearch/action/bulk/BulkRequest.java
	src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java
	src/main/java/org/elasticsearch/node/internal/InternalSettingsPreparer.java
	src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreTests.java
This commit is contained in:
Michael McCandless 2015-06-04 13:59:10 -04:00 committed by mikemccand
commit e1197dfea9
871 changed files with 27552 additions and 11765 deletions

2
.gitignore vendored
View File

@ -37,4 +37,4 @@ eclipse-build
nb-configuration.xml
nbactions.xml
/dependency-reduced-pom.xml
dependency-reduced-pom.xml

View File

@ -57,7 +57,7 @@
# Maven will replace the project.name with elasticsearch below. If that
# hasn't been done, we assume that this is not a packaged version and the
# user has forgotten to run Maven to create a package.
IS_PACKAGED_VERSION='${project.name}'
IS_PACKAGED_VERSION='${project.artifactId}'
if [ "$IS_PACKAGED_VERSION" != "elasticsearch" ]; then
cat >&2 << EOF
Error: You must build the project with Maven or download a pre-built package

View File

@ -103,4 +103,6 @@ if [ -e "$CONF_FILE" ]; then
esac
fi
export HOSTNAME=`hostname -s`
exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Xmx64m -Xms16m -Delasticsearch -Des.path.home="$ES_HOME" $properties -cp "$ES_HOME/lib/*" org.elasticsearch.plugins.PluginManager $args

View File

@ -9,6 +9,8 @@ for %%I in ("%SCRIPT_DIR%..") do set ES_HOME=%%~dpfI
TITLE Elasticsearch Plugin Manager ${project.version}
SET HOSTNAME=%COMPUTERNAME%
"%JAVA_HOME%\bin\java" %JAVA_OPTS% %ES_JAVA_OPTS% -Xmx64m -Xms16m -Des.path.home="%ES_HOME%" -cp "%ES_HOME%/lib/*;" "org.elasticsearch.plugins.PluginManager" %*
goto finally

View File

@ -4,6 +4,10 @@ rootLogger: ${es.logger.level}, console, file
logger:
# log action execution errors for easier debugging
action: DEBUG
# deprecation logging, turn to DEBUG to see them
deprecation: INFO, deprecation_log_file
# reduce the logging for aws, too much is logged under the default INFO
com.amazonaws: WARN
org.apache.http: INFO
@ -24,6 +28,7 @@ logger:
additivity:
index.search.slowlog: false
index.indexing.slowlog: false
deprecation: false
appender:
console:
@ -51,6 +56,14 @@ appender:
#type: pattern
#conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
deprecation_log_file:
type: dailyRollingFile
file: ${path.logs}/${cluster.name}_deprecation.log
datePattern: "'.'yyyy-MM-dd"
layout:
type: pattern
conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
index_search_slow_log_file:
type: dailyRollingFile
file: ${path.logs}/${cluster.name}_index_search_slowlog.log

View File

@ -30,6 +30,7 @@ import socket
import urllib.request
import subprocess
from functools import partial
from http.client import HTTPConnection
from http.client import HTTPSConnection
@ -72,6 +73,11 @@ PLUGINS = [('license', 'elasticsearch/license/latest'),
LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log')
# console colors
COLOR_OK = '\033[92m'
COLOR_END = '\033[0m'
COLOR_FAIL = '\033[91m'
def log(msg):
log_plain('\n%s' % msg)
@ -137,9 +143,6 @@ def get_tag_hash(tag):
def get_current_branch():
return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip()
verify_java_version('1.7') # we require to build with 1.7
verify_mvn_java_version('1.7', MVN)
# Utility that returns the name of the release branch for a given version
def release_branch(version):
return 'release_branch_%s' % version
@ -545,14 +548,6 @@ def print_sonatype_notice():
</settings>
""")
def check_s3_credentials():
if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None):
raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3')
def check_gpg_credentials():
if not env.get('GPG_KEY_ID', None) or not env.get('GPG_PASSPHRASE', None):
raise RuntimeError('Could not find "GPG_KEY_ID" / "GPG_PASSPHRASE" in the env variables please export in order to sign the packages (also make sure that GPG_KEYRING is set when not in ~/.gnupg)')
def check_command_exists(name, cmd):
try:
subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
@ -562,9 +557,6 @@ def check_command_exists(name, cmd):
VERSION_FILE = 'src/main/java/org/elasticsearch/Version.java'
POM_FILE = 'pom.xml'
# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml
print_sonatype_notice()
# finds the highest available bwc version to test against
def find_bwc_version(release_version, bwc_dir='backwards'):
log(' Lookup bwc version in directory [%s]' % bwc_dir)
@ -618,6 +610,60 @@ def check_norelease(path='src'):
if pattern.search(line):
raise RuntimeError('Found //norelease comment in %s line %s' % (full_path, line_number))
def run_and_print(text, run_function):
try:
print(text, end='')
run_function()
print(COLOR_OK + 'OK' + COLOR_END)
return True
except RuntimeError:
print(COLOR_FAIL + 'NOT OK' + COLOR_END)
return False
def check_env_var(text, env_var):
try:
print(text, end='')
env[env_var]
print(COLOR_OK + 'OK' + COLOR_END)
return True
except KeyError:
print(COLOR_FAIL + 'NOT OK' + COLOR_END)
return False
def check_environment_and_commandline_tools(check_only):
checks = list()
checks.append(check_env_var('Checking for AWS env configuration AWS_SECRET_ACCESS_KEY_ID... ', 'AWS_SECRET_ACCESS_KEY'))
checks.append(check_env_var('Checking for AWS env configuration AWS_ACCESS_KEY_ID... ', 'AWS_ACCESS_KEY_ID'))
checks.append(check_env_var('Checking for SONATYPE env configuration SONATYPE_USERNAME... ', 'SONATYPE_USERNAME'))
checks.append(check_env_var('Checking for SONATYPE env configuration SONATYPE_PASSWORD... ', 'SONATYPE_PASSWORD'))
checks.append(check_env_var('Checking for GPG env configuration GPG_KEY_ID... ', 'GPG_KEY_ID'))
checks.append(check_env_var('Checking for GPG env configuration GPG_PASSPHRASE... ', 'GPG_PASSPHRASE'))
checks.append(check_env_var('Checking for S3 repo upload env configuration S3_BUCKET_SYNC_TO... ', 'S3_BUCKET_SYNC_TO'))
checks.append(check_env_var('Checking for git env configuration GIT_AUTHOR_NAME... ', 'GIT_AUTHOR_NAME'))
checks.append(check_env_var('Checking for git env configuration GIT_AUTHOR_EMAIL... ', 'GIT_AUTHOR_EMAIL'))
checks.append(run_and_print('Checking command: rpm... ', partial(check_command_exists, 'rpm', 'rpm --version')))
checks.append(run_and_print('Checking command: dpkg... ', partial(check_command_exists, 'dpkg', 'dpkg --version')))
checks.append(run_and_print('Checking command: gpg... ', partial(check_command_exists, 'gpg', 'gpg --version')))
checks.append(run_and_print('Checking command: expect... ', partial(check_command_exists, 'expect', 'expect -v')))
checks.append(run_and_print('Checking command: createrepo... ', partial(check_command_exists, 'createrepo', 'createrepo --version')))
checks.append(run_and_print('Checking command: s3cmd... ', partial(check_command_exists, 's3cmd', 's3cmd --version')))
checks.append(run_and_print('Checking command: apt-ftparchive... ', partial(check_command_exists, 'apt-ftparchive', 'apt-ftparchive --version')))
# boto, check error code being returned
location = os.path.dirname(os.path.realpath(__file__))
command = 'python %s/upload-s3.py -h' % (location)
checks.append(run_and_print('Testing boto python dependency... ', partial(check_command_exists, 'python-boto', command)))
checks.append(run_and_print('Checking java version... ', partial(verify_java_version, '1.7')))
checks.append(run_and_print('Checking java mvn version... ', partial(verify_mvn_java_version, '1.7', MVN)))
if check_only:
sys.exit(0)
if False in checks:
print("Exiting due to failing checks")
sys.exit(0)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Release')
@ -636,9 +682,12 @@ if __name__ == '__main__':
help='Smoke tests the given release')
parser.add_argument('--bwc', '-w', dest='bwc', metavar='backwards', default='backwards',
help='Backwards compatibility version path to use to run compatibility tests against')
parser.add_argument('--check-only', dest='check_only', action='store_true',
help='Checks and reports for all requirements and then exits')
parser.set_defaults(dryrun=True)
parser.set_defaults(smoke=None)
parser.set_defaults(check_only=False)
args = parser.parse_args()
bwc_path = args.bwc
src_branch = args.branch
@ -649,18 +698,19 @@ if __name__ == '__main__':
build = not args.smoke
smoke_test_version = args.smoke
check_environment_and_commandline_tools(args.check_only)
# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml
print_sonatype_notice()
# we require to build with 1.7
verify_java_version('1.7')
verify_mvn_java_version('1.7', MVN)
if os.path.exists(LOG):
raise RuntimeError('please remove old release log %s first' % LOG)
check_gpg_credentials()
check_command_exists('gpg', 'gpg --version')
check_command_exists('expect', 'expect -v')
if not dry_run:
check_s3_credentials()
check_command_exists('createrepo', 'createrepo --version')
check_command_exists('s3cmd', 's3cmd --version')
check_command_exists('apt-ftparchive', 'apt-ftparchive --version')
print('WARNING: dryrun is set to "false" - this will push and publish the release')
input('Press Enter to continue...')

View File

@ -33,6 +33,7 @@ my @Groups = qw(
);
my %Group_Labels = (
breaking => 'Breaking changes',
build => 'Build',
deprecation => 'Deprecations',
doc => 'Docs',
feature => 'New features',
@ -70,6 +71,14 @@ sub dump_issues {
$month++;
$year += 1900;
print <<"HTML";
<html>
<head>
<meta charset="UTF-8">
</head>
<body>
HTML
for my $group ( @Groups, 'other' ) {
my $group_issues = $issues->{$group} or next;
print "<h2>$Group_Labels{$group}</h2>\n\n<ul>\n";
@ -115,6 +124,7 @@ sub dump_issues {
print "</ul>";
print "\n\n";
}
print "</body></html>\n";
}
#===================================

View File

@ -30,10 +30,10 @@ MetricsAggregationBuilder aggregation =
AggregationBuilders
.scriptedMetric("agg")
.initScript("_agg['heights'] = []")
.mapScript("if (doc['gender'].value == \"male\") " +
.mapScript(new Script("if (doc['gender'].value == \"male\") " +
"{ _agg.heights.add(doc['height'].value) } " +
"else " +
"{ _agg.heights.add(-1 * doc['height'].value) }");
"{ _agg.heights.add(-1 * doc['height'].value) }"));
--------------------------------------------------
You can also specify a `combine` script which will be executed on each shard:
@ -43,12 +43,12 @@ You can also specify a `combine` script which will be executed on each shard:
MetricsAggregationBuilder aggregation =
AggregationBuilders
.scriptedMetric("agg")
.initScript("_agg['heights'] = []")
.mapScript("if (doc['gender'].value == \"male\") " +
.initScript(new Script("_agg['heights'] = []"))
.mapScript(new Script("if (doc['gender'].value == \"male\") " +
"{ _agg.heights.add(doc['height'].value) } " +
"else " +
"{ _agg.heights.add(-1 * doc['height'].value) }")
.combineScript("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum");
"{ _agg.heights.add(-1 * doc['height'].value) }"))
.combineScript(new Script("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum"));
--------------------------------------------------
You can also specify a `reduce` script which will be executed on the node which gets the request:
@ -58,13 +58,13 @@ You can also specify a `reduce` script which will be executed on the node which
MetricsAggregationBuilder aggregation =
AggregationBuilders
.scriptedMetric("agg")
.initScript("_agg['heights'] = []")
.mapScript("if (doc['gender'].value == \"male\") " +
.initScript(new Script("_agg['heights'] = []"))
.mapScript(new Script("if (doc['gender'].value == \"male\") " +
"{ _agg.heights.add(doc['height'].value) } " +
"else " +
"{ _agg.heights.add(-1 * doc['height'].value) }")
.combineScript("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum")
.reduceScript("heights_sum = 0; for (a in _aggs) { heights_sum += a }; return heights_sum");
"{ _agg.heights.add(-1 * doc['height'].value) }"))
.combineScript(new Script("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum"))
.reduceScript(new Script("heights_sum = 0; for (a in _aggs) { heights_sum += a }; return heights_sum"));
--------------------------------------------------

View File

@ -22,7 +22,7 @@ Or you can use `prepareUpdate()` method:
[source,java]
--------------------------------------------------
client.prepareUpdate("ttl", "doc", "1")
.setScript("ctx._source.gender = \"male\"" <1> , ScriptService.ScriptType.INLINE)
.setScript(new Script("ctx._source.gender = \"male\"" <1> , ScriptService.ScriptType.INLINE, null, null))
.get();
client.prepareUpdate("ttl", "doc", "1")
@ -46,7 +46,7 @@ The update API allows to update a document based on a script provided:
[source,java]
--------------------------------------------------
UpdateRequest updateRequest = new UpdateRequest("ttl", "doc", "1")
.script("ctx._source.gender = \"male\"");
.script(new Script("ctx._source.gender = \"male\""));
client.update(updateRequest).get();
--------------------------------------------------

View File

@ -73,8 +73,6 @@ Some aggregations work on values extracted from the aggregated documents. Typica
a specific document field which is set using the `field` key for the aggregations. It is also possible to define a
<<modules-scripting,`script`>> which will generate the values (per document).
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
When both `field` and `script` settings are configured for the aggregation, the script will be treated as a
`value script`. While normal scripts are evaluated on a document level (i.e. the script has access to all the data
associated with the document), value scripts are evaluated on the *value* level. In this mode, the values are extracted

View File

@ -128,8 +128,6 @@ It is also possible to customize the key for each range:
==== Script
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
[source,js]
--------------------------------------------------
{
@ -148,6 +146,33 @@ TIP: The `script` parameter expects an inline script. Use `script_id` for indexe
}
--------------------------------------------------
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"price_ranges" : {
"range" : {
"script" : {
"file": "my_script",
"params": {
"field": "price"
}
},
"ranges" : [
{ "to" : 50 },
{ "from" : 50, "to" : 100 },
{ "from" : 100 }
]
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
==== Value Script
Let's say the product prices are in USD but we would like to get the price ranges in EUR. We can use a value script to convert the prices prior to aggregation (assuming a conversion rate of 0.8).
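A minimal sketch of such a value script, using the `inline`/`params` form shown above (the range boundaries are illustrative):

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "price_ranges" : {
            "range" : {
                "field" : "price",
                "script" : {
                    "inline": "_value * conversion_rate",
                    "params" : {
                        "conversion_rate" : 0.8
                    }
                },
                "ranges" : [
                    { "to" : 40 },
                    { "from" : 40, "to" : 80 },
                    { "from" : 80 }
                ]
            }
        }
    }
}
--------------------------------------------------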

View File

@ -358,13 +358,6 @@ Customized scores can be implemented via a script:
--------------------------------------------------
Scripts can be inline (as in above example), indexed or stored on disk. For details on the options, see <<modules-scripting, script documentation>>.
Parameters need to be set as follows:
[horizontal]
`script`:: Inline script, name of script file or name of indexed script. Mandatory.
`script_type`:: One of "inline" (default), "indexed" or "file".
`lang`:: Script language (default "groovy")
`params`:: Script parameters (default empty).
Available parameters in the script are

View File

@ -441,7 +441,27 @@ Generating the terms using a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"genders" : {
"terms" : {
"script" : {
"file": "my_script",
"params": {
"field": "gender"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
==== Value Script

View File

@ -47,7 +47,29 @@ Computing the average grade based on a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
...,
"aggs" : {
"avg_grade" : {
"avg" : {
"script" : {
"file": "my_script",
"params": {
"field": "grade"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
===== Value Script
@ -63,9 +85,11 @@ It turned out that the exam was way above the level of the students and a grade
"avg_corrected_grade" : {
"avg" : {
"field" : "grade",
"script" : "_value * correction",
"params" : {
"correction" : 1.2
"script" : {
"inline": "_value * correction",
"params" : {
"correction" : 1.2
}
}
}
}

View File

@ -153,7 +153,28 @@ however since hashes need to be computed on the fly.
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"author_count" : {
"cardinality" : {
"script" : {
"file": "my_script",
"params": {
"first_name_field": "author.first_name",
"last_name_field": "author.last_name"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
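For example, assuming a script has been indexed under the id `my_script`, the same aggregation could be written as (a sketch):

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "author_count" : {
            "cardinality" : {
                "script" : {
                    "id": "my_script",
                    "params": {
                        "first_name_field": "author.first_name",
                        "last_name_field": "author.last_name"
                    }
                }
            }
        }
    }
}
--------------------------------------------------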
==== Missing value

View File

@ -91,7 +91,29 @@ Computing the grades stats based on a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
...,
"aggs" : {
"grades_stats" : {
"extended_stats" : {
"script" : {
"file": "my_script",
"params": {
"field": "grade"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
===== Value Script
@ -107,9 +129,11 @@ It turned out that the exam was way above the level of the students and a grade
"grades_stats" : {
"extended_stats" : {
"field" : "grade",
"script" : "_value * correction",
"params" : {
"correction" : 1.2
"script" : {
"inline": "_value * correction",
"params" : {
"correction" : 1.2
}
}
}
}

View File

@ -44,7 +44,27 @@ Computing the max price value across all document, this time using a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"max_price" : {
"max" : {
"script" : {
"file": "my_script",
"params": {
"field": "price"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
==== Value Script
@ -57,9 +77,11 @@ Let's say that the prices of the documents in our index are in USD, but we would
"max_price_in_euros" : {
"max" : {
"field" : "price",
"script" : "_value * conversion_rate",
"params" : {
"conversion_rate" : 1.2
"script" : {
"inline": "_value * conversion_rate",
"params" : {
"conversion_rate" : 1.2
}
}
}
}

View File

@ -44,7 +44,27 @@ Computing the min price value across all document, this time using a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"min_price" : {
"min" : {
"script" : {
"file": "my_script",
"params": {
"field": "price"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
==== Value Script
@ -57,9 +77,11 @@ Let's say that the prices of the documents in our index are in USD, but we would
"min_price_in_euros" : {
"min" : {
"field" : "price",
"script" : "_value * conversion_rate",
"params" : {
"conversion_rate" : 1.2
"script" :
"inline": "_value * conversion_rate",
"params" : {
"conversion_rate" : 1.2
}
}
}
}

View File

@ -100,9 +100,11 @@ a script to convert them on-the-fly:
"aggs" : {
"load_time_outlier" : {
"percentiles" : {
"script" : "doc['load_time'].value / timeUnit", <1>
"params" : {
"timeUnit" : 1000 <2>
"script" : {
"inline": "doc['load_time'].value / timeUnit", <1>
"params" : {
"timeUnit" : 1000 <2>
}
}
}
}
@ -113,7 +115,27 @@ a script to convert them on-the-fly:
script to generate values which percentiles are calculated on
<2> Scripting supports parameterized input just like any other script
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"load_time_outlier" : {
"percentiles" : {
"script" : {
"file": "my_script",
"params" : {
"timeUnit" : 1000
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
[[search-aggregations-metrics-percentile-aggregation-approximation]]
==== Percentiles are (usually) approximate

View File

@ -72,9 +72,11 @@ a script to convert them on-the-fly:
"load_time_outlier" : {
"percentile_ranks" : {
"values" : [3, 5],
"script" : "doc['load_time'].value / timeUnit", <1>
"params" : {
"timeUnit" : 1000 <2>
"script" : {
"inline": "doc['load_time'].value / timeUnit", <1>
"params" : {
"timeUnit" : 1000 <2>
}
}
}
}
@ -85,7 +87,28 @@ a script to convert them on-the-fly:
script to generate values which percentile ranks are calculated on
<2> Scripting supports parameterized input just like any other script
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
"aggs" : {
"load_time_outlier" : {
"percentile_ranks" : {
"values" : [3, 5],
"script" : {
"file": "my_script",
"params" : {
"timeUnit" : 1000
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
==== Missing value
@ -108,3 +131,4 @@ had a value.
--------------------------------------------------
<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`.

View File

@ -45,6 +45,42 @@ The response for the above aggregation:
}
--------------------------------------------------
The above example can also be specified using file scripts as follows:
[source,js]
--------------------------------------------------
{
"query" : {
"match_all" : {}
},
"aggs": {
"profit": {
"scripted_metric": {
"init_script" : {
"file": "my_init_script"
},
"map_script" : {
"file": "my_map_script"
},
"combine_script" : {
"file": "my_combine_script"
},
"params": {
"field": "amount" <1>
},
"reduce_script" : {
"file": "my_reduce_script"
},
}
}
}
}
--------------------------------------------------
<1> script parameters for init, map and combine scripts must be specified in a global `params` object so that they can be shared between the scripts
For more details on specifying scripts see <<modules-scripting, script documentation>>.
==== Scope of scripts
The scripted metric aggregation uses scripts at 4 stages of its execution:
@ -225,13 +261,4 @@ params:: Optional. An object whose contents will be passed as variable
--------------------------------------------------
reduce_params:: Optional. An object whose contents will be passed as variables to the `reduce_script`. This can be useful to allow the user to control
the behavior of the reduce phase. If this is not specified the variable will be undefined in the reduce_script execution.
lang:: Optional. The script language used for the scripts. If this is not specified the default scripting language is used.
init_script_file:: Optional. Can be used in place of the `init_script` parameter to provide the script using in a file.
init_script_id:: Optional. Can be used in place of the `init_script` parameter to provide the script using an indexed script.
map_script_file:: Optional. Can be used in place of the `map_script` parameter to provide the script using in a file.
map_script_id:: Optional. Can be used in place of the `map_script` parameter to provide the script using an indexed script.
combine_script_file:: Optional. Can be used in place of the `combine_script` parameter to provide the script using in a file.
combine_script_id:: Optional. Can be used in place of the `combine_script` parameter to provide the script using an indexed script.
reduce_script_file:: Optional. Can be used in place of the `reduce_script` parameter to provide the script using in a file.
reduce_script_id:: Optional. Can be used in place of the `reduce_script` parameter to provide the script using an indexed script.

View File

@ -53,7 +53,29 @@ Computing the grades stats based on a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
...,
"aggs" : {
"grades_stats" : {
"stats" : {
"script" : {
"file": "my_script",
"params" : {
"field" : "grade"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
===== Value Script
@ -69,9 +91,11 @@ It turned out that the exam was way above the level of the students and a grade
"grades_stats" : {
"stats" : {
"field" : "grade",
"script" : "_value * correction",
"params" : {
"correction" : 1.2
"script" :
"inline": "_value * correction",
"params" : {
"correction" : 1.2
}
}
}
}

View File

@ -55,7 +55,29 @@ Computing the intraday return based on a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
...,
"aggs" : {
"intraday_return" : {
"sum" : {
"script" : {
"file": "my_script",
"params" : {
"field" : "change"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
===== Value Script
@ -71,7 +93,8 @@ Computing the sum of squares over all stock tick changes:
"daytime_return" : {
"sum" : {
"field" : "change",
"script" : "_value * _value" }
"script" : "_value * _value"
}
}
}
}

View File

@ -48,4 +48,26 @@ Counting the values generated by a script:
}
--------------------------------------------------
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
[source,js]
--------------------------------------------------
{
...,
"aggs" : {
"grades_count" : {
"value_count" : {
"script" : {
"file": "my_script",
"params" : {
"field" : "grade"
}
}
}
}
}
}
--------------------------------------------------
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.

View File

@ -180,11 +180,11 @@ The default value of `alpha` is `0.5`, and the setting accepts any float from 0-
[[single_0.2alpha]]
.Single Exponential moving average with window of size 10, alpha = 0.2
.EWMA with window of size 10, alpha = 0.2
image::images/pipeline_movavg/single_0.2alpha.png[]
[[single_0.7alpha]]
.Single Exponential moving average with window of size 10, alpha = 0.7
.EWMA with window of size 10, alpha = 0.7
image::images/pipeline_movavg/single_0.7alpha.png[]
==== Holt-Linear
@ -223,13 +223,111 @@ to see. Small values emphasize long-term trends (such as a constant linear tren
values emphasize short-term trends. This will become more apparent when you are predicting values.
[[double_0.2beta]]
.Double Exponential moving average with window of size 100, alpha = 0.5, beta = 0.2
.Holt-Linear moving average with window of size 100, alpha = 0.5, beta = 0.2
image::images/pipeline_movavg/double_0.2beta.png[]
[[double_0.7beta]]
.Double Exponential moving average with window of size 100, alpha = 0.5, beta = 0.7
.Holt-Linear moving average with window of size 100, alpha = 0.5, beta = 0.7
image::images/pipeline_movavg/double_0.7beta.png[]
==== Holt-Winters
The `holt_winters` model (aka "triple exponential") incorporates a third exponential term which
tracks the seasonal aspect of your data. This aggregation therefore smooths based on three components: "level", "trend"
and "seasonality".
The level and trend calculations are identical to `holt`. The seasonal calculation looks at the difference between
the current point and the point one period earlier.
Holt-Winters requires a little more handholding than the other moving averages. You need to specify the "periodicity"
of your data: e.g. if your data has cyclic trends every 7 days, you would set `period: 7`. Similarly if there was
a monthly trend, you would set it to `30`. There is currently no periodicity detection, although that is planned
for future enhancements.
There are two varieties of Holt-Winters: additive and multiplicative.
===== "Cold Start"
Unfortunately, due to the nature of Holt-Winters, it requires two periods of data to "bootstrap" the algorithm. This
means that your `window` must always be *at least* twice the size of your period. An exception will be thrown if it
isn't. It also means that Holt-Winters will not emit a value for the first `2 * period` buckets; the current algorithm
does not backcast.
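For example, with a `period` of 30 the `window` must be at least 60 (a sketch with illustrative values):

[source,js]
--------------------------------------------------
{
    "the_movavg":{
        "moving_avg":{
            "buckets_path": "the_sum",
            "window": 60,
            "model" : "holt_winters",
            "settings" : {
                "period" : 30
            }
        }
    }
}
--------------------------------------------------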
[[holt_winters_cold_start]]
.Holt-Winters showing a "cold" start where no values are emitted
image::images/pipeline_movavg/triple_untruncated.png[]
Because the "cold start" obscures what the moving average looks like, the rest of the Holt-Winters images are truncated
to not show the "cold start". Just be aware this will always be present at the beginning of your moving averages!
===== Additive Holt-Winters
Additive seasonality is the default; it can also be specified by setting `"type": "add"`. This variety is preferred
when the seasonal effect is additive to your data. E.g. you could simply subtract the seasonal effect to "de-seasonalize"
your data into a flat trend.
The default value of `alpha`, `beta` and `gamma` is `0.5`, and the settings accept any float from 0-1 inclusive.
The default value of `period` is `1`.
[source,js]
--------------------------------------------------
{
"the_movavg":{
"moving_avg":{
"buckets_path": "the_sum",
"model" : "holt_winters",
"settings" : {
"type" : "add",
"alpha" : 0.5,
"beta" : 0.5,
"gamma" : 0.5,
"period" : 7
}
}
}
--------------------------------------------------
[[holt_winters_add]]
.Holt-Winters moving average with window of size 120, alpha = 0.5, beta = 0.7, gamma = 0.3, period = 30
image::images/pipeline_movavg/triple.png[]
===== Multiplicative Holt-Winters
Multiplicative is specified by setting `"type": "mult"`. This variety is preferred when the seasonal effect is
multiplied against your data. E.g. if the seasonal effect multiplies the data by 5x, rather than simply adding to it.
The default value of `alpha`, `beta` and `gamma` is `0.5`, and the settings accept any float from 0-1 inclusive.
The default value of `period` is `1`.
[WARNING]
======
Multiplicative Holt-Winters works by dividing each data point by the seasonal value. This is problematic if any of
your data is zero, or if there are gaps in the data (since this results in a divide-by-zero). To combat this, the
`mult` Holt-Winters pads all values by a very small amount (1*10^-10^) so that all values are non-zero. This affects
the result, but only minimally. If your data is non-zero, or you prefer to see `NaN` when zeros are encountered,
you can disable this behavior with `pad: false`
======
[source,js]
--------------------------------------------------
{
"the_movavg":{
"moving_avg":{
"buckets_path": "the_sum",
"model" : "holt_winters",
"settings" : {
"type" : "mult",
"alpha" : 0.5,
"beta" : 0.5,
"gamma" : 0.5,
"period" : 7,
"pad" : true
}
}
}
--------------------------------------------------
==== Prediction
All the moving average models support a "prediction" mode, which will attempt to extrapolate into the future given the
@ -263,7 +361,7 @@ value, we can extrapolate based on local constant trends (in this case the predi
of the series was heading in a downward direction):
[[double_prediction_local]]
.Double Exponential moving average with window of size 100, predict = 20, alpha = 0.5, beta = 0.8
.Holt-Linear moving average with window of size 100, predict = 20, alpha = 0.5, beta = 0.8
image::images/pipeline_movavg/double_prediction_local.png[]
In contrast, if we choose a small `beta`, the predictions are based on the global constant trend. In this series, the
@ -272,3 +370,10 @@ global trend is slightly positive, so the prediction makes a sharp u-turn and be
[[double_prediction_global]]
.Double Exponential moving average with window of size 100, predict = 20, alpha = 0.5, beta = 0.1
image::images/pipeline_movavg/double_prediction_global.png[]
The `holt_winters` model has the potential to deliver the best predictions, since it also incorporates seasonal
fluctuations into the model:
[[holt_winters_prediction_global]]
.Holt-Winters moving average with window of size 120, predict = 25, alpha = 0.8, beta = 0.2, gamma = 0.7, period = 30
image::images/pipeline_movavg/triple_prediction.png[]

View File

@ -5,6 +5,7 @@ An analyzer of type `custom` that allows to combine a `Tokenizer` with
zero or more `Token Filters`, and zero or more `Char Filters`. The
custom analyzer accepts a logical/registered name of the tokenizer to
use, and a list of logical/registered names of token filters.
The name of the custom analyzer must not start with "_".
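For example, a valid definition (with an illustrative analyzer name and filter chain) could look like:

[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/my_index' -d '{
    "settings" : {
        "analysis" : {
            "analyzer" : {
                "my_custom_analyzer" : {
                    "type" : "custom",
                    "tokenizer" : "standard",
                    "filter" : ["lowercase"]
                }
            }
        }
    }
}'
--------------------------------------------------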
The following are settings that can be set for a `custom` analyzer type:

View File

@ -81,6 +81,113 @@ being consumed by a monitoring tool, rather than intended for human
consumption. The default for the `human` flag is
`false`.
[float]
=== Response Filtering
All REST APIs accept a `filter_path` parameter that can be used to reduce
the response returned by Elasticsearch. This parameter takes a comma-separated
list of filters expressed with the dot notation:
[source,sh]
--------------------------------------------------
curl -XGET 'localhost:9200/_search?pretty&filter_path=took,hits.hits._id,hits.hits._score'
{
"took" : 3,
"hits" : {
"hits" : [
{
"_id" : "3640",
"_score" : 1.0
},
{
"_id" : "3642",
"_score" : 1.0
}
]
}
}
--------------------------------------------------
It also supports the `*` wildcard character to match any field or part
of a field's name:
[source,sh]
--------------------------------------------------
curl -XGET 'localhost:9200/_nodes/stats?filter_path=nodes.*.ho*'
{
"nodes" : {
"lvJHed8uQQu4brS-SXKsNA" : {
"host" : "portable"
}
}
}
--------------------------------------------------
And the `**` wildcard can be used to include fields without knowing the
exact path of the field. For example, we can return the Lucene version
of every segment with this request:
[source,sh]
--------------------------------------------------
curl 'localhost:9200/_segments?pretty&filter_path=indices.**.version'
{
"indices" : {
"movies" : {
"shards" : {
"0" : [ {
"segments" : {
"_0" : {
"version" : "5.2.0"
}
}
} ],
"2" : [ {
"segments" : {
"_0" : {
"version" : "5.2.0"
}
}
} ]
}
},
"books" : {
"shards" : {
"0" : [ {
"segments" : {
"_0" : {
"version" : "5.2.0"
}
}
} ]
}
}
}
}
--------------------------------------------------
Note that Elasticsearch sometimes returns the raw value of a field directly,
like the `_source` field. If you want to filter _source fields, you should
consider combining the already existing `_source` parameter (see
<<get-source-filtering,Get API>> for more details) with the `filter_path`
parameter like this:
[source,sh]
--------------------------------------------------
curl -XGET 'localhost:9200/_search?pretty&filter_path=hits.hits._source&_source=title'
{
"hits" : {
"hits" : [ {
"_source":{"title":"Book #2"}
}, {
"_source":{"title":"Book #1"}
}, {
"_source":{"title":"Book #3"}
} ]
}
}
--------------------------------------------------
[float]
=== Flat Settings

View File

@ -66,6 +66,10 @@ only those columns to appear.
192.168.56.30 9300 43.9 Ramsey, Doug
--------------------------------------------------
You can also request multiple columns using simple wildcards like
`/_cat/thread_pool?h=ip,bulk.*` to get all headers (or aliases) starting
with `bulk.`.
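For example (a sketch; the exact `bulk.*` headers returned depend on the Elasticsearch version):

[source,sh]
--------------------------------------------------
curl 'localhost:9200/_cat/thread_pool?v&h=ip,bulk.*'
--------------------------------------------------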
[float]
[[numeric-formats]]
=== Numeric formats
@ -120,4 +124,4 @@ include::cat/thread_pool.asciidoc[]
include::cat/shards.asciidoc[]
include::cat/segments.asciidoc[]
include::cat/segments.asciidoc[]

View File

@ -187,7 +187,7 @@ the options. Curl example with update actions:
{ "update" : {"_id" : "1", "_type" : "type1", "_index" : "index1", "_retry_on_conflict" : 3} }
{ "doc" : {"field" : "value"} }
{ "update" : { "_id" : "0", "_type" : "type1", "_index" : "index1", "_retry_on_conflict" : 3} }
{ "script" : "ctx._source.counter += param1", "lang" : "js", "params" : {"param1" : 1}, "upsert" : {"counter" : 1}}
{ "script" : { "inline": "ctx._source.counter += param1", "lang" : "js", "params" : {"param1" : 1}}, "upsert" : {"counter" : 1}}
{ "update" : {"_id" : "2", "_type" : "type1", "_index" : "index1", "_retry_on_conflict" : 3} }
{ "doc" : {"field" : "value"}, "doc_as_upsert" : true }
--------------------------------------------------

View File

@ -228,5 +228,7 @@ it's current version is equal to the specified one. This behavior is the same
for all version types with the exception of version type `FORCE` which always
retrieves the document.
Note that Elasticsearch does not store older versions of documents. Only the current version can be retrieved.
Internally, Elasticsearch has marked the old document as deleted and added an
entirely new document. The old version of the document doesn't disappear
immediately, although you won't be able to access it. Elasticsearch cleans up
deleted documents in the background as you continue to index more data.
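For example, to retrieve a document only if its current version is `2` (a sketch; a version conflict is returned otherwise):

[source,js]
--------------------------------------------------
curl -XGET 'http://localhost:9200/twitter/tweet/1?version=2'
--------------------------------------------------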

View File

@ -28,9 +28,11 @@ Now, we can execute a script that would increment the counter:
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
"script" : "ctx._source.counter += count",
"params" : {
"count" : 4
"script" : {
"inline": "ctx._source.counter += count",
"params" : {
"count" : 4
}
}
}'
--------------------------------------------------
@ -41,9 +43,11 @@ will still add it, since its a list):
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
"script" : "ctx._source.tags += tag",
"params" : {
"tag" : "blue"
"script" : {
"inline": "ctx._source.tags += tag",
"params" : {
"tag" : "blue"
}
}
}'
--------------------------------------------------
@ -71,9 +75,11 @@ And, we can delete the doc if the tags contain blue, or ignore (noop):
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
"script" : "ctx._source.tags.contains(tag) ? ctx.op = \"delete\" : ctx.op = \"none\"",
"params" : {
"tag" : "blue"
"script" : {
"inline": "ctx._source.tags.contains(tag) ? ctx.op = \"delete\" : ctx.op = \"none\"",
"params" : {
"tag" : "blue"
}
}
}'
--------------------------------------------------
@ -136,9 +142,11 @@ index the fresh doc:
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
"script" : "ctx._source.counter += count",
"params" : {
"count" : 4
"script" : {
"inline": "ctx._source.counter += count",
"params" : {
"count" : 4
}
},
"upsert" : {
"counter" : 1
@ -153,13 +161,15 @@ new `scripted_upsert` parameter with the value `true`.
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/sessions/session/dh3sgudg8gsrgl/_update' -d '{
"script_id" : "my_web_session_summariser",
"scripted_upsert":true,
"params" : {
"pageViewEvent" : {
"url":"foo.com/bar",
"response":404,
"time":"2014-01-01 12:32"
"script" : {
"id": "my_web_session_summariser",
"params" : {
"pageViewEvent" : {
"url":"foo.com/bar",
"response":404,
"time":"2014-01-01 12:32"
}
}
},
"upsert" : {

View File

@ -566,7 +566,7 @@ Which means that we just successfully bulk indexed 1000 documents into the bank
=== The Search API
Now let's start with some simple searches. There are two basic ways to run searches: one is by sending search parameters through the <<search-uri-request,REST request URI>> and the other by sending them through the<<search-request-body,[REST request body>>. The request body method allows you to be more expressive and also to define your searches in a more readable JSON format. We'll try one example of the request URI method but for the remainder of this tutorial, we will exclusively be using the request body method.
Now let's start with some simple searches. There are two basic ways to run searches: one is by sending search parameters through the <<search-uri-request,REST request URI>> and the other by sending them through the <<search-request-body,REST request body>>. The request body method allows you to be more expressive and also to define your searches in a more readable JSON format. We'll try one example of the request URI method but for the remainder of this tutorial, we will exclusively be using the request body method.
The REST API for search is accessible from the `_search` endpoint. This example returns all documents in the bank index:

(Three binary image files added, not shown: 96 KiB, 91 KiB and 48 KiB.)

View File

@ -149,6 +149,7 @@ field data format.
Computes and stores field data data-structures on disk at indexing time.
[float]
[[global-ordinals]]
==== Global ordinals
Global ordinals is a data-structure on top of field data, that maintains an
@ -182,6 +183,7 @@ ordinals is a small because it is very efficiently compressed. Eager loading of
can move the loading time from the first search request, to the refresh itself.
[float]
[[fielddata-loading]]
=== Fielddata loading
By default, field data is loaded lazily, ie. the first time that a query that

View File

@ -59,7 +59,6 @@ and warmers.
* <<indices-refresh>>
* <<indices-flush>>
* <<indices-optimize>>
* <<indices-seal>>
* <<indices-upgrade>>
--
@ -108,8 +107,6 @@ include::indices/refresh.asciidoc[]
include::indices/optimize.asciidoc[]
include::indices/seal.asciidoc[]
include::indices/shadow-replicas.asciidoc[]
include::indices/upgrade.asciidoc[]

View File

@ -10,8 +10,9 @@ trigger flush operations as required in order to clear memory.
[source,js]
--------------------------------------------------
$ curl -XPOST 'http://localhost:9200/twitter/_flush'
POST /twitter/_flush
--------------------------------------------------
// AUTOSENSE
[float]
[[flush-parameters]]
@ -39,7 +40,198 @@ or even on `_all` the indices.
[source,js]
--------------------------------------------------
$ curl -XPOST 'http://localhost:9200/kimchy,elasticsearch/_flush'
POST /kimchy,elasticsearch/_flush
$ curl -XPOST 'http://localhost:9200/_flush'
POST /_flush
--------------------------------------------------
// AUTOSENSE
[[indices-synced-flush]]
=== Synced Flush
Elasticsearch tracks the indexing activity of each shard. Shards that have not
received any indexing operations for 5 minutes are automatically marked as inactive. This presents
an opportunity for Elasticsearch to reduce shard resources and also perform
a special kind of flush, called `synced flush`. A synced flush performs a normal flush, then adds
a generated unique marker (sync_id) to all shards.
Since the sync id marker was added when there were no ongoing indexing operations, it can
be used as a quick way to check if the two shards' lucene indices are identical. This quick sync id
comparison (if present) is used during recovery or restarts to skip the first and
most costly phase of the process. In that case, no segment files need to be copied and
the transaction log replay phase of the recovery can start immediately. Note that since the sync id
marker was applied together with a flush, it is very likely that the transaction log will be empty,
speeding up recoveries even more.
This is particularly useful for use cases with many indices that are
never or very rarely updated, such as time-based data. Such use cases typically generate many indices whose
recovery without the synced flush marker would take a long time.
To check whether a shard has a marker or not, look for the `commit` section of shard stats returned by
the <<indices-stats,indices stats>> API:
[source,bash]
--------------------------------------------------
GET /twitter/_stats/commit?level=shards
--------------------------------------------------
// AUTOSENSE
which returns something similar to:
[source,js]
--------------------------------------------------
{
...
"indices": {
"twitter": {
"primaries": {},
"total": {},
"shards": {
"0": [
{
"routing": {
...
},
"commit": {
"id": "te7zF7C4UsirqvL6jp/vUg==",
"generation": 2,
"user_data": {
"sync_id": "AU2VU0meX-VX2aNbEUsD" <1>,
...
},
"num_docs": 0
}
}
...
],
...
}
}
}
}
--------------------------------------------------
<1> the `sync id` marker
[float]
=== Synced Flush API
The Synced Flush API allows an administrator to initiate a synced flush manually. This can be particularly useful for
a planned (rolling) cluster restart where you can stop indexing and don't want to wait the default 5 minutes for
idle indices to be sync-flushed automatically.
While handy, there are a couple of caveats for this API:
1. Synced flush is a best effort operation. Any ongoing indexing operations will cause
the synced flush to fail on that shard. This means that some shards may be sync-flushed while others aren't. See below for more.
2. The `sync_id` marker is removed as soon as the shard is flushed again. That is because a flush replaces the low level
lucene commit point where the marker is stored. Uncommitted operations in the transaction log do not remove the marker.
In practice, one should consider any indexing operation on an index as removing the marker as a flush can be triggered by Elasticsearch
at any time.
NOTE: It is harmless to request a synced flush while there is ongoing indexing. Shards that are idle will succeed and shards
that are not will fail. Any shards that succeeded will have faster recovery times.
[source,bash]
--------------------------------------------------
POST /twitter/_flush/synced
--------------------------------------------------
// AUTOSENSE
The response contains details about how many shards were successfully sync-flushed and information about any failure.
Here is what it looks like when all shards of a two shards and one replica index successfully
sync-flushed:
[source,js]
--------------------------------------------------
{
"_shards": {
"total": 4,
"successful": 4,
"failed": 0
},
"twitter": {
"total": 4,
"successful": 4,
"failed": 0
}
}
--------------------------------------------------
Here is what it looks like when one shard group failed due to pending operations:
[source,js]
--------------------------------------------------
{
"_shards": {
"total": 4,
"successful": 2,
"failed": 2
},
"twitter": {
"total": 4,
"successful": 2,
"failed": 2,
"failures": [
{
"shard": 1,
"reason": "[2] ongoing operations on primary"
}
]
}
}
--------------------------------------------------
NOTE: The above error is shown when the synced flush fails due to concurrent indexing operations. The HTTP
status code in that case will be `409 CONFLICT`.
Sometimes the failures are specific to a shard copy. The copies that failed will not be eligible for
fast recovery but those that succeeded still will be. This case is reported as follows:
[source,js]
--------------------------------------------------
{
"_shards": {
"total": 4,
"successful": 1,
"failed": 1
},
"twitter": {
"total": 4,
"successful": 3,
"failed": 1,
"failures": [
{
"shard": 1,
"reason": "unexpected error",
"routing": {
"state": "STARTED",
"primary": false,
"node": "SZNr2J_ORxKTLUCydGX4zA",
"relocating_node": null,
"shard": 1,
"index": "twitter"
}
}
]
}
}
--------------------------------------------------
NOTE: When a shard copy fails to sync-flush, the HTTP status code returned will be `409 CONFLICT`.
The synced flush API can be applied to more than one index with a single call,
or even on `_all` the indices.
[source,js]
--------------------------------------------------
POST /kimchy,elasticsearch/_flush/synced
POST /_flush/synced
--------------------------------------------------
// AUTOSENSE

View File

@ -1,91 +0,0 @@
[[indices-seal]]
== Seal
The seal API flushes and adds a "seal" marker to the shards of one or more
indices. The seal is used during recovery or restarts to skip the first and
most costly phase of the process if all copies of the shard have the same seal.
No segment files need to be copied and the transaction log replay phase of the
recovery can start immediately which makes recovery much faster.
There are two important points about seals:
1. They are best effort in that if there are any outstanding write operations
while the seal operation is being performed then the shards which those writes
target won't be sealed but all others will be. See below for more.
2. The seal breaks as soon as the shard issues a new lucene commit. Uncommitted
operations in the transaction log do not break the seal. That is because a seal
marks a point in time snapshot of the segments, a low level lucene commit.
Practically that means that every write operation on the index will remove the
seal.
[source,bash]
--------------------------------------------------
$ curl -XPOST 'http://localhost:9200/twitter/_seal'
--------------------------------------------------
The response contains details about which shards wrote the seal and the reason
in case they failed to write the seal.
Here is what it looks like when all copies single shard index successfully
wrote the seal:
[source,js]
--------------------------------------------------
{
"twitter": [
{
"shard_id": 0,
"responses": {
"5wjOIntuRqy9F_7JRrrLwA": "success",
"M2iCBe-nS5yaInE8volfSg": "success"
},
"message": "success"
}
}
--------------------------------------------------
Here is what it looks like when one copy fails:
[source,js]
--------------------------------------------------
{
"twitter": [
{
"shard_id": 0,
"responses": {
"M2iCBe-nS5yaInE8volfSg": "pending operations",
"5wjOIntuRqy9F_7JRrrLwA": "success"
},
"message": "failed on some copies"
}
}
--------------------------------------------------
Sometimes the failures can be shard wide and they'll look like this:
[source,js]
--------------------------------------------------
{
"twitter": [
{
"shard_id": 0,
"message": "operation counter on primary is non zero [2]"
}
}
--------------------------------------------------
[float]
[[seal-multi-index]]
=== Multi Index
The seal API can be applied to more than one index with a single call,
or even on `_all` the indices.
[source,js]
--------------------------------------------------
curl -XPOST 'http://localhost:9200/kimchy,elasticsearch/_seal'
curl -XPOST 'http://localhost:9200/_seal'
--------------------------------------------------

View File

@ -16,6 +16,25 @@ settings, you need to enable using it in elasticsearch.yml:
node.enable_custom_paths: true
--------------------------------------------------
You will also need to disable the default security manager that Elasticsearch
runs with. You can do this either by passing
`-Des.security.manager.enabled=false` on the command line when starting
Elasticsearch, or by disabling it in elasticsearch.yml:
[source,yaml]
--------------------------------------------------
security.manager.enabled: false
--------------------------------------------------
[WARNING]
========================
Disabling the security manager means that the Elasticsearch process is not
limited to the directories and files that it can read and write. However,
because the `index.data_path` setting is set when creating the index, the
security manager would prevent writing or reading from the index's location, so
it must be disabled.
========================
You can then create an index with a custom data path, where each node will use
this path for the data:
@ -88,6 +107,12 @@ settings API:
Boolean value indicating this index uses a shared filesystem. Defaults to
the `true` if `index.shadow_replicas` is set to true, `false` otherwise.
`index.shared_filesystem.recover_on_any_node`::
Boolean value indicating whether the primary shards for the index should be
allowed to recover on any node in the cluster, regardless of the number of
replicas or whether the node has previously had the shard allocated to it
before. Defaults to `false`.
=== Node level settings related to shadow replicas
These are non-dynamic settings that need to be configured in `elasticsearch.yml`

View File

@ -54,13 +54,26 @@ curl 'http://localhost:9200/twitter/_upgrade?pretty&human'
[source,js]
--------------------------------------------------
{
"twitter": {
"size": "21gb",
"size_in_bytes": "21000000000",
"size_to_upgrade": "10gb",
"size_to_upgrade_in_bytes": "10000000000"
"size_to_upgrade_ancient": "1gb",
"size_to_upgrade_ancient_in_bytes": "1000000000"
"indices": {
"twitter": {
"size": "21gb",
"size_in_bytes": "21000000000",
"size_to_upgrade": "10gb",
"size_to_upgrade_in_bytes": "10000000000"
"size_to_upgrade_ancient": "1gb",
"size_to_upgrade_ancient_in_bytes": "1000000000"
}
}
}
}
--------------------------------------------------
The level of detail in the upgrade status command can be controlled by
setting the `level` parameter to `cluster`, `index` (default) or `shard`.
For example, you can run the upgrade status command with `level=shard` to
get detailed upgrade information for each individual shard.
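For example, to get per-shard details:

[source,sh]
--------------------------------------------------
curl 'http://localhost:9200/twitter/_upgrade?level=shard&pretty&human'
--------------------------------------------------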

View File

@ -198,6 +198,11 @@ year.
|`year_month_day`|A formatter for a four digit year, two digit month of
year, and two digit day of month.
|`epoch_second`|A formatter for the number of seconds since the epoch.
|`epoch_millis`|A formatter for the number of milliseconds since
the epoch.
|=======================================================================
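As an illustration, a date field could opt into one of these formatters in its
mapping; the type and field names below are hypothetical:
[source,js]
--------------------------------------------------
{
    "my_type": {
        "properties": {
            "created": {
                "type": "date",
                "format": "epoch_millis"
            }
        }
    }
}
--------------------------------------------------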
[float]

View File

@ -1,6 +1,9 @@
[[mapping-parent-field]]
=== `_parent`
TIP: It is highly recommended to reindex all indices with the `_parent` field created before version 2.x.
The reason for this is to gain from all the optimizations added with the 2.0 release.
The parent field mapping is defined on a child mapping, and points to
the parent type this child relates to. For example, in case of a `blog`
type and a `blog_tag` type child document, the mapping for `blog_tag`
@ -20,8 +23,34 @@ should be:
The mapping is automatically stored and indexed (meaning it can be
searched on using the `_parent` field notation).
==== Field data loading
==== Limitations
Contrary to other fields the fielddata loading is not `lazy`, but `eager`. The reason for this is that when this
field has been enabled it is going to be used in parent/child queries, which heavily rely on field data to perform
efficiently. This can already be observed during indexing, after a refresh has been executed either automatically or manually.
The `_parent.type` setting can only point to a type that doesn't exist yet.
This means that a type can't become a parent type after it has been created.
The `_parent.type` setting can't point to itself. This means self-referential
parent/child isn't supported.
Parent/child queries (`has_child` & `has_parent`) can't be used in index aliases.
==== Global ordinals
Parent-child uses <<global-ordinals,global ordinals>> to speed up joins and global ordinals need to be rebuilt after any change to a shard.
The more parent id values are stored in a shard, the longer it takes to rebuild global ordinals for the `_parent` field.
Global ordinals, by default, are built lazily: the first parent-child query or aggregation after a refresh will trigger building of global ordinals.
This can introduce a significant latency spike for your users. You can use <<fielddata-loading,eager_global_ordinals>> to shift the cost of building global ordinals
from query time to refresh time, by mapping the _parent field as follows:
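The sketch below illustrates such a mapping; the parent and child type names are
hypothetical:
[source,js]
--------------------------------------------------
{
    "my_child_type": {
        "_parent": {
            "type": "my_parent_type",
            "fielddata": {
                "loading": "eager_global_ordinals"
            }
        }
    }
}
--------------------------------------------------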
==== Memory usage
The only on-heap memory used by parent/child is the global ordinals for the `_parent` field.
How much memory is used for the global ordinals for the `_parent` field in the fielddata cache
can be checked via the <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
APIs, eg:
[source,js]
--------------------------------------------------
curl -XGET "http://localhost:9200/_stats/fielddata?pretty&human&fielddata_fields=_parent"
--------------------------------------------------

View File

@ -79,7 +79,7 @@ format>> used to parse the provided timestamp value. For example:
}
--------------------------------------------------
Note, the default format is `dateOptionalTime`. The timestamp value will
Note, the default format is `epoch_millis||dateOptionalTime`. The timestamp value will
first be parsed as a number and if it fails the format will be tried.
[float]

View File

@ -10,11 +10,13 @@ field. Example:
{
"example" : {
"transform" : {
"script" : "if (ctx._source['title']?.startsWith('t')) ctx._source['suggest'] = ctx._source['content']",
"params" : {
"variable" : "not used but an example anyway"
},
"lang": "groovy"
"script" : {
"inline": "if (ctx._source['title']?.startsWith('t')) ctx._source['suggest'] = ctx._source['content']",
"params" : {
"variable" : "not used but an example anyway"
},
"lang": "groovy"
}
},
"properties": {
"title": { "type": "string" },

View File

@ -349,7 +349,7 @@ date type:
Defaults to the property/field name.
|`format` |The <<mapping-date-format,date
format>>. Defaults to `dateOptionalTime`.
format>>. Defaults to `epoch_millis||dateOptionalTime`.
|`store` |Set to `true` to store actual field in the index, `false` to not
store it. Defaults to `false` (note, the JSON document itself is stored,

View File

@ -42,8 +42,8 @@ and will use the matching format as its format attribute. The date
format itself is explained
<<mapping-date-format,here>>.
The default formats are: `dateOptionalTime` (ISO) and
`yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z`.
The default formats are: `dateOptionalTime` (ISO),
`yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z` and `epoch_millis`.
*Note:* `dynamic_date_formats` are used *only* for dynamically added
date fields, not for `date` fields that you specify in your mapping.
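For illustration, a root mapping could restrict dynamic date detection to explicit
formats like the following sketch (the type name is hypothetical, the formats are
those mentioned above):
[source,js]
--------------------------------------------------
{
    "my_type": {
        "dynamic_date_formats": ["yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z", "epoch_millis"]
    }
}
--------------------------------------------------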

View File

@ -4,6 +4,11 @@
This section discusses the changes that you need to be aware of when migrating
your application to Elasticsearch 2.0.
=== Networking
Elasticsearch now binds to the loopback interface by default (usually 127.0.0.1
or ::1). The setting `network.host` can be specified to change this behavior.
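For example, to bind to a specific address instead (the address shown is purely
illustrative), set it in elasticsearch.yml:
[source,yaml]
--------------------------------------------------
network.host: 192.168.1.22
--------------------------------------------------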
=== Indices API
The <<alias-retrieving, get alias api>> will, by default, produce an error response
@ -404,6 +409,12 @@ The `count` search type has been deprecated. All benefits from this search type
now be achieved by using the `query_then_fetch` search type (which is the
default) and setting `size` to `0`.
=== The count api internally uses the search api
The count api is now a shortcut to the search api with `size` set to 0. As a
result, a total failure will result in an exception being returned rather
than a normal response with `count` set to `0` and shard failures.
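For illustration, a count request is now roughly equivalent to a search such as
the following sketch (the index name is hypothetical):
[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/my_index/_search?size=0' -d '{
    "query" : {
        "match_all" : {}
    }
}'
--------------------------------------------------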
=== JSONP support
JSONP callback support has now been removed. CORS should be used to access Elasticsearch
@ -620,3 +631,19 @@ anymore, it will only highlight fields that were queried.
The `match` query with type set to `match_phrase_prefix` is not supported by the
postings highlighter. No highlighted snippets will be returned.
[float]
=== Parent/child
Parent/child has been rewritten completely to reduce memory usage and to execute
`has_child` and `has_parent` queries faster and more efficiently. The `_parent` field
uses doc values by default. The refactored and improved implementation is only active
for indices created on or after version 2.0.
In order to benefit from all performance and memory improvements, we recommend reindexing all
indices that have the `_parent` field and were created before the upgrade to 2.0.
The following breaks in backwards compatibility have been made on indices with the `_parent` field
created on clusters running version 2.0 or later:
* The `type` option on the `_parent` field can only point to a parent type that doesn't exist yet,
so this means that an existing type/mapping can no longer become a parent type.
* The `has_child` and `has_parent` queries can no longer be used in alias filters.

View File

@ -8,15 +8,14 @@ configuration, for example, the
network settings allows to set common settings that will be shared among
all network based modules (unless explicitly overridden in each module).
The `network.bind_host` setting allows to control the host different
network components will bind on. By default, the bind host will be
`anyLocalAddress` (typically `0.0.0.0` or `::0`).
The `network.bind_host` setting allows to control the host different network
components will bind on. By default, the bind host will be `anyLoopbackAddress`
(typically `127.0.0.1` or `::1`).
The `network.publish_host` setting allows to control the host the node
will publish itself within the cluster so other nodes will be able to
connect to it. Of course, this can't be the `anyLocalAddress`, and by
default, it will be the first non loopback address (if possible), or the
local address.
The `network.publish_host` setting allows to control the host the node will
publish itself within the cluster so other nodes will be able to connect to it.
Of course, this can't be the `anyLocalAddress`, and by default, it will be the
first loopback address (if possible), or the local address.
The `network.host` setting is a simple setting to automatically set both
`network.bind_host` and `network.publish_host` to the same host value.

View File

@ -293,6 +293,7 @@ deprecated[1.5.0,Rivers have been deprecated. See https://www.elastic.co/blog/d
* https://github.com/karmi/elasticsearch-paramedic[Paramedic Plugin] (by Karel Minařík)
* https://github.com/polyfractal/elasticsearch-segmentspy[SegmentSpy Plugin] (by Zachary Tong)
* https://github.com/xyu/elasticsearch-whatson[Whatson Plugin] (by Xiao Yu)
* https://github.com/lmenezes/elasticsearch-kopf[Kopf Plugin] (by lmenezes)
[float]
[[repository-plugins]]

View File

@ -29,7 +29,7 @@ GET /_search
{
"script_fields": {
"my_field": {
"script": "1 + my_var",
"inline": "1 + my_var",
"params": {
"my_var": 2
}
@ -38,7 +38,7 @@ GET /_search
}
-----------------------------------
Save the contents of the script as a file called `config/scripts/my_script.groovy`
Save the contents of the `inline` field as a file called `config/scripts/my_script.groovy`
on every data node in the cluster:
[source,js]
@ -54,7 +54,7 @@ GET /_search
{
"script_fields": {
"my_field": {
"script_file": "my_script",
"file": "my_script",
"params": {
"my_var": 2
}
@ -67,9 +67,9 @@ GET /_search
Additional `lang` plugins are provided to allow to execute scripts in
different languages. All places where a `script` parameter can be used, a `lang` parameter
(on the same level) can be provided to define the language of the
script. The following are the supported scripting languages:
different languages. All places where a script can be used, a `lang` parameter
can be provided to define the language of the script. The following are the
supported scripting languages:
[cols="<,<,<",options="header",]
|=======================================================================
@ -120,7 +120,7 @@ curl -XPOST localhost:9200/_search -d '{
{
"script_score": {
"lang": "groovy",
"script_file": "calculate-score",
"file": "calculate-score",
"params": {
"my_modifier": 8
}
@ -162,8 +162,8 @@ curl -XPOST localhost:9200/_scripts/groovy/indexedCalculateScore -d '{
This will create a document with id: `indexedCalculateScore` and type: `groovy` in the
`.scripts` index. The type of the document is the language used by the script.
This script can be accessed at query time by appending `_id` to
the script parameter and passing the script id. So `script` becomes `script_id`.:
This script can be accessed at query time by using the `id` script parameter and passing
the script id:
[source,js]
--------------------------------------------------
@ -178,7 +178,7 @@ curl -XPOST localhost:9200/_search -d '{
"functions": [
{
"script_score": {
"script_id": "indexedCalculateScore",
"id": "indexedCalculateScore",
"lang" : "groovy",
"params": {
"my_modifier": 8

View File

@ -3,32 +3,48 @@
[partintro]
--
*elasticsearch* provides a full Query DSL based on JSON to define
queries. In general, there are basic queries such as
<<query-dsl-term-query,term>> or
<<query-dsl-prefix-query,prefix>>. There are
also compound queries like the
<<query-dsl-bool-query,bool>> query.
While queries have scoring capabilities, in some contexts they will
only be used to filter the result set, such as in the
<<query-dsl-filtered-query,filtered>> or
<<query-dsl-constant-score-query,constant_score>>
queries.
Elasticsearch provides a full Query DSL based on JSON to define queries.
Think of the Query DSL as an AST of queries, consisting of two types of
clauses:
Think of the Query DSL as an AST of queries.
Some queries can be used by themselves like the
<<query-dsl-term-query,term>> query but other queries can contain
queries (like the <<query-dsl-bool-query,bool>> query), and each
of these composite queries can contain *any* query of the list of
queries, resulting in the ability to build quite
complex (and interesting) queries.
Leaf query clauses::
Queries can be used in different APIs. For example,
within a <<search-request-query,search query>>, or
as an <<search-aggregations-bucket-filter-aggregation,aggregation filter>>.
This section explains the queries that can form the AST one can use.
Leaf query clauses look for a particular value in a particular field, such as the
<<query-dsl-match-query,`match`>>, <<query-dsl-term-query,`term`>> or
<<query-dsl-range-query,`range`>> queries. These queries can be used
by themselves.
Compound query clauses::
Compound query clauses wrap other leaf *or* compound queries and are used to combine
multiple queries in a logical fashion (such as the
<<query-dsl-bool-query,`bool`>> or <<query-dsl-dis-max-query,`dis_max`>> query),
or to alter their behaviour (such as the <<query-dsl-not-query,`not`>> or
<<query-dsl-constant-score-query,`constant_score`>> query).
Query clauses behave differently depending on whether they are used in
<<query-filter-context,query context or filter context>>.
--
include::query-dsl/index.asciidoc[]
include::query-dsl/query_filter_context.asciidoc[]
include::query-dsl/match-all-query.asciidoc[]
include::query-dsl/full-text-queries.asciidoc[]
include::query-dsl/term-level-queries.asciidoc[]
include::query-dsl/compound-queries.asciidoc[]
include::query-dsl/joining-queries.asciidoc[]
include::query-dsl/geo-queries.asciidoc[]
include::query-dsl/special-queries.asciidoc[]
include::query-dsl/span-queries.asciidoc[]
include::query-dsl/minimum-should-match.asciidoc[]
include::query-dsl/multi-term-rewrite.asciidoc[]

View File

@ -1,5 +1,5 @@
[[query-dsl-and-query]]
== And Query
=== And Query
deprecated[2.0.0, Use the `bool` query instead]

View File

@ -1,5 +1,5 @@
[[query-dsl-bool-query]]
== Bool Query
=== Bool Query
A query that matches documents matching boolean combinations of other
queries. The bool query maps to Lucene `BooleanQuery`. It is built using

View File

@ -1,5 +1,5 @@
[[query-dsl-boosting-query]]
== Boosting Query
=== Boosting Query
The `boosting` query can be used to effectively demote results that
match a given query. Unlike the "NOT" clause in bool query, this still

View File

@ -1,12 +1,12 @@
[[query-dsl-common-terms-query]]
== Common Terms Query
=== Common Terms Query
The `common` terms query is a modern alternative to stopwords which
improves the precision and recall of search results (by taking stopwords
into account), without sacrificing performance.
[float]
=== The problem
==== The problem
Every term in a query has a cost. A search for `"The brown fox"`
requires three term queries, one for each of `"the"`, `"brown"` and
@ -25,7 +25,7 @@ and `"not happy"`) and we lose recall (eg text like `"The The"` or
`"To be or not to be"` would simply not exist in the index).
[float]
=== The solution
==== The solution
The `common` terms query divides the query terms into two groups: more
important (ie _low frequency_ terms) and less important (ie _high
@ -63,7 +63,7 @@ site, common terms like `"clip"` or `"video"` will automatically behave
as stopwords without the need to maintain a manual list.
[float]
=== Examples
==== Examples
In this example, words that have a document frequency greater than 0.1%
(eg `"this"` and `"is"`) will be treated as _common terms_.

View File

@ -0,0 +1,69 @@
[[compound-queries]]
== Compound queries
Compound queries wrap other compound or leaf queries, either to combine their
results and scores, to change their behaviour, or to switch from query to
filter context.
The queries in this group are:
<<query-dsl-constant-score-query,`constant_score` query>>::
A query which wraps another query, but executes it in filter context. All
matching documents are given the same ``constant'' `_score`.
<<query-dsl-bool-query,`bool` query>>::
The default query for combining multiple leaf or compound query clauses, as
`must`, `should`, `must_not`, or `filter` clauses. The `must` and `should`
clauses have their scores combined -- the more matching clauses, the better --
while the `must_not` and `filter` clauses are executed in filter context.
<<query-dsl-dis-max-query,`dis_max` query>>::
A query which accepts multiple queries, and returns any documents which match
any of the query clauses. While the `bool` query combines the scores from all
matching queries, the `dis_max` query uses the score of the single best-
matching query clause.
<<query-dsl-function-score-query,`function_score` query>>::
Modify the scores returned by the main query with functions to take into
account factors like popularity, recency, distance, or custom algorithms
implemented with scripting.
<<query-dsl-boosting-query,`boosting` query>>::
Return documents which match a `positive` query, but reduce the score of
documents which also match a `negative` query.
<<query-dsl-indices-query,`indices` query>>::
Execute one query for the specified indices, and another for other indices.
<<query-dsl-and-query,`and`>>, <<query-dsl-or-query,`or`>>, <<query-dsl-not-query,`not`>>::
Synonyms for the `bool` query.
<<query-dsl-filtered-query,`filtered` query>>::
Combine a query clause in query context with another in filter context. deprecated[2.0.0,Use the `bool` query instead]
<<query-dsl-limit-query,`limit` query>>::
Limits the number of documents examined per shard. deprecated[1.6.0]
include::constant-score-query.asciidoc[]
include::bool-query.asciidoc[]
include::dis-max-query.asciidoc[]
include::function-score-query.asciidoc[]
include::boosting-query.asciidoc[]
include::indices-query.asciidoc[]
include::and-query.asciidoc[]
include::not-query.asciidoc[]
include::or-query.asciidoc[]
include::filtered-query.asciidoc[]
include::limit-query.asciidoc[]

View File

@ -1,5 +1,5 @@
[[query-dsl-constant-score-query]]
== Constant Score Query
=== Constant Score Query
A query that wraps another query and simply returns a
constant score equal to the query boost for every document in the

View File

@ -1,5 +1,5 @@
[[query-dsl-dis-max-query]]
== Dis Max Query
=== Dis Max Query
A query that generates the union of documents produced by its
subqueries, and that scores each document with the maximum score for

View File

@ -1,5 +1,5 @@
[[query-dsl-exists-query]]
== Exists Query
=== Exists Query
Returns documents that have at least one non-`null` value in the original field:
@ -42,7 +42,7 @@ These documents would *not* match the above query:
<3> The `user` field is missing completely.
[float]
==== `null_value` mapping
===== `null_value` mapping
If the field mapping includes the `null_value` setting (see <<mapping-core-types>>)
then explicit `null` values are replaced with the specified `null_value`. For

View File

@ -1,5 +1,5 @@
[[query-dsl-filtered-query]]
== Filtered Query
=== Filtered Query
deprecated[2.0.0, Use the `bool` query instead with a `must` clause for the query and a `filter` clause for the filter]
@ -47,7 +47,7 @@ curl -XGET localhost:9200/_search -d '
<1> The `filtered` query is passed as the value of the `query`
parameter in the search request.
=== Filtering without a query
==== Filtering without a query
If a `query` is not specified, it defaults to the
<<query-dsl-match-all-query,`match_all` query>>. This means that the
@ -71,7 +71,7 @@ curl -XGET localhost:9200/_search -d '
<1> No `query` has been specified, so this request applies just the filter,
returning all documents created since yesterday.
==== Multiple filters
===== Multiple filters
Multiple filters can be applied by wrapping them in a
<<query-dsl-bool-query,`bool` query>>, for example:
@ -95,7 +95,7 @@ Multiple filters can be applied by wrapping them in a
}
--------------------------------------------------
==== Filter strategy
===== Filter strategy
You can control how the filter and query are executed with the `strategy`
parameter:

View File

@ -0,0 +1,44 @@
[[full-text-queries]]
== Full text queries
The high-level full text queries are usually used for running full text
queries on full text fields like the body of an email. They understand how the
field being queried is <<analysis,analyzed>> and will apply each field's
`analyzer` (or `search_analyzer`) to the query string before executing.
The queries in this group are:
<<query-dsl-match-query,`match` query>>::
The standard query for performing full text queries, including fuzzy matching
and phrase or proximity queries.
<<query-dsl-multi-match-query,`multi_match` query>>::
The multi-field version of the `match` query.
<<query-dsl-common-terms-query,`common_terms` query>>::
A more specialized query which gives more preference to uncommon words.
<<query-dsl-query-string-query,`query_string` query>>::
Supports the compact Lucene <<query-string-syntax,query string syntax>>,
allowing you to specify AND|OR|NOT conditions and multi-field search
within a single query string. For expert users only.
<<query-dsl-simple-query-string-query,`simple_query_string`>>::
A simpler, more robust version of the `query_string` syntax suitable
for exposing directly to users.
include::match-query.asciidoc[]
include::multi-match-query.asciidoc[]
include::common-terms-query.asciidoc[]
include::query-string-query.asciidoc[]
include::simple-query-string-query.asciidoc[]

View File

@ -1,15 +1,13 @@
[[query-dsl-function-score-query]]
== Function Score Query
=== Function Score Query
The `function_score` allows you to modify the score of documents that are
retrieved by a query. This can be useful if, for example, a score
function is computationally expensive and it is sufficient to compute
the score on a filtered set of documents.
=== Using function score
To use `function_score`, the user has to define a query and one or
several functions, that compute a new score for each document returned
more functions, that compute a new score for each document returned
by the query.
`function_score` can be used with only one function like this:
@ -89,13 +87,11 @@ query. The parameter `boost_mode` defines how:
`min`:: min of query score and function score
By default, modifying the score does not change which documents match. To exclude
documents that do not meet a certain score threshold the `min_score` parameter can be set to the desired score threshold.
==== Score functions
documents that do not meet a certain score threshold the `min_score` parameter can be set to the desired score threshold.
The `function_score` query provides several types of score functions.
===== Script score
==== Script score
The `script_score` function allows you to wrap another query and customize
the scoring of it optionally with a computation derived from other numeric
@ -120,12 +116,14 @@ script, and provide parameters to it:
[source,js]
--------------------------------------------------
"script_score": {
"lang": "lang",
"params": {
"param1": value1,
"param2": value2
},
"script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
"script": {
"lang": "lang",
"params": {
"param1": value1,
"param2": value2
},
"inline": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
}
}
--------------------------------------------------
@ -133,7 +131,7 @@ Note that unlike the `custom_score` query, the
score of the query is multiplied with the result of the script scoring. If
you wish to inhibit this, set `"boost_mode": "replace"`
===== Weight
==== Weight
The `weight` score allows you to multiply the score by the provided
`weight`. This can sometimes be desired since boost value set on
@ -145,7 +143,7 @@ not.
"weight" : number
--------------------------------------------------
===== Random
==== Random
The `random_score` generates scores using a hash of the `_uid` field,
with a `seed` for variation. If `seed` is not specified, the current
@ -161,7 +159,7 @@ be a memory intensive operation since the values are unique.
}
--------------------------------------------------
===== Field Value factor
==== Field Value factor
The `field_value_factor` function allows you to use a field from a document to
influence the score. It's similar to using the `script_score` function, however,
@ -205,7 +203,7 @@ is an illegal operation, and an exception will be thrown. Be sure to limit the
values of the field with a range filter to avoid this, or use `log1p` and
`ln1p`.
===== Decay functions
==== Decay functions
Decay functions score a document with a function that decays depending
on the distance of a numeric field value of the document from a user
@ -252,13 +250,13 @@ The `offset` and `decay` parameters are optional.
[horizontal]
`origin`::
The point of origin used for calculating distance. Must be given as a
number for numeric field, date for date fields and geo point for geo fields.
The point of origin used for calculating distance. Must be given as a
number for numeric field, date for date fields and geo point for geo fields.
Required for geo and numeric field. For date fields the default is `now`. Date
math (for example `now-1h`) is supported for origin.
`scale`::
Required for all types. Defines the distance from origin at which the computed
Required for all types. Defines the distance from origin at which the computed
score will equal `decay` parameter. For geo fields: Can be defined as number+unit (1km, 12m,...).
Default unit is meters. For date fields: Can to be defined as a number+unit ("1h", "10d",...).
Default unit is milliseconds. For numeric field: Any number.
@ -358,7 +356,7 @@ Example:
==== Detailed example
===== Detailed example
Suppose you are searching for a hotel in a certain town. Your budget is
limited. Also, you would like the hotel to be close to the town center,
@ -478,7 +476,7 @@ image::https://f.cloud.github.com/assets/4320215/768161/082975c0-e899-11e2-86f7-
image::https://f.cloud.github.com/assets/4320215/768162/0b606884-e899-11e2-907b-aefc77eefef6.png[width="700px"]
===== Linear' decay, keyword `linear`
===== Linear decay, keyword `linear`
When choosing `linear` as the decay function in the above example, the
contour and surface plot of the multiplier looks like this:

View File

@ -1,10 +1,10 @@
[[query-dsl-fuzzy-query]]
== Fuzzy Query
=== Fuzzy Query
The fuzzy query uses similarity based on Levenshtein edit distance for
`string` fields, and a `+/-` margin on numeric and date fields.
=== String fields
==== String fields
The `fuzzy` query generates all possible matching terms that are within the
maximum edit distance specified in `fuzziness` and then checks the term
@ -38,7 +38,7 @@ Or with more advanced settings:
--------------------------------------------------
[float]
==== Parameters
===== Parameters
[horizontal]
`fuzziness`::
@ -62,7 +62,7 @@ are both set to `0`. This could cause every term in the index to be examined!
[float]
=== Numeric and date fields
==== Numeric and date fields
Performs a <<query-dsl-range-query>> ``around'' the value using the
`fuzziness` value as a `+/-` range, where:

View File

@ -1,5 +1,5 @@
[[query-dsl-geo-bounding-box-query]]
== Geo Bounding Box Query
=== Geo Bounding Box Query
A query allowing to filter hits based on a point location using a
bounding box. Assuming the following indexed document:
@ -45,13 +45,13 @@ Then the following simple query can be executed with a
--------------------------------------------------
[float]
=== Accepted Formats
==== Accepted Formats
In much the same way the geo_point type can accept different
representation of the geo point, the filter can accept it as well:
[float]
==== Lat Lon As Properties
===== Lat Lon As Properties
[source,js]
--------------------------------------------------
@ -79,7 +79,7 @@ representation of the geo point, the filter can accept it as well:
--------------------------------------------------
[float]
==== Lat Lon As Array
===== Lat Lon As Array
Format in `[lon, lat]`, note, the order of lon/lat here in order to
conform with http://geojson.org/[GeoJSON].
@ -104,7 +104,7 @@ conform with http://geojson.org/[GeoJSON].
--------------------------------------------------
[float]
==== Lat Lon As String
===== Lat Lon As String
Format in `lat,lon`.
@ -128,7 +128,7 @@ Format in `lat,lon`.
--------------------------------------------------
[float]
==== Geohash
===== Geohash
[source,js]
--------------------------------------------------
@ -150,7 +150,7 @@ Format in `lat,lon`.
--------------------------------------------------
[float]
=== Vertices
==== Vertices
The vertices of the bounding box can either be set by `top_left` and
`bottom_right` or by `top_right` and `bottom_left` parameters. More
@ -182,20 +182,20 @@ values separately.
[float]
=== geo_point Type
==== geo_point Type
The filter *requires* the `geo_point` type to be set on the relevant
field.
[float]
=== Multi Location Per Document
==== Multi Location Per Document
The filter can work with multiple locations / points per document. Once
a single location / point matches the filter, the document will be
included in the filter
[float]
=== Type
==== Type
The type of the bounding box execution by default is set to `memory`,
which means in memory checks if the doc falls within the bounding box

View File

@ -1,5 +1,5 @@
[[query-dsl-geo-distance-query]]
== Geo Distance Query
=== Geo Distance Query
Filters documents that include only hits that exist within a specific
distance from a geo point. Assuming the following indexed json:
@ -40,13 +40,13 @@ filter:
--------------------------------------------------
[float]
=== Accepted Formats
==== Accepted Formats
In much the same way the `geo_point` type can accept different
representation of the geo point, the filter can accept it as well:
[float]
==== Lat Lon As Properties
===== Lat Lon As Properties
[source,js]
--------------------------------------------------
@ -69,7 +69,7 @@ representation of the geo point, the filter can accept it as well:
--------------------------------------------------
[float]
==== Lat Lon As Array
===== Lat Lon As Array
Format in `[lon, lat]`, note, the order of lon/lat here in order to
conform with http://geojson.org/[GeoJSON].
@ -92,7 +92,7 @@ conform with http://geojson.org/[GeoJSON].
--------------------------------------------------
[float]
==== Lat Lon As String
===== Lat Lon As String
Format in `lat,lon`.
@ -114,7 +114,7 @@ Format in `lat,lon`.
--------------------------------------------------
[float]
==== Geohash
===== Geohash
[source,js]
--------------------------------------------------
@ -134,7 +134,7 @@ Format in `lat,lon`.
--------------------------------------------------
[float]
=== Options
==== Options
The following are options allowed on the filter:
@ -160,13 +160,13 @@ The following are options allowed on the filter:
[float]
=== geo_point Type
==== geo_point Type
The filter *requires* the `geo_point` type to be set on the relevant
field.
[float]
=== Multi Location Per Document
==== Multi Location Per Document
The `geo_distance` filter can work with multiple locations / points per
document. Once a single location / point matches the filter, the

View File

@ -1,5 +1,5 @@
[[query-dsl-geo-distance-range-query]]
== Geo Distance Range Query
=== Geo Distance Range Query
Filters documents that exist within a range from a specific point:

View File

@ -1,5 +1,5 @@
[[query-dsl-geo-polygon-query]]
== Geo Polygon Query
=== Geo Polygon Query
A query allowing to include hits that only fall within a polygon of
points. Here is an example:
@ -27,10 +27,10 @@ points. Here is an example:
--------------------------------------------------
[float]
=== Allowed Formats
==== Allowed Formats
[float]
==== Lat Long as Array
===== Lat Long as Array
Format in `[lon, lat]`, note, the order of lon/lat here in order to
conform with http://geojson.org/[GeoJSON].
@ -58,7 +58,7 @@ conform with http://geojson.org/[GeoJSON].
--------------------------------------------------
[float]
==== Lat Lon as String
===== Lat Lon as String
Format in `lat,lon`.
@ -85,7 +85,7 @@ Format in `lat,lon`.
--------------------------------------------------
[float]
==== Geohash
===== Geohash
[source,js]
--------------------------------------------------
@ -110,7 +110,7 @@ Format in `lat,lon`.
--------------------------------------------------
[float]
=== geo_point Type
==== geo_point Type
The filter *requires* the
<<mapping-geo-point-type,geo_point>> type to be

View File

@ -0,0 +1,50 @@
[[geo-queries]]
== Geo queries
Elasticsearch supports two types of geo data:
<<mapping-geo-point-type,`geo_point`>> fields which support lat/lon pairs, and
<<mapping-geo-shape-type,`geo_shape`>> fields, which support points,
lines, circles, polygons, multi-polygons etc.
The queries in this group are:
<<query-dsl-geo-shape-query,`geo_shape`>> query::
Find document with geo-shapes which either intersect, are contained by, or
do not intersect with the specified geo-shape.
<<query-dsl-geo-bounding-box-query,`geo_bounding_box`>> query::
Finds documents with geo-points that fall into the specified rectangle.
<<query-dsl-geo-distance-query,`geo_distance`>> query::
Finds document with geo-points within the specified distance of a central
point.
<<query-dsl-geo-distance-range-query,`geo_distance_range`>> query::
Like the `geo_distance` query, but the range starts at a specified distance
from the central point.
<<query-dsl-geo-polygon-query,`geo_polygon`>> query::
Find documents with geo-points within the specified polygon.
<<query-dsl-geohash-cell-query,`geohash_cell`>> query::
Find geo-points whose geohash intersects with the geohash of the specified
point.
include::geo-shape-query.asciidoc[]
include::geo-bounding-box-query.asciidoc[]
include::geo-distance-query.asciidoc[]
include::geo-distance-range-query.asciidoc[]
include::geo-polygon-query.asciidoc[]
include::geohash-cell-query.asciidoc[]

View File

@ -1,26 +1,21 @@
[[query-dsl-geo-shape-query]]
== GeoShape Filter
=== GeoShape Query
Filter documents indexed using the `geo_shape` type.
Requires the <<mapping-geo-shape-type,geo_shape
Mapping>>.
Requires the <<mapping-geo-shape-type,geo_shape Mapping>>.
The `geo_shape` query uses the same grid square representation as the
geo_shape mapping to find documents that have a shape that intersects
with the query shape. It will also use the same PrefixTree configuration
as defined for the field mapping.
[float]
==== Filter Format
The Filter supports two ways of defining the Filter shape, either by
The query supports two ways of defining the query shape, either by
providing a whole shape definition, or by referencing the name of a shape
pre-indexed in another index. Both formats are defined below with
examples.
[float]
===== Provided Shape Definition
==== Inline Shape Definition
Similar to the `geo_shape` type, the `geo_shape` Filter uses
http://www.geojson.org[GeoJSON] to represent shapes.
@ -64,8 +59,7 @@ The following query will find the point using the Elasticsearch's
}
--------------------------------------------------
[float]
===== Pre-Indexed Shape
==== Pre-Indexed Shape
The Filter also supports using a shape which has already been indexed in
another index and/or index type. This is particularly useful for when

View File

@ -1,5 +1,5 @@
[[query-dsl-geohash-cell-query]]
== Geohash Cell Query
=== Geohash Cell Query
The `geohash_cell` query provides access to a hierarchy of geohashes.
By defining a geohash cell, only <<mapping-geo-point-type,geopoints>>

View File

@ -1,5 +1,5 @@
[[query-dsl-has-child-query]]
== Has Child Query
=== Has Child Query
The `has_child` filter accepts a query and the child type to run against, and
results in parent documents that have child docs matching the query. Here is
@ -20,7 +20,7 @@ an example:
--------------------------------------------------
[float]
=== Scoring capabilities
==== Scoring capabilities
The `has_child` also has scoring support. The
supported score types are `min`, `max`, `sum`, `avg` or `none`. The default is
@ -46,7 +46,7 @@ inside the `has_child` query:
--------------------------------------------------
[float]
=== Min/Max Children
==== Min/Max Children
The `has_child` query allows you to specify that a minimum and/or maximum
number of children are required to match for the parent doc to be considered
@ -72,21 +72,3 @@ a match:
The `min_children` and `max_children` parameters can be combined with
the `score_mode` parameter.
[float]
=== Memory Considerations
In order to support parent-child joins, all of the (string) parent IDs
must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
Additionally, every child document is mapped to its parent using a long
value (approximately). It is advisable to keep the string parent ID short
in order to reduce memory usage.
You can check how much memory is being used by the `_parent` field in the fielddata cache
using the <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
APIS, eg:
[source,js]
--------------------------------------------------
curl -XGET "http://localhost:9200/_stats/fielddata?pretty&human&fielddata_fields=_parent"
--------------------------------------------------

View File

@ -1,5 +1,5 @@
[[query-dsl-has-parent-query]]
== Has Parent Query
=== Has Parent Query
The `has_parent` query accepts a query and a parent type. The query is
executed in the parent document space, which is specified by the parent
@ -22,7 +22,7 @@ in the same manner as the `has_child` query.
--------------------------------------------------
[float]
=== Scoring capabilities
==== Scoring capabilities
The `has_parent` also has scoring support. The
supported score types are `score` or `none`. The default is `none` and
@ -47,23 +47,3 @@ matching parent document. The score type can be specified with the
}
}
--------------------------------------------------
[float]
=== Memory Considerations
In order to support parent-child joins, all of the (string) parent IDs
must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
Additionally, every child document is mapped to its parent using a long
value (approximately). It is advisable to keep the string parent ID short
in order to reduce memory usage.
You can check how much memory is being used by the `_parent` field in the fielddata cache
using the <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
APIS, eg:
[source,js]
--------------------------------------------------
curl -XGET "http://localhost:9200/_stats/fielddata?pretty&human&fielddata_fields=_parent"
--------------------------------------------------

View File

@ -1,5 +1,5 @@
[[query-dsl-ids-query]]
== Ids Query
=== Ids Query
Filters documents that only have the provided ids. Note, this query
uses the <<mapping-uid-field,_uid>> field.
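A minimal sketch of the query (the type name and id values are illustrative):
[source,js]
--------------------------------------------------
{
    "ids" : {
        "type" : "my_type",
        "values" : ["1", "4", "100"]
    }
}
--------------------------------------------------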

View File

@ -1,99 +0,0 @@
include::match-query.asciidoc[]
include::multi-match-query.asciidoc[]
include::and-query.asciidoc[]
include::bool-query.asciidoc[]
include::boosting-query.asciidoc[]
include::common-terms-query.asciidoc[]
include::constant-score-query.asciidoc[]
include::dis-max-query.asciidoc[]
include::exists-query.asciidoc[]
include::filtered-query.asciidoc[]
include::function-score-query.asciidoc[]
include::fuzzy-query.asciidoc[]
include::geo-shape-query.asciidoc[]
include::geo-bounding-box-query.asciidoc[]
include::geo-distance-query.asciidoc[]
include::geo-distance-range-query.asciidoc[]
include::geohash-cell-query.asciidoc[]
include::geo-polygon-query.asciidoc[]
include::has-child-query.asciidoc[]
include::has-parent-query.asciidoc[]
include::ids-query.asciidoc[]
include::indices-query.asciidoc[]
include::limit-query.asciidoc[]
include::match-all-query.asciidoc[]
include::missing-query.asciidoc[]
include::mlt-query.asciidoc[]
include::nested-query.asciidoc[]
include::not-query.asciidoc[]
include::or-query.asciidoc[]
include::prefix-query.asciidoc[]
include::query-string-query.asciidoc[]
include::simple-query-string-query.asciidoc[]
include::range-query.asciidoc[]
include::regexp-query.asciidoc[]
include::span-containing-query.asciidoc[]
include::span-first-query.asciidoc[]
include::span-multi-term-query.asciidoc[]
include::span-near-query.asciidoc[]
include::span-not-query.asciidoc[]
include::span-or-query.asciidoc[]
include::span-term-query.asciidoc[]
include::span-within-query.asciidoc[]
include::term-query.asciidoc[]
include::terms-query.asciidoc[]
include::wildcard-query.asciidoc[]
include::minimum-should-match.asciidoc[]
include::multi-term-rewrite.asciidoc[]
include::script-query.asciidoc[]
include::template-query.asciidoc[]
include::type-query.asciidoc[]

View File

@ -1,5 +1,5 @@
[[query-dsl-indices-query]]
== Indices Query
=== Indices Query
The `indices` query can be used when executed across multiple indices,
allowing to have a query that executes only when executed on an index
@ -29,9 +29,9 @@ documents), and `all` (to match all). Defaults to `all`.
`query` is mandatory, as well as `indices` (or `index`).
[TIP]
===================================================================
====================================================================
The fields order is important: if the `indices` are provided before `query`
or `no_match_query`, the related queries get parsed only against the indices
that they are going to be executed on. This is useful to avoid parsing queries
when it is not necessary and prevent potential mapping errors.
===================================================================
====================================================================

View File

@ -0,0 +1,32 @@
[[joining-queries]]
== Joining queries
Performing full SQL-style joins in a distributed system like Elasticsearch is
prohibitively expensive. Instead, Elasticsearch offers two forms of join
which are designed to scale horizontally.
<<query-dsl-nested-query,`nested` query>>::
Documents may contain fields of type <<mapping-nested-type,`nested`>>. These
fields are used to index arrays of objects, where each object can be queried
(with the `nested` query) as an independent document.
<<query-dsl-has-child-query,`has_child`>> and <<query-dsl-has-parent-query,`has_parent`>> queries::
A <<mapping-parent-field,parent-child relationship>> can exist between two
document types within a single index. The `has_child` query returns parent
documents whose child documents match the specified query, while the
`has_parent` query returns child documents whose parent document matches the
specified query.
Also see the <<query-dsl-terms-lookup,terms-lookup mechanism>> in the `terms`
query, which allows you to build a `terms` query from values contained in
another document.
include::nested-query.asciidoc[]
include::has-child-query.asciidoc[]
include::has-parent-query.asciidoc[]

View File

@ -1,5 +1,5 @@
[[query-dsl-limit-query]]
== Limit Query
=== Limit Query
deprecated[1.6.0, Use <<search-request-body,terminate_after>> instead]

View File

@ -1,20 +1,17 @@
[[query-dsl-match-all-query]]
== Match All Query
A query that matches all documents. Maps to Lucene `MatchAllDocsQuery`.
The most simple query, which matches all documents, giving them all a `_score`
of `1.0`.
[source,js]
--------------------------------------------------
{
"match_all" : { }
}
{ "match_all": {} }
--------------------------------------------------
Which can also have boost associated with it:
The `_score` can be changed with the `boost` parameter:
[source,js]
--------------------------------------------------
{
"match_all" : { "boost" : 1.2 }
}
{ "match_all": { "boost" : 1.2 }}
--------------------------------------------------

View File

@ -1,5 +1,5 @@
[[query-dsl-match-query]]
== Match Query
=== Match Query
A family of `match` queries that accept text/numerics/dates, analyzes
it, and constructs a query out of it. For example:
@ -16,10 +16,8 @@ it, and constructs a query out of it. For example:
Note, `message` is the name of a field, you can substitute the name of
any field (including `_all`) instead.
[float]
=== Types of Match Queries
There are three types of `match` query: `boolean`, `phrase`, and `phrase_prefix`:
[float]
[[query-dsl-match-query-boolean]]
==== boolean
@ -40,7 +38,6 @@ data-type mismatches, such as trying to query a numeric field with a text
query string. Defaults to `false`.
[[query-dsl-match-query-fuzziness]]
[float]
===== Fuzziness
`fuzziness` allows _fuzzy matching_ based on the type of field being queried.
@ -69,7 +66,6 @@ change in structure, `message` is the field name):
--------------------------------------------------
[[query-dsl-match-query-zero]]
[float]
===== Zero terms query
If the analyzer used removes all tokens in a query like a `stop` filter
does, the default behavior is to match no documents at all. In order to
@ -90,7 +86,6 @@ change that the `zero_terms_query` option can be used, which accepts
--------------------------------------------------
[[query-dsl-match-query-cutoff]]
[float]
===== Cutoff frequency
The match query supports a `cutoff_frequency` that allows
@ -132,7 +127,6 @@ that when trying it out on test indexes with low document numbers you
should follow the advice in {defguide}/relevance-is-broken.html[Relevance is broken].
[[query-dsl-match-query-phrase]]
[float]
==== phrase
The `match_phrase` query analyzes the text and creates a `phrase` query
@ -181,9 +175,8 @@ definition, or the default search analyzer, for example:
}
--------------------------------------------------
[float]
[[query-dsl-match-query-phrase-prefix]]
===== match_phrase_prefix
==== match_phrase_prefix
The `match_phrase_prefix` is the same as `match_phrase`, except that it
allows for prefix matches on the last term in the text. For example:

View File

@ -1,5 +1,5 @@
[[query-dsl-missing-query]]
== Missing Query
=== Missing Query
Returns documents that have only `null` values or no value in the original field:
@ -42,7 +42,7 @@ These documents would *not* match the above filter:
<3> This field has one non-`null` value.
[float]
=== `null_value` mapping
==== `null_value` mapping
If the field mapping includes a `null_value` (see <<mapping-core-types>>) then explicit `null` values
are replaced with the specified `null_value`. For instance, if the `user` field were mapped
@ -75,7 +75,7 @@ no values in the `user` field and thus would match the `missing` filter:
--------------------------------------------------
[float]
==== `existence` and `null_value` parameters
===== `existence` and `null_value` parameters
When the field being queried has a `null_value` mapping, then the behaviour of
the `missing` filter can be altered with the `existence` and `null_value`

View File

@ -1,5 +1,5 @@
[[query-dsl-mlt-query]]
== More Like This Query
=== More Like This Query
The More Like This Query (MLT Query) finds documents that are "like" a given
set of documents. In order to do so, MLT selects a set of representative terms
@ -87,7 +87,7 @@ present in the index, the syntax is similar to <<docs-termvectors-artificial-doc
}
--------------------------------------------------
=== How it Works
==== How it Works
Suppose we wanted to find all documents similar to a given input document.
Obviously, the input document itself should be its best match for that type of
@ -139,14 +139,14 @@ curl -s -XPUT 'http://localhost:9200/imdb/' -d '{
}
--------------------------------------------------
=== Parameters
==== Parameters
The only required parameter is `like`, all other parameters have sensible
defaults. There are three types of parameters: one to specify the document
input, the other one for term selection and for query formation.
[float]
=== Document Input Parameters
==== Document Input Parameters
[horizontal]
`like`:: coming[2.0]
@ -179,7 +179,7 @@ A list of documents following the same syntax as the <<docs-multi-get,Multi GET
[float]
[[mlt-query-term-selection]]
=== Term Selection Parameters
==== Term Selection Parameters
[horizontal]
`max_query_terms`::
@ -219,7 +219,7 @@ The analyzer that is used to analyze the free form text. Defaults to the
analyzer associated with the first field in `fields`.
[float]
=== Query Formation Parameters
==== Query Formation Parameters
[horizontal]
`minimum_should_match`::

View File

@ -1,5 +1,5 @@
[[query-dsl-multi-match-query]]
== Multi Match Query
=== Multi Match Query
The `multi_match` query builds on the <<query-dsl-match-query,`match` query>>
to allow multi-field queries:
@ -17,7 +17,7 @@ to allow multi-field queries:
<2> The fields to be queried.
[float]
=== `fields` and per-field boosting
==== `fields` and per-field boosting
Fields can be specified with wildcards, eg:
@ -47,7 +47,7 @@ Individual fields can be boosted with the caret (`^`) notation:
[[multi-match-types]]
[float]
=== Types of `multi_match` query:
==== Types of `multi_match` query:
The way the `multi_match` query is executed internally depends on the `type`
parameter, which can be set to:
@ -70,7 +70,7 @@ parameter, which can be set to:
combines the `_score` from each field. See <<type-phrase>>.
[[type-best-fields]]
=== `best_fields`
==== `best_fields`
The `best_fields` type is most useful when you are searching for multiple
words best found in the same field. For instance ``brown fox'' in a single
@ -121,7 +121,7 @@ and `cutoff_frequency`, as explained in <<query-dsl-match-query, match query>>.
[IMPORTANT]
[[operator-min]]
.`operator` and `minimum_should_match`
==================================================
===================================================
The `best_fields` and `most_fields` types are _field-centric_ -- they generate
a `match` query *per field*. This means that the `operator` and
@ -153,10 +153,10 @@ to match.
See <<type-cross-fields>> for a better solution.
==================================================
===================================================
[[type-most-fields]]
=== `most_fields`
==== `most_fields`
The `most_fields` type is most useful when querying multiple fields that
contain the same text analyzed in different ways. For instance, the main
@ -203,7 +203,7 @@ and `cutoff_frequency`, as explained in <<query-dsl-match-query,match query>>, b
*see <<operator-min>>*.
[[type-phrase]]
=== `phrase` and `phrase_prefix`
==== `phrase` and `phrase_prefix`
The `phrase` and `phrase_prefix` types behave just like <<type-best-fields>>,
but they use a `match_phrase` or `match_phrase_prefix` query instead of a
@ -240,7 +240,7 @@ in <<query-dsl-match-query>>. Type `phrase_prefix` additionally accepts
`max_expansions`.
[[type-cross-fields]]
=== `cross_fields`
==== `cross_fields`
The `cross_fields` type is particularly useful with structured documents where
multiple fields *should* match. For instance, when querying the `first_name`
@ -317,7 +317,7 @@ Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`,
`zero_terms_query` and `cutoff_frequency`, as explained in
<<query-dsl-match-query, match query>>.
==== `cross_field` and analysis
===== `cross_field` and analysis
The `cross_field` type can only work in term-centric mode on fields that have
the same analyzer. Fields with the same analyzer are grouped together as in
@ -411,7 +411,7 @@ which will be executed as:
blended("will", fields: [first, first.edge, last.edge, last])
blended("smith", fields: [first, first.edge, last.edge, last])
==== `tie_breaker`
===== `tie_breaker`
By default, each per-term `blended` query will use the best score returned by
any field in a group, then these scores are added together to give the final

View File

@ -1,5 +1,5 @@
[[query-dsl-nested-query]]
== Nested Query
=== Nested Query
Nested query allows to query nested objects / docs (see
<<mapping-nested-type,nested mapping>>). The

View File

@ -1,5 +1,5 @@
[[query-dsl-not-query]]
== Not Query
=== Not Query
A query that filters out matched documents using a query. For example:

View File

@ -1,5 +1,5 @@
[[query-dsl-or-query]]
== Or Query
=== Or Query
deprecated[2.0.0, Use the `bool` query instead]

View File

@ -1,5 +1,5 @@
[[query-dsl-prefix-query]]
== Prefix Query
=== Prefix Query
Matches documents that have fields containing terms with a specified
prefix (*not analyzed*). The prefix query maps to Lucene `PrefixQuery`.
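For example, a sketch matching documents whose `user` field contains a term
starting with `ki` (field and value are illustrative):
[source,js]
--------------------------------------------------
{
    "prefix" : { "user" : "ki" }
}
--------------------------------------------------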

View File

@ -1,5 +1,5 @@
[[query-dsl-query-string-query]]
== Query String Query
=== Query String Query
A query that uses a query parser in order to parse its content. Here is
an example:
@ -89,7 +89,7 @@ rewritten using the
parameter.
[float]
=== Default Field
==== Default Field
When not explicitly specifying the field to search on in the query
string syntax, the `index.query.default_field` will be used to derive
@ -99,7 +99,7 @@ So, if `_all` field is disabled, it might make sense to change it to set
a different default field.
[float]
=== Multi Field
==== Multi Field
The `query_string` query can also run against multiple fields. Fields can be
provided via the `"fields"` parameter (example below).

View File

@ -1,6 +1,6 @@
[[query-string-syntax]]
=== Query string syntax
==== Query string syntax
The query string ``mini-language'' is used by the
<<query-dsl-query-string-query>> and by the
@ -14,7 +14,7 @@ phrase, in the same order.
Operators allow you to customize the search -- the available options are
explained below.
==== Field names
===== Field names
As mentioned in <<query-dsl-query-string-query>>, the `default_field` is searched for the
search terms, but it is possible to specify other fields in the query syntax:
@ -46,7 +46,7 @@ search terms, but it is possible to specify other fields in the query syntax:
_exists_:title
==== Wildcards
===== Wildcards
Wildcard searches can be run on individual terms, using `?` to replace
a single character, and `*` to replace zero or more characters:
@ -58,12 +58,12 @@ perform very badly -- just think how many terms need to be queried to
match the query string `"a* b* c*"`.
[WARNING]
======
=======
Allowing a wildcard at the beginning of a word (eg `"*ing"`) is particularly
heavy, because all terms in the index need to be examined, just in case
they match. Leading wildcards can be disabled by setting
`allow_leading_wildcard` to `false`.
======
=======
Wildcarded terms are not analyzed by default -- they are lowercased
(`lowercase_expanded_terms` defaults to `true`) but no further analysis
@ -72,7 +72,7 @@ is missing some of its letters. However, by setting `analyze_wildcard` to
`true`, an attempt will be made to analyze wildcarded words before searching
the term list for matching terms.
==== Regular expressions
===== Regular expressions
Regular expression patterns can be embedded in the query string by
wrapping them in forward-slashes (`"/"`):
@ -82,7 +82,7 @@ wrapping them in forward-slashes (`"/"`):
The supported regular expression syntax is explained in <<regexp-syntax>>.
[WARNING]
======
=======
The `allow_leading_wildcard` parameter does not have any control over
regular expressions. A query string such as the following would force
Elasticsearch to visit every term in the index:
@ -90,9 +90,9 @@ Elasticsearch to visit every term in the index:
/.*n/
Use with caution!
======
=======
==== Fuzziness
===== Fuzziness
We can search for terms that are
similar to, but not exactly like our search terms, using the ``fuzzy''
@ -112,7 +112,7 @@ sufficient to catch 80% of all human misspellings. It can be specified as:
quikc~1
==== Proximity searches
===== Proximity searches
While a phrase query (eg `"john smith"`) expects all of the terms in exactly
the same order, a proximity query allows the specified words to be further
@ -127,7 +127,7 @@ query string, the more relevant that document is considered to be. When
compared to the above example query, the phrase `"quick fox"` would be
considered more relevant than `"quick brown fox"`.
==== Ranges
===== Ranges
Ranges can be specified for date, numeric or string fields. Inclusive ranges
are specified with square brackets `[min TO max]` and exclusive ranges with
@ -168,20 +168,20 @@ Ranges with one side unbounded can use the following syntax:
age:<=10
[NOTE]
===================================================================
====================================================================
To combine an upper and lower bound with the simplified syntax, you
would need to join two clauses with an `AND` operator:
age:(>=10 AND <20)
age:(+>=10 +<20)
===================================================================
====================================================================
The parsing of ranges in query strings can be complex and error prone. It is
much more reliable to use an explicit <<query-dsl-range-query,`range` query>>.
==== Boosting
===== Boosting
Use the _boost_ operator `^` to make one term more relevant than another.
For instance, if we want to find all documents about foxes, but we are
@ -196,7 +196,7 @@ Boosts can also be applied to phrases or to groups:
"john smith"^2 (foo bar)^4
==== Boolean operators
===== Boolean operators
By default, all terms are optional, as long as one term matches. A search
for `foo bar baz` will find any document that contains one or more of
@ -256,7 +256,7 @@ would look like this:
****
==== Grouping
===== Grouping
Multiple terms or clauses can be grouped together with parentheses, to form
sub-queries:
@ -268,7 +268,7 @@ of a sub-query:
status:(active OR pending) title:(full text search)^2
==== Reserved characters
===== Reserved characters
If you need to use any of the characters which function as operators in your
query itself (and not as operators), then you should escape them with
@ -290,7 +290,7 @@ index is actually `"wifi"`. Escaping the space will protect it from
being touched by the query string parser: `"wi\ fi"`.
****
==== Empty Query
===== Empty Query
If the query string is empty or contains only whitespace, the query will
yield an empty result set.

View File

@ -0,0 +1,77 @@
[[query-filter-context]]
== Query and filter context
The behaviour of a query clause depends on whether it is used in _query context_ or
in _filter context_:
Query context::
+
--
A query clause used in query context answers the question ``__How well does this
document match this query clause?__'' Besides deciding whether or not the
document matches, the query clause also calculates a `_score` representing how
well the document matches, relative to other documents.
Query context is in effect whenever a query clause is passed to a `query` parameter,
such as the `query` parameter in the <<search-request-query,`search`>> API.
--
Filter context::
+
--
In _filter_ context, a query clause answers the question ``__Does this document
match this query clause?__'' The answer is a simple Yes or No -- no scores are
calculated. Filter context is mostly used for filtering structured data, e.g.
* __Does this +timestamp+ fall into the range 2015 to 2016?__
* __Is the +status+ field set to ++"published"++__?
Frequently used filters are cached automatically by Elasticsearch to improve
performance.
Filter context is in effect whenever a query clause is passed to a `filter`
parameter, such as the `filter` or `must_not` parameters in the
<<query-dsl-bool-query,`bool`>> query, the `filter` parameter in the
<<query-dsl-constant-score-query,`constant_score`>> query, or the
<<search-aggregations-bucket-filter-aggregation,`filter`>> aggregation.
--
Below is an example of query clauses being used in query and filter context
in the `search` API. This query will match documents where all of the following
conditions are met:
* The `title` field contains the word `search`.
* The `content` field contains the word `elasticsearch`.
* The `status` field contains the exact word `published`.
* The `publish_date` field contains a date from 1 Jan 2015 onwards.
[source,json]
------------------------------------
GET _search
{
"query": { <1>
"bool": { <2>
"must": [
{ "match": { "title": "Search" }}, <2>
{ "match": { "content": "Elasticsearch" }} <2>
],
"filter": [ <3>
{ "term": { "status": "published" }}, <4>
{ "range": { "publish_date": { "gte": "2015-01-01" }}} <4>
]
}
}
}
------------------------------------
<1> The `query` parameter indicates query context.
<2> The `bool` and two `match` clauses are used in query context,
which means that they are used to score how well each document
matches.
<3> The `filter` parameter indicates filter context.
<4> The `term` and `range` clauses are used in filter context.
They will filter out documents which do not match, but they will
not affect the score for matching documents.
TIP: Use query clauses in query context for conditions which should affect the
score of matching documents (i.e. how well does the document match), and use
all other query clauses in filter context.
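For example, a minimal sketch of a clause used purely in filter context by wrapping it in a
<<query-dsl-constant-score-query,`constant_score`>> query (the field and value are illustrative):

[source,json]
------------------------------------
GET _search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "status": "published" }
      }
    }
  }
}
------------------------------------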

View File

@ -1,5 +1,5 @@
[[query-dsl-range-query]]
== Range Query
=== Range Query
Matches documents with fields that have terms within a certain range.
The type of the Lucene query depends on the field type, for `string`
@ -30,7 +30,7 @@ The `range` query accepts the following parameters:
`boost`:: Sets the boost value of the query, defaults to `1.0`
[float]
=== Date options
==== Date options
When applied to `date` fields, the `range` query also accepts a `time_zone` parameter.
The `time_zone` parameter will be applied to your input lower and upper bounds and will

View File

@ -1,5 +1,5 @@
[[query-dsl-regexp-query]]
== Regexp Query
=== Regexp Query
The `regexp` query allows you to use regular expression term queries.
See <<regexp-syntax>> for details of the supported regular expression language.
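A minimal sketch (the field and pattern are illustrative):

[source,json]
------------------------------------
GET _search
{
  "query": {
    "regexp": {
      "name.first": "s.*y"
    }
  }
}
------------------------------------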

View File

@ -1,17 +1,17 @@
[[regexp-syntax]]
=== Regular expression syntax
==== Regular expression syntax
Regular expression queries are supported by the `regexp` and the `query_string`
queries. The Lucene regular expression engine
is not Perl-compatible but supports a smaller range of operators.
[NOTE]
====
=====
We will not attempt to explain regular expressions, but
just explain the supported operators.
====
=====
==== Standard operators
===== Standard operators
Anchoring::
+

View File

@ -1,5 +1,5 @@
[[query-dsl-script-query]]
== Script Query
=== Script Query
A query that allows defining
<<modules-scripting,scripts>> as filters. For
@ -20,7 +20,7 @@ example:
----------------------------------------------
[float]
=== Custom Parameters
==== Custom Parameters
Scripts are compiled and cached for faster execution. If the same script
can be used, just with different parameters provided, it is preferable
@ -34,9 +34,11 @@ to use the ability to pass parameters to the script itself, for example:
},
"filter" : {
"script" : {
"script" : "doc['num1'].value > param1"
"params" : {
"param1" : 5
"script" : {
"inline" : "doc['num1'].value > param1"
"params" : {
"param1" : 5
}
}
}
}
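Assembled into a complete request, the parameterized form might look like the following
minimal sketch (wrapping the script in a `constant_score` filter; a numeric `num1` field is assumed):

[source,json]
------------------------------------
GET _search
{
  "query": {
    "constant_score": {
      "filter": {
        "script": {
          "script": {
            "inline": "doc['num1'].value > param1",
            "params": {
              "param1": 5
            }
          }
        }
      }
    }
  }
}
------------------------------------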

View File

@ -1,5 +1,5 @@
[[query-dsl-simple-query-string-query]]
== Simple Query String Query
=== Simple Query String Query
A query that uses the SimpleQueryParser to parse its syntax. Unlike the
regular `query_string` query, the `simple_query_string` query will never
@ -57,7 +57,7 @@ Defaults to `ROOT`.
|=======================================================================
[float]
==== Simple Query String Syntax
===== Simple Query String Syntax
The `simple_query_string` supports the following special characters:
* `+` signifies AND operation
@ -73,7 +73,7 @@ In order to search for any of these special characters, they will need to
be escaped with `\`.
[float]
=== Default Field
==== Default Field
When not explicitly specifying the field to search on in the query
string syntax, the `index.query.default_field` will be used to derive
which field to search on. It defaults to the `_all` field.
@ -82,7 +82,7 @@ So, if `_all` field is disabled, it might make sense to change it to set
a different default field.
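Alternatively, a minimal sketch that sidesteps the default by naming the fields to search
explicitly in the query itself (the field names and boost are illustrative):

[source,json]
------------------------------------
GET _search
{
  "query": {
    "simple_query_string": {
      "query": "foo bar baz",
      "fields": ["title^5", "body"]
    }
  }
}
------------------------------------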
[float]
=== Multi Field
==== Multi Field
The `fields` parameter can also include pattern-based field names,
allowing automatic expansion to the relevant fields (dynamically
introduced fields included). For example:
@ -98,7 +98,7 @@ introduced fields included). For example:
--------------------------------------------------
[float]
=== Flags
==== Flags
`simple_query_string` supports multiple flags that specify which parsing features
should be enabled. They are specified as a `|`-delimited string with the
`flags` parameter:
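For example, a minimal sketch enabling only a subset of parsing features (the query text is
illustrative; `OR`, `AND`, and `PREFIX` are among the supported flag names):

[source,json]
------------------------------------
GET _search
{
  "query": {
    "simple_query_string": {
      "query": "foo | bar + baz*",
      "flags": "OR|AND|PREFIX"
    }
  }
}
------------------------------------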

View File

@ -1,5 +1,5 @@
[[query-dsl-span-containing-query]]
== Span Containing Query
=== Span Containing Query
Returns matches which enclose another span query. The span containing
query maps to Lucene `SpanContainingQuery`. Here is an example:
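A minimal sketch (the field and terms are illustrative); the `big` clause must enclose a match
of the `little` clause:

[source,json]
------------------------------------
{
  "span_containing": {
    "little": {
      "span_term": { "field1": "foo" }
    },
    "big": {
      "span_near": {
        "clauses": [
          { "span_term": { "field1": "bar" } },
          { "span_term": { "field1": "baz" } }
        ],
        "slop": 5,
        "in_order": true
      }
    }
  }
}
------------------------------------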

View File

@ -1,5 +1,5 @@
[[query-dsl-span-first-query]]
== Span First Query
=== Span First Query
Matches spans near the beginning of a field. The span first query maps
to Lucene `SpanFirstQuery`. Here is an example:
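A minimal sketch (the field and value are illustrative); the wrapped `span_term` must match
within the first three positions of the field:

[source,json]
------------------------------------
{
  "span_first": {
    "match": {
      "span_term": { "user": "kimchy" }
    },
    "end": 3
  }
}
------------------------------------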

View File

@ -1,5 +1,5 @@
[[query-dsl-span-multi-term-query]]
== Span Multi Term Query
=== Span Multi Term Query
The `span_multi` query allows you to wrap a `multi term query` (one of wildcard,
fuzzy, prefix, term, range or regexp query) as a `span query`, so

View File

@ -1,5 +1,5 @@
[[query-dsl-span-near-query]]
== Span Near Query
=== Span Near Query
Matches spans which are near one another. One can specify _slop_, the
maximum number of intervening unmatched positions, as well as whether

View File

@ -1,5 +1,5 @@
[[query-dsl-span-not-query]]
== Span Not Query
=== Span Not Query
Removes matches which overlap with another span query. The span not
query maps to Lucene `SpanNotQuery`. Here is an example:
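A minimal sketch (the field and terms are illustrative); matches of the `include` clause are
dropped wherever they overlap with a match of the `exclude` clause:

[source,json]
------------------------------------
{
  "span_not": {
    "include": {
      "span_term": { "field1": "hoya" }
    },
    "exclude": {
      "span_near": {
        "clauses": [
          { "span_term": { "field1": "la" } },
          { "span_term": { "field1": "hoya" } }
        ],
        "slop": 0,
        "in_order": true
      }
    }
  }
}
------------------------------------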

Some files were not shown because too many files have changed in this diff.