Merge branch 'master' into require_units
Conflicts: src/main/java/org/elasticsearch/action/bulk/BulkRequest.java src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java src/main/java/org/elasticsearch/node/internal/InternalSettingsPreparer.java src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreTests.java
This commit is contained in:
commit
e1197dfea9
|
@ -37,4 +37,4 @@ eclipse-build
|
|||
nb-configuration.xml
|
||||
nbactions.xml
|
||||
|
||||
/dependency-reduced-pom.xml
|
||||
dependency-reduced-pom.xml
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
# Maven will replace the project.name with elasticsearch below. If that
|
||||
# hasn't been done, we assume that this is not a packaged version and the
|
||||
# user has forgotten to run Maven to create a package.
|
||||
IS_PACKAGED_VERSION='${project.name}'
|
||||
IS_PACKAGED_VERSION='${project.artifactId}'
|
||||
if [ "$IS_PACKAGED_VERSION" != "elasticsearch" ]; then
|
||||
cat >&2 << EOF
|
||||
Error: You must build the project with Maven or download a pre-built package
|
||||
|
|
|
@ -103,4 +103,6 @@ if [ -e "$CONF_FILE" ]; then
|
|||
esac
|
||||
fi
|
||||
|
||||
export HOSTNAME=`hostname -s`
|
||||
|
||||
exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Xmx64m -Xms16m -Delasticsearch -Des.path.home="$ES_HOME" $properties -cp "$ES_HOME/lib/*" org.elasticsearch.plugins.PluginManager $args
|
||||
|
|
|
@ -9,6 +9,8 @@ for %%I in ("%SCRIPT_DIR%..") do set ES_HOME=%%~dpfI
|
|||
|
||||
TITLE Elasticsearch Plugin Manager ${project.version}
|
||||
|
||||
SET HOSTNAME=%COMPUTERNAME%
|
||||
|
||||
"%JAVA_HOME%\bin\java" %JAVA_OPTS% %ES_JAVA_OPTS% -Xmx64m -Xms16m -Des.path.home="%ES_HOME%" -cp "%ES_HOME%/lib/*;" "org.elasticsearch.plugins.PluginManager" %*
|
||||
goto finally
|
||||
|
||||
|
|
|
@ -4,6 +4,10 @@ rootLogger: ${es.logger.level}, console, file
|
|||
logger:
|
||||
# log action execution errors for easier debugging
|
||||
action: DEBUG
|
||||
|
||||
# deprecation logging, set to DEBUG to see deprecation messages
|
||||
deprecation: INFO, deprecation_log_file
|
||||
|
||||
# reduce the logging for aws, too much is logged under the default INFO
|
||||
com.amazonaws: WARN
|
||||
org.apache.http: INFO
|
||||
|
@ -24,6 +28,7 @@ logger:
|
|||
additivity:
|
||||
index.search.slowlog: false
|
||||
index.indexing.slowlog: false
|
||||
deprecation: false
|
||||
|
||||
appender:
|
||||
console:
|
||||
|
@ -51,6 +56,14 @@ appender:
|
|||
#type: pattern
|
||||
#conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
|
||||
|
||||
deprecation_log_file:
|
||||
type: dailyRollingFile
|
||||
file: ${path.logs}/${cluster.name}_deprecation.log
|
||||
datePattern: "'.'yyyy-MM-dd"
|
||||
layout:
|
||||
type: pattern
|
||||
conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
|
||||
|
||||
index_search_slow_log_file:
|
||||
type: dailyRollingFile
|
||||
file: ${path.logs}/${cluster.name}_index_search_slowlog.log
|
||||
|
|
|
@ -30,6 +30,7 @@ import socket
|
|||
import urllib.request
|
||||
import subprocess
|
||||
|
||||
from functools import partial
|
||||
from http.client import HTTPConnection
|
||||
from http.client import HTTPSConnection
|
||||
|
||||
|
@ -72,6 +73,11 @@ PLUGINS = [('license', 'elasticsearch/license/latest'),
|
|||
|
||||
LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log')
|
||||
|
||||
# console colors
|
||||
COLOR_OK = '\033[92m'
|
||||
COLOR_END = '\033[0m'
|
||||
COLOR_FAIL = '\033[91m'
|
||||
|
||||
def log(msg):
|
||||
log_plain('\n%s' % msg)
|
||||
|
||||
|
@ -137,9 +143,6 @@ def get_tag_hash(tag):
|
|||
def get_current_branch():
|
||||
return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip()
|
||||
|
||||
verify_java_version('1.7') # we require to build with 1.7
|
||||
verify_mvn_java_version('1.7', MVN)
|
||||
|
||||
# Utility that returns the name of the release branch for a given version
|
||||
def release_branch(version):
|
||||
return 'release_branch_%s' % version
|
||||
|
@ -545,14 +548,6 @@ def print_sonatype_notice():
|
|||
</settings>
|
||||
""")
|
||||
|
||||
def check_s3_credentials():
|
||||
if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None):
|
||||
raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3')
|
||||
|
||||
def check_gpg_credentials():
|
||||
if not env.get('GPG_KEY_ID', None) or not env.get('GPG_PASSPHRASE', None):
|
||||
raise RuntimeError('Could not find "GPG_KEY_ID" / "GPG_PASSPHRASE" in the env variables please export in order to sign the packages (also make sure that GPG_KEYRING is set when not in ~/.gnupg)')
|
||||
|
||||
def check_command_exists(name, cmd):
|
||||
try:
|
||||
subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
|
||||
|
@ -562,9 +557,6 @@ def check_command_exists(name, cmd):
|
|||
VERSION_FILE = 'src/main/java/org/elasticsearch/Version.java'
|
||||
POM_FILE = 'pom.xml'
|
||||
|
||||
# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml
|
||||
print_sonatype_notice()
|
||||
|
||||
# finds the highest available bwc version to test against
|
||||
def find_bwc_version(release_version, bwc_dir='backwards'):
|
||||
log(' Lookup bwc version in directory [%s]' % bwc_dir)
|
||||
|
@ -618,6 +610,60 @@ def check_norelease(path='src'):
|
|||
if pattern.search(line):
|
||||
raise RuntimeError('Found //norelease comment in %s line %s' % (full_path, line_number))
|
||||
|
||||
def run_and_print(text, run_function):
    """Run a single check and print its outcome on the same line as its label.

    ``text`` is printed first without a trailing newline, then
    ``run_function`` is called with no arguments. A coloured ``OK`` is
    appended when the call returns normally, a coloured ``NOT OK`` when it
    raises ``RuntimeError``.

    Returns True on success and False on failure, so callers can collect the
    results of several checks. Exceptions other than ``RuntimeError``
    propagate to the caller.
    """
    print(text, end='')
    try:
        run_function()
    except RuntimeError:
        print(COLOR_FAIL + 'NOT OK' + COLOR_END)
        return False
    else:
        print(COLOR_OK + 'OK' + COLOR_END)
        return True
|
||||
|
||||
def check_env_var(text, env_var):
    """Report whether ``env_var`` is present in the ``env`` mapping.

    ``text`` is printed first without a trailing newline, followed by a
    coloured ``OK`` when the key exists in ``env`` and a coloured ``NOT OK``
    when it does not. Only presence is tested; the value itself is not
    inspected, so an empty value still counts as present.

    Returns True if the variable is present, False otherwise.
    """
    print(text, end='')
    if env_var not in env:
        print(COLOR_FAIL + 'NOT OK' + COLOR_END)
        return False
    print(COLOR_OK + 'OK' + COLOR_END)
    return True
|
||||
|
||||
def check_environment_and_commandline_tools(check_only):
    """Verify the environment variables and external tools used by the release.

    Every check prints its own ``OK`` / ``NOT OK`` line. All checks are always
    executed, so a single run reports every missing requirement.

    check_only -- when true, the process exits right after reporting:
                  status 0 if every check passed, status 1 otherwise.

    When ``check_only`` is false the function returns normally if every check
    passed; otherwise it prints a message and exits with status 1, so that
    calling scripts and CI jobs can detect the failure.
    """
    # (label group, variable name) pairs, in the order they are reported
    env_vars = [
        ('AWS', 'AWS_SECRET_ACCESS_KEY'),
        ('AWS', 'AWS_ACCESS_KEY_ID'),
        ('SONATYPE', 'SONATYPE_USERNAME'),
        ('SONATYPE', 'SONATYPE_PASSWORD'),
        ('GPG', 'GPG_KEY_ID'),
        ('GPG', 'GPG_PASSPHRASE'),
        ('S3 repo upload', 'S3_BUCKET_SYNC_TO'),
        ('git', 'GIT_AUTHOR_NAME'),
        ('git', 'GIT_AUTHOR_EMAIL'),
    ]
    # (tool name, command used to probe for it) pairs
    commands = [
        ('rpm', 'rpm --version'),
        ('dpkg', 'dpkg --version'),
        ('gpg', 'gpg --version'),
        ('expect', 'expect -v'),
        ('createrepo', 'createrepo --version'),
        ('s3cmd', 's3cmd --version'),
        ('apt-ftparchive', 'apt-ftparchive --version'),
    ]

    checks = []
    for group, name in env_vars:
        label = 'Checking for %s env configuration %s... ' % (group, name)
        checks.append(check_env_var(label, name))

    for name, cmd in commands:
        label = 'Checking command: %s... ' % name
        checks.append(run_and_print(label, partial(check_command_exists, name, cmd)))

    # boto, check error code being returned
    location = os.path.dirname(os.path.realpath(__file__))
    command = 'python %s/upload-s3.py -h' % (location)
    checks.append(run_and_print('Testing boto python dependency... ', partial(check_command_exists, 'python-boto', command)))

    checks.append(run_and_print('Checking java version... ', partial(verify_java_version, '1.7')))
    checks.append(run_and_print('Checking java mvn version... ', partial(verify_mvn_java_version, '1.7', MVN)))

    failed = not all(checks)

    if check_only:
        # report-only mode: the exit status reflects the outcome of the checks
        sys.exit(1 if failed else 0)

    if failed:
        print("Exiting due to failing checks")
        # a non-zero status signals the failure to the calling shell / CI job
        sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Release')
|
||||
|
@ -636,9 +682,12 @@ if __name__ == '__main__':
|
|||
help='Smoke tests the given release')
|
||||
parser.add_argument('--bwc', '-w', dest='bwc', metavar='backwards', default='backwards',
|
||||
help='Backwards compatibility version path to use to run compatibility tests against')
|
||||
parser.add_argument('--check-only', dest='check_only', action='store_true',
|
||||
help='Checks and reports for all requirements and then exits')
|
||||
|
||||
parser.set_defaults(dryrun=True)
|
||||
parser.set_defaults(smoke=None)
|
||||
parser.set_defaults(check_only=False)
|
||||
args = parser.parse_args()
|
||||
bwc_path = args.bwc
|
||||
src_branch = args.branch
|
||||
|
@ -649,18 +698,19 @@ if __name__ == '__main__':
|
|||
build = not args.smoke
|
||||
smoke_test_version = args.smoke
|
||||
|
||||
check_environment_and_commandline_tools(args.check_only)
|
||||
|
||||
# we print a notice if we cannot find the relevant information in ~/.m2/settings.xml
|
||||
print_sonatype_notice()
|
||||
|
||||
# building requires Java 1.7
|
||||
verify_java_version('1.7')
|
||||
verify_mvn_java_version('1.7', MVN)
|
||||
|
||||
if os.path.exists(LOG):
|
||||
raise RuntimeError('please remove old release log %s first' % LOG)
|
||||
|
||||
check_gpg_credentials()
|
||||
check_command_exists('gpg', 'gpg --version')
|
||||
check_command_exists('expect', 'expect -v')
|
||||
|
||||
if not dry_run:
|
||||
check_s3_credentials()
|
||||
check_command_exists('createrepo', 'createrepo --version')
|
||||
check_command_exists('s3cmd', 's3cmd --version')
|
||||
check_command_exists('apt-ftparchive', 'apt-ftparchive --version')
|
||||
print('WARNING: dryrun is set to "false" - this will push and publish the release')
|
||||
input('Press Enter to continue...')
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ my @Groups = qw(
|
|||
);
|
||||
my %Group_Labels = (
|
||||
breaking => 'Breaking changes',
|
||||
build => 'Build',
|
||||
deprecation => 'Deprecations',
|
||||
doc => 'Docs',
|
||||
feature => 'New features',
|
||||
|
@ -70,6 +71,14 @@ sub dump_issues {
|
|||
$month++;
|
||||
$year += 1900;
|
||||
|
||||
print <<"HTML";
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
HTML
|
||||
|
||||
for my $group ( @Groups, 'other' ) {
|
||||
my $group_issues = $issues->{$group} or next;
|
||||
print "<h2>$Group_Labels{$group}</h2>\n\n<ul>\n";
|
||||
|
@ -115,6 +124,7 @@ sub dump_issues {
|
|||
print "</ul>";
|
||||
print "\n\n";
|
||||
}
|
||||
print "</body></html>\n";
|
||||
}
|
||||
|
||||
#===================================
|
||||
|
|
|
@ -30,10 +30,10 @@ MetricsAggregationBuilder aggregation =
|
|||
AggregationBuilders
|
||||
.scriptedMetric("agg")
|
||||
.initScript("_agg['heights'] = []")
|
||||
.mapScript("if (doc['gender'].value == \"male\") " +
|
||||
.mapScript(new Script("if (doc['gender'].value == \"male\") " +
|
||||
"{ _agg.heights.add(doc['height'].value) } " +
|
||||
"else " +
|
||||
"{ _agg.heights.add(-1 * doc['height'].value) }");
|
||||
"{ _agg.heights.add(-1 * doc['height'].value) }"));
|
||||
--------------------------------------------------
|
||||
|
||||
You can also specify a `combine` script which will be executed on each shard:
|
||||
|
@ -43,12 +43,12 @@ You can also specify a `combine` script which will be executed on each shard:
|
|||
MetricsAggregationBuilder aggregation =
|
||||
AggregationBuilders
|
||||
.scriptedMetric("agg")
|
||||
.initScript("_agg['heights'] = []")
|
||||
.mapScript("if (doc['gender'].value == \"male\") " +
|
||||
.initScript(new Script("_agg['heights'] = []"))
|
||||
.mapScript(new Script("if (doc['gender'].value == \"male\") " +
|
||||
"{ _agg.heights.add(doc['height'].value) } " +
|
||||
"else " +
|
||||
"{ _agg.heights.add(-1 * doc['height'].value) }")
|
||||
.combineScript("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum");
|
||||
"{ _agg.heights.add(-1 * doc['height'].value) }"))
|
||||
.combineScript(new Script("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum"));
|
||||
--------------------------------------------------
|
||||
|
||||
You can also specify a `reduce` script which will be executed on the node which gets the request:
|
||||
|
@ -58,13 +58,13 @@ You can also specify a `reduce` script which will be executed on the node which
|
|||
MetricsAggregationBuilder aggregation =
|
||||
AggregationBuilders
|
||||
.scriptedMetric("agg")
|
||||
.initScript("_agg['heights'] = []")
|
||||
.mapScript("if (doc['gender'].value == \"male\") " +
|
||||
.initScript(new Script("_agg['heights'] = []"))
|
||||
.mapScript(new Script("if (doc['gender'].value == \"male\") " +
|
||||
"{ _agg.heights.add(doc['height'].value) } " +
|
||||
"else " +
|
||||
"{ _agg.heights.add(-1 * doc['height'].value) }")
|
||||
.combineScript("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum")
|
||||
.reduceScript("heights_sum = 0; for (a in _aggs) { heights_sum += a }; return heights_sum");
|
||||
"{ _agg.heights.add(-1 * doc['height'].value) }"))
|
||||
.combineScript(new Script("heights_sum = 0; for (t in _agg.heights) { heights_sum += t }; return heights_sum"))
|
||||
.reduceScript(new Script("heights_sum = 0; for (a in _aggs) { heights_sum += a }; return heights_sum"));
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ Or you can use `prepareUpdate()` method:
|
|||
[source,java]
|
||||
--------------------------------------------------
|
||||
client.prepareUpdate("ttl", "doc", "1")
|
||||
.setScript("ctx._source.gender = \"male\"" <1> , ScriptService.ScriptType.INLINE)
|
||||
.setScript(new Script("ctx._source.gender = \"male\"" <1> , ScriptService.ScriptType.INLINE, null, null))
|
||||
.get();
|
||||
|
||||
client.prepareUpdate("ttl", "doc", "1")
|
||||
|
@ -46,7 +46,7 @@ The update API allows to update a document based on a script provided:
|
|||
[source,java]
|
||||
--------------------------------------------------
|
||||
UpdateRequest updateRequest = new UpdateRequest("ttl", "doc", "1")
|
||||
.script("ctx._source.gender = \"male\"");
|
||||
.script(new Script("ctx._source.gender = \"male\""));
|
||||
client.update(updateRequest).get();
|
||||
--------------------------------------------------
|
||||
|
||||
|
|
|
@ -73,8 +73,6 @@ Some aggregations work on values extracted from the aggregated documents. Typica
|
|||
a specific document field which is set using the `field` key for the aggregations. It is also possible to define a
|
||||
<<modules-scripting,`script`>> which will generate the values (per document).
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
|
||||
When both `field` and `script` settings are configured for the aggregation, the script will be treated as a
|
||||
`value script`. While normal scripts are evaluated on a document level (i.e. the script has access to all the data
|
||||
associated with the document), value scripts are evaluated on the *value* level. In this mode, the values are extracted
|
||||
|
|
|
@ -128,8 +128,6 @@ It is also possible to customize the key for each range:
|
|||
|
||||
==== Script
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
|
@ -148,6 +146,33 @@ TIP: The `script` parameter expects an inline script. Use `script_id` for indexe
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"price_ranges" : {
|
||||
"range" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"field": "price"
|
||||
}
|
||||
},
|
||||
"ranges" : [
|
||||
{ "to" : 50 },
|
||||
{ "from" : 50, "to" : 100 },
|
||||
{ "from" : 100 }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
==== Value Script
|
||||
|
||||
Let's say the product prices are in USD but we would like to get the price ranges in EURO. We can use a value script to convert the prices prior to the aggregation (assuming a conversion rate of 0.8)
|
||||
|
|
|
@ -358,13 +358,6 @@ Customized scores can be implemented via a script:
|
|||
--------------------------------------------------
|
||||
|
||||
Scripts can be inline (as in above example), indexed or stored on disk. For details on the options, see <<modules-scripting, script documentation>>.
|
||||
Parameters need to be set as follows:
|
||||
|
||||
[horizontal]
|
||||
`script`:: Inline script, name of script file or name of indexed script. Mandatory.
|
||||
`script_type`:: One of "inline" (default), "indexed" or "file".
|
||||
`lang`:: Script language (default "groovy")
|
||||
`params`:: Script parameters (default empty).
|
||||
|
||||
Available parameters in the script are
|
||||
|
||||
|
|
|
@ -441,7 +441,27 @@ Generating the terms using a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"genders" : {
|
||||
"terms" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"field": "gender"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
|
||||
==== Value Script
|
||||
|
|
|
@ -47,7 +47,29 @@ Computing the average grade based on a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...,
|
||||
|
||||
"aggs" : {
|
||||
"avg_grade" : {
|
||||
"avg" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"field": "grade"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
===== Value Script
|
||||
|
||||
|
@ -63,9 +85,11 @@ It turned out that the exam was way above the level of the students and a grade
|
|||
"avg_corrected_grade" : {
|
||||
"avg" : {
|
||||
"field" : "grade",
|
||||
"script" : "_value * correction",
|
||||
"params" : {
|
||||
"correction" : 1.2
|
||||
"script" : {
|
||||
"inline": "_value * correction",
|
||||
"params" : {
|
||||
"correction" : 1.2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -153,7 +153,28 @@ however since hashes need to be computed on the fly.
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"author_count" : {
|
||||
"cardinality" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"first_name_field": "author.first_name",
|
||||
"last_name_field": "author.last_name"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
==== Missing value
|
||||
|
||||
|
|
|
@ -91,7 +91,29 @@ Computing the grades stats based on a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...,
|
||||
|
||||
"aggs" : {
|
||||
"grades_stats" : {
|
||||
"extended_stats" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"field": "grade"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
===== Value Script
|
||||
|
||||
|
@ -107,9 +129,11 @@ It turned out that the exam was way above the level of the students and a grade
|
|||
"grades_stats" : {
|
||||
"extended_stats" : {
|
||||
"field" : "grade",
|
||||
"script" : "_value * correction",
|
||||
"params" : {
|
||||
"correction" : 1.2
|
||||
"script" : {
|
||||
"inline": "_value * correction",
|
||||
"params" : {
|
||||
"correction" : 1.2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,7 +44,27 @@ Computing the max price value across all document, this time using a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"max_price" : {
|
||||
"max" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"field": "price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
==== Value Script
|
||||
|
||||
|
@ -57,9 +77,11 @@ Let's say that the prices of the documents in our index are in USD, but we would
|
|||
"max_price_in_euros" : {
|
||||
"max" : {
|
||||
"field" : "price",
|
||||
"script" : "_value * conversion_rate",
|
||||
"params" : {
|
||||
"conversion_rate" : 1.2
|
||||
"script" : {
|
||||
"inline": "_value * conversion_rate",
|
||||
"params" : {
|
||||
"conversion_rate" : 1.2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,7 +44,27 @@ Computing the min price value across all document, this time using a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"min_price" : {
|
||||
"min" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"field": "price"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
==== Value Script
|
||||
|
||||
|
@ -57,9 +77,11 @@ Let's say that the prices of the documents in our index are in USD, but we would
|
|||
"min_price_in_euros" : {
|
||||
"min" : {
|
||||
"field" : "price",
|
||||
"script" : "_value * conversion_rate",
|
||||
"params" : {
|
||||
"conversion_rate" : 1.2
|
||||
"script" : {
|
||||
"inline": "_value * conversion_rate",
|
||||
"params" : {
|
||||
"conversion_rate" : 1.2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,9 +100,11 @@ a script to convert them on-the-fly:
|
|||
"aggs" : {
|
||||
"load_time_outlier" : {
|
||||
"percentiles" : {
|
||||
"script" : "doc['load_time'].value / timeUnit", <1>
|
||||
"params" : {
|
||||
"timeUnit" : 1000 <2>
|
||||
"script" : {
|
||||
"inline": "doc['load_time'].value / timeUnit", <1>
|
||||
"params" : {
|
||||
"timeUnit" : 1000 <2>
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -113,7 +115,27 @@ a script to convert them on-the-fly:
|
|||
script to generate values which percentiles are calculated on
|
||||
<2> Scripting supports parameterized input just like any other script
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"load_time_outlier" : {
|
||||
"percentiles" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params" : {
|
||||
"timeUnit" : 1000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
[[search-aggregations-metrics-percentile-aggregation-approximation]]
|
||||
==== Percentiles are (usually) approximate
|
||||
|
|
|
@ -72,9 +72,11 @@ a script to convert them on-the-fly:
|
|||
"load_time_outlier" : {
|
||||
"percentile_ranks" : {
|
||||
"values" : [3, 5],
|
||||
"script" : "doc['load_time'].value / timeUnit", <1>
|
||||
"params" : {
|
||||
"timeUnit" : 1000 <2>
|
||||
"script" : {
|
||||
"inline": "doc['load_time'].value / timeUnit", <1>
|
||||
"params" : {
|
||||
"timeUnit" : 1000 <2>
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -85,7 +87,28 @@ a script to convert them on-the-fly:
|
|||
script to generate values which percentile ranks are calculated on
|
||||
<2> Scripting supports parameterized input just like any other script
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"aggs" : {
|
||||
"load_time_outlier" : {
|
||||
"percentile_ranks" : {
|
||||
"values" : [3, 5],
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params" : {
|
||||
"timeUnit" : 1000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
==== Missing value
|
||||
|
||||
|
@ -108,3 +131,4 @@ had a value.
|
|||
--------------------------------------------------
|
||||
|
||||
<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`.
|
||||
|
||||
|
|
|
@ -45,6 +45,42 @@ The response for the above aggregation:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The above example can also be specified using file scripts as follows:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"query" : {
|
||||
"match_all" : {}
|
||||
},
|
||||
"aggs": {
|
||||
"profit": {
|
||||
"scripted_metric": {
|
||||
"init_script" : {
|
||||
"file": "my_init_script"
|
||||
},
|
||||
"map_script" : {
|
||||
"file": "my_map_script"
|
||||
},
|
||||
"combine_script" : {
|
||||
"file": "my_combine_script"
|
||||
},
|
||||
"params": {
|
||||
"field": "amount" <1>
|
||||
},
|
||||
"reduce_script" : {
|
||||
"file": "my_reduce_script"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
<1> script parameters for init, map and combine scripts must be specified in a global `params` object so that they can be shared between the scripts
|
||||
|
||||
For more details on specifying scripts see <<modules-scripting, script documentation>>.
|
||||
|
||||
==== Scope of scripts
|
||||
|
||||
The scripted metric aggregation uses scripts at 4 stages of its execution:
|
||||
|
@ -225,13 +261,4 @@ params:: Optional. An object whose contents will be passed as variable
|
|||
--------------------------------------------------
|
||||
reduce_params:: Optional. An object whose contents will be passed as variables to the `reduce_script`. This can be useful to allow the user to control
|
||||
the behavior of the reduce phase. If this is not specified the variable will be undefined in the reduce_script execution.
|
||||
lang:: Optional. The script language used for the scripts. If this is not specified the default scripting language is used.
|
||||
init_script_file:: Optional. Can be used in place of the `init_script` parameter to provide the script using in a file.
|
||||
init_script_id:: Optional. Can be used in place of the `init_script` parameter to provide the script using an indexed script.
|
||||
map_script_file:: Optional. Can be used in place of the `map_script` parameter to provide the script using in a file.
|
||||
map_script_id:: Optional. Can be used in place of the `map_script` parameter to provide the script using an indexed script.
|
||||
combine_script_file:: Optional. Can be used in place of the `combine_script` parameter to provide the script using in a file.
|
||||
combine_script_id:: Optional. Can be used in place of the `combine_script` parameter to provide the script using an indexed script.
|
||||
reduce_script_file:: Optional. Can be used in place of the `reduce_script` parameter to provide the script using in a file.
|
||||
reduce_script_id:: Optional. Can be used in place of the `reduce_script` parameter to provide the script using an indexed script.
|
||||
|
||||
|
|
|
@ -53,7 +53,29 @@ Computing the grades stats based on a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...,
|
||||
|
||||
"aggs" : {
|
||||
"grades_stats" : {
|
||||
"stats" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params" : {
|
||||
"field" : "grade"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
===== Value Script
|
||||
|
||||
|
@ -69,9 +91,11 @@ It turned out that the exam was way above the level of the students and a grade
|
|||
"grades_stats" : {
|
||||
"stats" : {
|
||||
"field" : "grade",
|
||||
"script" : "_value * correction",
|
||||
"params" : {
|
||||
"correction" : 1.2
|
||||
"script" : {
|
||||
"inline": "_value * correction",
|
||||
"params" : {
|
||||
"correction" : 1.2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,29 @@ Computing the intraday return based on a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...,
|
||||
|
||||
"aggs" : {
|
||||
"intraday_return" : {
|
||||
"sum" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params" : {
|
||||
"field" : "change"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
||||
===== Value Script
|
||||
|
||||
|
@ -71,7 +93,8 @@ Computing the sum of squares over all stock tick changes:
|
|||
"daytime_return" : {
|
||||
"sum" : {
|
||||
"field" : "change",
|
||||
"script" : "_value * _value" }
|
||||
"script" : "_value * _value"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,4 +48,26 @@ Counting the values generated by a script:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||
This will interpret the `script` parameter as an `inline` script with the default script language and no script parameters. To use a file script use the following syntax:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...,
|
||||
|
||||
"aggs" : {
|
||||
"grades_count" : {
|
||||
"value_count" : {
|
||||
"script" : {
|
||||
"file": "my_script",
|
||||
"params" : {
|
||||
"field" : "grade"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
TIP: for indexed scripts replace the `file` parameter with an `id` parameter.
|
||||
|
|
|
@ -180,11 +180,11 @@ The default value of `alpha` is `0.5`, and the setting accepts any float from 0-
|
|||
|
||||
|
||||
[[single_0.2alpha]]
|
||||
.Single Exponential moving average with window of size 10, alpha = 0.2
|
||||
.EWMA with window of size 10, alpha = 0.2
|
||||
image::images/pipeline_movavg/single_0.2alpha.png[]
|
||||
|
||||
[[single_0.7alpha]]
|
||||
.Single Exponential moving average with window of size 10, alpha = 0.7
|
||||
.EWMA with window of size 10, alpha = 0.7
|
||||
image::images/pipeline_movavg/single_0.7alpha.png[]
|
||||
|
||||
==== Holt-Linear
|
||||
|
@ -223,13 +223,111 @@ to see. Small values emphasize long-term trends (such as a constant linear tren
|
|||
values emphasize short-term trends. This will become more apparent when you are predicting values.
|
||||
|
||||
[[double_0.2beta]]
|
||||
.Double Exponential moving average with window of size 100, alpha = 0.5, beta = 0.2
|
||||
.Holt-Linear moving average with window of size 100, alpha = 0.5, beta = 0.2
|
||||
image::images/pipeline_movavg/double_0.2beta.png[]
|
||||
|
||||
[[double_0.7beta]]
|
||||
.Double Exponential moving average with window of size 100, alpha = 0.5, beta = 0.7
|
||||
.Holt-Linear moving average with window of size 100, alpha = 0.5, beta = 0.7
|
||||
image::images/pipeline_movavg/double_0.7beta.png[]
|
||||
|
||||
==== Holt-Winters
|
||||
|
||||
The `holt_winters` model (aka "triple exponential") incorporates a third exponential term which
|
||||
tracks the seasonal aspect of your data. This aggregation therefore smooths based on three components: "level", "trend"
|
||||
and "seasonality".
|
||||
|
||||
The level and trend calculation is identical to `holt`. The seasonal calculation looks at the difference between
|
||||
the current point, and the point one period earlier.
|
||||
|
||||
Holt-Winters requires a little more handholding than the other moving averages. You need to specify the "periodicity"
|
||||
of your data: e.g. if your data has cyclic trends every 7 days, you would set `period: 7`. Similarly if there was
|
||||
a monthly trend, you would set it to `30`. There is currently no periodicity detection, although that is planned
|
||||
for future enhancements.
|
||||
|
||||
There are two varieties of Holt-Winters: additive and multiplicative.
|
||||
|
||||
===== "Cold Start"
|
||||
|
||||
Unfortunately, due to the nature of Holt-Winters, it requires two periods of data to "bootstrap" the algorithm. This
|
||||
means that your `window` must always be *at least* twice the size of your period. An exception will be thrown if it
|
||||
isn't. It also means that Holt-Winters will not emit a value for the first `2 * period` buckets; the current algorithm
|
||||
does not backcast.
|
||||
|
||||
[[holt_winters_cold_start]]
|
||||
.Holt-Winters showing a "cold" start where no values are emitted
|
||||
image::images/pipeline_movavg/triple_untruncated.png[]
|
||||
|
||||
Because the "cold start" obscures what the moving average looks like, the rest of the Holt-Winters images are truncated
|
||||
to not show the "cold start". Just be aware this will always be present at the beginning of your moving averages!
|
||||
|
||||
===== Additive Holt-Winters
|
||||
|
||||
Additive seasonality is the default; it can also be specified by setting `"type": "add"`. This variety is preferred
|
||||
when the seasonal effect is additive to your data. E.g. you could simply subtract the seasonal effect to "de-seasonalize"
|
||||
your data into a flat trend.
|
||||
|
||||
The default value of `alpha`, `beta` and `gamma` is `0.5`, and the settings accept any float from 0-1 inclusive.
|
||||
The default value of `period` is `1`.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"the_movavg":{
|
||||
"moving_avg":{
|
||||
"buckets_path": "the_sum",
|
||||
"model" : "holt_winters",
|
||||
"settings" : {
|
||||
"type" : "add",
|
||||
"alpha" : 0.5,
|
||||
"beta" : 0.5,
|
||||
"gamma" : 0.5,
|
||||
"period" : 7
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
[[holt_winters_add]]
|
||||
.Holt-Winters moving average with window of size 120, alpha = 0.5, beta = 0.7, gamma = 0.3, period = 30
|
||||
image::images/pipeline_movavg/triple.png[]
|
||||
|
||||
===== Multiplicative Holt-Winters
|
||||
|
||||
Multiplicative is specified by setting `"type": "mult"`. This variety is preferred when the seasonal effect is
|
||||
multiplied against your data. E.g. if the seasonal effect is x5 the data, rather than simply adding to it.
|
||||
|
||||
The default value of `alpha`, `beta` and `gamma` is `0.5`, and the settings accept any float from 0-1 inclusive.
|
||||
The default value of `period` is `1`.
|
||||
|
||||
[WARNING]
|
||||
======
|
||||
Multiplicative Holt-Winters works by dividing each data point by the seasonal value. This is problematic if any of
|
||||
your data is zero, or if there are gaps in the data (since this results in a divide-by-zero). To combat this, the
|
||||
`mult` Holt-Winters pads all values by a very small amount (1*10^-10^) so that all values are non-zero. This affects
|
||||
the result, but only minimally. If your data is non-zero, or you prefer to see `NaN` when zeros are encountered,
|
||||
you can disable this behavior with `pad: false`.
|
||||
======
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"the_movavg":{
|
||||
"moving_avg":{
|
||||
"buckets_path": "the_sum",
|
||||
"model" : "holt_winters",
|
||||
"settings" : {
|
||||
"type" : "mult",
|
||||
"alpha" : 0.5,
|
||||
"beta" : 0.5,
|
||||
"gamma" : 0.5,
|
||||
"period" : 7,
|
||||
"pad" : true
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
==== Prediction
|
||||
|
||||
All the moving average models support a "prediction" mode, which will attempt to extrapolate into the future given the
|
||||
|
@ -263,7 +361,7 @@ value, we can extrapolate based on local constant trends (in this case the predi
|
|||
of the series was heading in a downward direction):
|
||||
|
||||
[[double_prediction_local]]
|
||||
.Double Exponential moving average with window of size 100, predict = 20, alpha = 0.5, beta = 0.8
|
||||
.Holt-Linear moving average with window of size 100, predict = 20, alpha = 0.5, beta = 0.8
|
||||
image::images/pipeline_movavg/double_prediction_local.png[]
|
||||
|
||||
In contrast, if we choose a small `beta`, the predictions are based on the global constant trend. In this series, the
|
||||
|
@ -272,3 +370,10 @@ global trend is slightly positive, so the prediction makes a sharp u-turn and be
|
|||
[[double_prediction_global]]
|
||||
.Holt-Linear moving average with window of size 100, predict = 20, alpha = 0.5, beta = 0.1
|
||||
image::images/pipeline_movavg/double_prediction_global.png[]
|
||||
|
||||
The `holt_winters` model has the potential to deliver the best predictions, since it also incorporates seasonal
|
||||
fluctuations into the model:
|
||||
|
||||
[[holt_winters_prediction_global]]
|
||||
.Holt-Winters moving average with window of size 120, predict = 25, alpha = 0.8, beta = 0.2, gamma = 0.7, period = 30
|
||||
image::images/pipeline_movavg/triple_prediction.png[]
|
||||
|
|
|
@ -5,6 +5,7 @@ An analyzer of type `custom` that allows to combine a `Tokenizer` with
|
|||
zero or more `Token Filters`, and zero or more `Char Filters`. The
|
||||
custom analyzer accepts a logical/registered name of the tokenizer to
|
||||
use, and a list of logical/registered names of token filters.
|
||||
The name of the custom analyzer must not start with "_".
|
||||
|
||||
The following are settings that can be set for a `custom` analyzer type:
|
||||
|
||||
|
|
|
@ -81,6 +81,113 @@ being consumed by a monitoring tool, rather than intended for human
|
|||
consumption. The default for the `human` flag is
|
||||
`false`.
|
||||
|
||||
[float]
|
||||
=== Response Filtering
|
||||
|
||||
All REST APIs accept a `filter_path` parameter that can be used to reduce
|
||||
the response returned by elasticsearch. This parameter takes a comma
|
||||
separated list of filters expressed with the dot notation:
|
||||
|
||||
[source,sh]
|
||||
--------------------------------------------------
|
||||
curl -XGET 'localhost:9200/_search?pretty&filter_path=took,hits.hits._id,hits.hits._score'
|
||||
{
|
||||
"took" : 3,
|
||||
"hits" : {
|
||||
"hits" : [
|
||||
{
|
||||
"_id" : "3640",
|
||||
"_score" : 1.0
|
||||
},
|
||||
{
|
||||
"_id" : "3642",
|
||||
"_score" : 1.0
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
It also supports the `*` wildcard character to match any field or part
|
||||
of a field's name:
|
||||
|
||||
[source,sh]
|
||||
--------------------------------------------------
|
||||
curl -XGET 'localhost:9200/_nodes/stats?filter_path=nodes.*.ho*'
|
||||
{
|
||||
"nodes" : {
|
||||
"lvJHed8uQQu4brS-SXKsNA" : {
|
||||
"host" : "portable"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
And the `**` wildcard can be used to include fields without knowing the
|
||||
exact path of the field. For example, we can return the Lucene version
|
||||
of every segment with this request:
|
||||
|
||||
[source,sh]
|
||||
--------------------------------------------------
|
||||
curl 'localhost:9200/_segments?pretty&filter_path=indices.**.version'
|
||||
{
|
||||
"indices" : {
|
||||
"movies" : {
|
||||
"shards" : {
|
||||
"0" : [ {
|
||||
"segments" : {
|
||||
"_0" : {
|
||||
"version" : "5.2.0"
|
||||
}
|
||||
}
|
||||
} ],
|
||||
"2" : [ {
|
||||
"segments" : {
|
||||
"_0" : {
|
||||
"version" : "5.2.0"
|
||||
}
|
||||
}
|
||||
} ]
|
||||
}
|
||||
},
|
||||
"books" : {
|
||||
"shards" : {
|
||||
"0" : [ {
|
||||
"segments" : {
|
||||
"_0" : {
|
||||
"version" : "5.2.0"
|
||||
}
|
||||
}
|
||||
} ]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
Note that elasticsearch sometimes directly returns the raw value of a field,
|
||||
like the `_source` field. If you want to filter _source fields, you should
|
||||
consider combining the already existing `_source` parameter (see
|
||||
<<get-source-filtering,Get API>> for more details) with the `filter_path`
|
||||
parameter like this:
|
||||
|
||||
[source,sh]
|
||||
--------------------------------------------------
|
||||
curl -XGET 'localhost:9200/_search?pretty&filter_path=hits.hits._source&_source=title'
|
||||
{
|
||||
"hits" : {
|
||||
"hits" : [ {
|
||||
"_source":{"title":"Book #2"}
|
||||
}, {
|
||||
"_source":{"title":"Book #1"}
|
||||
}, {
|
||||
"_source":{"title":"Book #3"}
|
||||
} ]
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
[float]
|
||||
=== Flat Settings
|
||||
|
||||
|
|
|
@ -66,6 +66,10 @@ only those columns to appear.
|
|||
192.168.56.30 9300 43.9 Ramsey, Doug
|
||||
--------------------------------------------------
|
||||
|
||||
You can also request multiple columns using simple wildcards like
|
||||
`/_cat/thread_pool?h=ip,bulk.*` to get all headers (or aliases) starting
|
||||
with `bulk.`.
|
||||
|
||||
[float]
|
||||
[[numeric-formats]]
|
||||
=== Numeric formats
|
||||
|
|
|
@ -187,7 +187,7 @@ the options. Curl example with update actions:
|
|||
{ "update" : {"_id" : "1", "_type" : "type1", "_index" : "index1", "_retry_on_conflict" : 3} }
|
||||
{ "doc" : {"field" : "value"} }
|
||||
{ "update" : { "_id" : "0", "_type" : "type1", "_index" : "index1", "_retry_on_conflict" : 3} }
|
||||
{ "script" : "ctx._source.counter += param1", "lang" : "js", "params" : {"param1" : 1}, "upsert" : {"counter" : 1}}
|
||||
{ "script" : { "inline": "ctx._source.counter += param1", "lang" : "js", "params" : {"param1" : 1}}, "upsert" : {"counter" : 1}}
|
||||
{ "update" : {"_id" : "2", "_type" : "type1", "_index" : "index1", "_retry_on_conflict" : 3} }
|
||||
{ "doc" : {"field" : "value"}, "doc_as_upsert" : true }
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -228,5 +228,7 @@ it's current version is equal to the specified one. This behavior is the same
|
|||
for all version types with the exception of version type `FORCE` which always
|
||||
retrieves the document.
|
||||
|
||||
Note that Elasticsearch does not store older versions of documents. Only the current version can be retrieved.
|
||||
|
||||
Internally, Elasticsearch has marked the old document as deleted and added an
|
||||
entirely new document. The old version of the document doesn’t disappear
|
||||
immediately, although you won’t be able to access it. Elasticsearch cleans up
|
||||
deleted documents in the background as you continue to index more data.
|
||||
|
|
|
@ -28,9 +28,11 @@ Now, we can execute a script that would increment the counter:
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
|
||||
"script" : "ctx._source.counter += count",
|
||||
"params" : {
|
||||
"count" : 4
|
||||
"script" : {
|
||||
"inline": "ctx._source.counter += count",
|
||||
"params" : {
|
||||
"count" : 4
|
||||
}
|
||||
}
|
||||
}'
|
||||
--------------------------------------------------
|
||||
|
@ -41,9 +43,11 @@ will still add it, since its a list):
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
|
||||
"script" : "ctx._source.tags += tag",
|
||||
"params" : {
|
||||
"tag" : "blue"
|
||||
"script" : {
|
||||
"inline": "ctx._source.tags += tag",
|
||||
"params" : {
|
||||
"tag" : "blue"
|
||||
}
|
||||
}
|
||||
}'
|
||||
--------------------------------------------------
|
||||
|
@ -71,9 +75,11 @@ And, we can delete the doc if the tags contain blue, or ignore (noop):
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
|
||||
"script" : "ctx._source.tags.contains(tag) ? ctx.op = \"delete\" : ctx.op = \"none\"",
|
||||
"params" : {
|
||||
"tag" : "blue"
|
||||
"script" : {
|
||||
"inline": "ctx._source.tags.contains(tag) ? ctx.op = \"delete\" : ctx.op = \"none\"",
|
||||
"params" : {
|
||||
"tag" : "blue"
|
||||
}
|
||||
}
|
||||
}'
|
||||
--------------------------------------------------
|
||||
|
@ -136,9 +142,11 @@ index the fresh doc:
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/test/type1/1/_update' -d '{
|
||||
"script" : "ctx._source.counter += count",
|
||||
"params" : {
|
||||
"count" : 4
|
||||
"script" : {
|
||||
"inline": "ctx._source.counter += count",
|
||||
"params" : {
|
||||
"count" : 4
|
||||
}
|
||||
},
|
||||
"upsert" : {
|
||||
"counter" : 1
|
||||
|
@ -153,13 +161,15 @@ new `scripted_upsert` parameter with the value `true`.
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/sessions/session/dh3sgudg8gsrgl/_update' -d '{
|
||||
"script_id" : "my_web_session_summariser",
|
||||
"scripted_upsert":true,
|
||||
"params" : {
|
||||
"pageViewEvent" : {
|
||||
"url":"foo.com/bar",
|
||||
"response":404,
|
||||
"time":"2014-01-01 12:32"
|
||||
"script" : {
|
||||
"id": "my_web_session_summariser",
|
||||
"params" : {
|
||||
"pageViewEvent" : {
|
||||
"url":"foo.com/bar",
|
||||
"response":404,
|
||||
"time":"2014-01-01 12:32"
|
||||
}
|
||||
}
|
||||
},
|
||||
"upsert" : {
|
||||
|
|
|
@ -566,7 +566,7 @@ Which means that we just successfully bulk indexed 1000 documents into the bank
|
|||
|
||||
=== The Search API
|
||||
|
||||
Now let's start with some simple searches. There are two basic ways to run searches: one is by sending search parameters through the <<search-uri-request,REST request URI>> and the other by sending them through the<<search-request-body,[REST request body>>. The request body method allows you to be more expressive and also to define your searches in a more readable JSON format. We'll try one example of the request URI method but for the remainder of this tutorial, we will exclusively be using the request body method.
|
||||
Now let's start with some simple searches. There are two basic ways to run searches: one is by sending search parameters through the <<search-uri-request,REST request URI>> and the other by sending them through the <<search-request-body,REST request body>>. The request body method allows you to be more expressive and also to define your searches in a more readable JSON format. We'll try one example of the request URI method but for the remainder of this tutorial, we will exclusively be using the request body method.
|
||||
|
||||
The REST API for search is accessible from the `_search` endpoint. This example returns all documents in the bank index:
|
||||
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 96 KiB |
Binary file not shown.
After Width: | Height: | Size: 91 KiB |
Binary file not shown.
After Width: | Height: | Size: 48 KiB |
|
@ -149,6 +149,7 @@ field data format.
|
|||
Computes and stores field data data-structures on disk at indexing time.
|
||||
|
||||
[float]
|
||||
[[global-ordinals]]
|
||||
==== Global ordinals
|
||||
|
||||
Global ordinals is a data-structure on top of field data, that maintains an
|
||||
|
@ -182,6 +183,7 @@ ordinals is a small because it is very efficiently compressed. Eager loading of
|
|||
can move the loading time from the first search request, to the refresh itself.
|
||||
|
||||
[float]
|
||||
[[fielddata-loading]]
|
||||
=== Fielddata loading
|
||||
|
||||
By default, field data is loaded lazily, i.e. the first time that a query that
|
||||
|
|
|
@ -59,7 +59,6 @@ and warmers.
|
|||
* <<indices-refresh>>
|
||||
* <<indices-flush>>
|
||||
* <<indices-optimize>>
|
||||
* <<indices-seal>>
|
||||
* <<indices-upgrade>>
|
||||
|
||||
--
|
||||
|
@ -108,8 +107,6 @@ include::indices/refresh.asciidoc[]
|
|||
|
||||
include::indices/optimize.asciidoc[]
|
||||
|
||||
include::indices/seal.asciidoc[]
|
||||
|
||||
include::indices/shadow-replicas.asciidoc[]
|
||||
|
||||
include::indices/upgrade.asciidoc[]
|
||||
|
|
|
@ -10,8 +10,9 @@ trigger flush operations as required in order to clear memory.
|
|||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
$ curl -XPOST 'http://localhost:9200/twitter/_flush'
|
||||
POST /twitter/_flush
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
[float]
|
||||
[[flush-parameters]]
|
||||
|
@ -39,7 +40,198 @@ or even on `_all` the indices.
|
|||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
$ curl -XPOST 'http://localhost:9200/kimchy,elasticsearch/_flush'
|
||||
POST /kimchy,elasticsearch/_flush
|
||||
|
||||
$ curl -XPOST 'http://localhost:9200/_flush'
|
||||
POST /_flush
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
[[indices-synced-flush]]
|
||||
=== Synced Flush
|
||||
|
||||
Elasticsearch tracks the indexing activity of each shard. Shards that have not
|
||||
received any indexing operations for 5 minutes are automatically marked as inactive. This presents
|
||||
an opportunity for Elasticsearch to reduce shard resources and also perform
|
||||
a special kind of flush, called `synced flush`. A synced flush performs a normal flush, then adds
|
||||
a generated unique marker (sync_id) to all shards.
|
||||
|
||||
Since the sync id marker was added when there were no ongoing indexing operations, it can
|
||||
be used as a quick way to check if the two shards' lucene indices are identical. This quick sync id
|
||||
comparison (if present) is used during recovery or restarts to skip the first and
|
||||
most costly phase of the process. In that case, no segment files need to be copied and
|
||||
the transaction log replay phase of the recovery can start immediately. Note that since the sync id
|
||||
marker was applied together with a flush, it is very likely that the transaction log will be empty,
|
||||
speeding up recoveries even more.
|
||||
|
||||
This is particularly useful for use cases having lots of indices which are
|
||||
never or very rarely updated, such as time based data. This use case typically generates lots of indices whose
|
||||
recovery without the synced flush marker would take a long time.
|
||||
|
||||
To check whether a shard has a marker or not, look for the `commit` section of shard stats returned by
|
||||
the <<indices-stats,indices stats>> API:
|
||||
|
||||
[source,bash]
|
||||
--------------------------------------------------
|
||||
GET /twitter/_stats/commit?level=shards
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
|
||||
which returns something similar to:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...
|
||||
"indices": {
|
||||
"twitter": {
|
||||
"primaries": {},
|
||||
"total": {},
|
||||
"shards": {
|
||||
"0": [
|
||||
{
|
||||
"routing": {
|
||||
...
|
||||
},
|
||||
"commit": {
|
||||
"id": "te7zF7C4UsirqvL6jp/vUg==",
|
||||
"generation": 2,
|
||||
"user_data": {
|
||||
"sync_id": "AU2VU0meX-VX2aNbEUsD" <1>,
|
||||
...
|
||||
},
|
||||
"num_docs": 0
|
||||
}
|
||||
}
|
||||
...
|
||||
],
|
||||
...
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
<1> the `sync id` marker
|
||||
|
||||
[float]
|
||||
=== Synced Flush API
|
||||
|
||||
The Synced Flush API allows an administrator to initiate a synced flush manually. This can be particularly useful for
|
||||
a planned (rolling) cluster restart where you can stop indexing and don't want to wait the default 5 minutes for
|
||||
idle indices to be sync-flushed automatically.
|
||||
|
||||
While handy, there are a couple of caveats for this API:
|
||||
|
||||
1. Synced flush is a best effort operation. Any ongoing indexing operations will cause
|
||||
the synced flush to fail on that shard. This means that some shards may be synced flushed while others aren't. See below for more.
|
||||
2. The `sync_id` marker is removed as soon as the shard is flushed again. That is because a flush replaces the low level
|
||||
lucene commit point where the marker is stored. Uncommitted operations in the transaction log do not remove the marker.
|
||||
In practice, one should consider any indexing operation on an index as removing the marker as a flush can be triggered by Elasticsearch
|
||||
at any time.
|
||||
|
||||
|
||||
NOTE: It is harmless to request a synced flush while there is ongoing indexing. Shards that are idle will succeed and shards
|
||||
that are not will fail. Any shards that succeeded will have faster recovery times.
|
||||
|
||||
|
||||
[source,bash]
|
||||
--------------------------------------------------
|
||||
POST /twitter/_flush/synced
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
||||
|
||||
The response contains details about how many shards were successfully sync-flushed and information about any failure.
|
||||
|
||||
Here is what it looks like when all shards of a two-shard, one-replica index are successfully
|
||||
sync-flushed:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"_shards": {
|
||||
"total": 4,
|
||||
"successful": 4,
|
||||
"failed": 0
|
||||
},
|
||||
"twitter": {
|
||||
"total": 4,
|
||||
"successful": 4,
|
||||
"failed": 0
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
Here is what it looks like when one shard group failed due to pending operations:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"_shards": {
|
||||
"total": 4,
|
||||
"successful": 2,
|
||||
"failed": 2
|
||||
},
|
||||
"twitter": {
|
||||
"total": 4,
|
||||
"successful": 2,
|
||||
"failed": 2,
|
||||
"failures": [
|
||||
{
|
||||
"shard": 1,
|
||||
"reason": "[2] ongoing operations on primary"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
NOTE: The above error is shown when the synced flush fails due to concurrent indexing operations. The HTTP
|
||||
status code in that case will be `409 CONFLICT`.
|
||||
|
||||
Sometimes the failures are specific to a shard copy. The copies that failed will not be eligible for
|
||||
fast recovery but those that succeeded still will be. This case is reported as follows:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"_shards": {
|
||||
"total": 4,
|
||||
"successful": 3,
|
||||
"failed": 1
|
||||
},
|
||||
"twitter": {
|
||||
"total": 4,
|
||||
"successful": 3,
|
||||
"failed": 1,
|
||||
"failures": [
|
||||
{
|
||||
"shard": 1,
|
||||
"reason": "unexpected error",
|
||||
"routing": {
|
||||
"state": "STARTED",
|
||||
"primary": false,
|
||||
"node": "SZNr2J_ORxKTLUCydGX4zA",
|
||||
"relocating_node": null,
|
||||
"shard": 1,
|
||||
"index": "twitter"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
NOTE: When a shard copy fails to sync-flush, the HTTP status code returned will be `409 CONFLICT`.
|
||||
|
||||
The synced flush API can be applied to more than one index with a single call,
|
||||
or even on `_all` the indices.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
POST /kimchy,elasticsearch/_flush/synced
|
||||
|
||||
POST /_flush/synced
|
||||
--------------------------------------------------
|
||||
// AUTOSENSE
|
|
@ -1,91 +0,0 @@
|
|||
[[indices-seal]]
|
||||
== Seal
|
||||
|
||||
The seal API flushes and adds a "seal" marker to the shards of one or more
|
||||
indices. The seal is used during recovery or restarts to skip the first and
|
||||
most costly phase of the process if all copies of the shard have the same seal.
|
||||
No segment files need to be copied and the transaction log replay phase of the
|
||||
recovery can start immediately which makes recovery much faster.
|
||||
|
||||
There are two important points about seals:
|
||||
1. They are best effort in that if there are any outstanding write operations
|
||||
while the seal operation is being performed then the shards which those writes
|
||||
target won't be sealed but all others will be. See below for more.
|
||||
2. The seal breaks as soon as the shard issues a new lucene commit. Uncommitted
|
||||
operations in the transaction log do not break the seal. That is because a seal
|
||||
marks a point in time snapshot of the segments, a low level lucene commit.
|
||||
Practically that means that every write operation on the index will remove the
|
||||
seal.
|
||||
|
||||
[source,bash]
|
||||
--------------------------------------------------
|
||||
$ curl -XPOST 'http://localhost:9200/twitter/_seal'
|
||||
--------------------------------------------------
|
||||
|
||||
The response contains details about which shards wrote the seal and the reason
|
||||
in case they failed to write the seal.
|
||||
|
||||
Here is what it looks like when all copies single shard index successfully
|
||||
wrote the seal:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"twitter": [
|
||||
{
|
||||
"shard_id": 0,
|
||||
"responses": {
|
||||
"5wjOIntuRqy9F_7JRrrLwA": "success",
|
||||
"M2iCBe-nS5yaInE8volfSg": "success"
|
||||
},
|
||||
"message": "success"
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
Here is what it looks like when one copy fails:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"twitter": [
|
||||
{
|
||||
"shard_id": 0,
|
||||
"responses": {
|
||||
"M2iCBe-nS5yaInE8volfSg": "pending operations",
|
||||
"5wjOIntuRqy9F_7JRrrLwA": "success"
|
||||
},
|
||||
"message": "failed on some copies"
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
Sometimes the failures can be shard wide and they'll look like this:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"twitter": [
|
||||
{
|
||||
"shard_id": 0,
|
||||
"message": "operation counter on primary is non zero [2]"
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
[float]
|
||||
[[seal-multi-index]]
|
||||
=== Multi Index
|
||||
|
||||
The seal API can be applied to more than one index with a single call,
|
||||
or even on `_all` the indices.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'http://localhost:9200/kimchy,elasticsearch/_seal'
|
||||
|
||||
curl -XPOST 'http://localhost:9200/_seal'
|
||||
--------------------------------------------------
|
|
@ -16,6 +16,25 @@ settings, you need to enable using it in elasticsearch.yml:
|
|||
node.enable_custom_paths: true
|
||||
--------------------------------------------------
|
||||
|
||||
You will also need to disable the default security manager that Elasticsearch
|
||||
runs with. You can do this by either passing
|
||||
`-Des.security.manager.enabled=false` with the parameters while starting
|
||||
Elasticsearch, or you can disable it in elasticsearch.yml:
|
||||
|
||||
[source,yaml]
|
||||
--------------------------------------------------
|
||||
security.manager.enabled: false
|
||||
--------------------------------------------------
|
||||
|
||||
[WARNING]
|
||||
========================
|
||||
Disabling the security manager means that the Elasticsearch process is not
|
||||
limited to the directories and files that it can read and write. However,
|
||||
because the `index.data_path` setting is set when creating the index, the
|
||||
security manager would prevent writing or reading from the index's location, so
|
||||
it must be disabled.
|
||||
========================
|
||||
|
||||
You can then create an index with a custom data path, where each node will use
|
||||
this path for the data:
|
||||
|
||||
|
@ -88,6 +107,12 @@ settings API:
|
|||
Boolean value indicating this index uses a shared filesystem. Defaults to
|
||||
`true` if `index.shadow_replicas` is set to true, `false` otherwise.
|
||||
|
||||
`index.shared_filesystem.recover_on_any_node`::
|
||||
Boolean value indicating whether the primary shards for the index should be
|
||||
allowed to recover on any node in the cluster, regardless of the number of
|
||||
replicas or whether the node has previously had the shard allocated to it
|
||||
before. Defaults to `false`.
|
||||
|
||||
=== Node level settings related to shadow replicas
|
||||
|
||||
These are non-dynamic settings that need to be configured in `elasticsearch.yml`
|
||||
|
|
|
@ -54,13 +54,26 @@ curl 'http://localhost:9200/twitter/_upgrade?pretty&human'
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"twitter": {
|
||||
"size": "21gb",
|
||||
"size_in_bytes": "21000000000",
|
||||
"size_to_upgrade": "10gb",
|
||||
"size_to_upgrade_in_bytes": "10000000000",
|
||||
"size_to_upgrade_ancient": "1gb",
|
||||
"size_to_upgrade_ancient_in_bytes": "1000000000",
|
||||
"indices": {
|
||||
"twitter": {
|
||||
"size": "21gb",
|
||||
"size_in_bytes": "21000000000",
|
||||
"size_to_upgrade": "10gb",
|
||||
"size_to_upgrade_in_bytes": "10000000000",
|
||||
"size_to_upgrade_ancient": "1gb",
|
||||
"size_to_upgrade_ancient_in_bytes": "1000000000"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The level of details in the upgrade status command can be controlled by
|
||||
setting the `level` parameter to `cluster`, `index` (default) or `shard` levels.
|
||||
For example, you can run the upgrade status command with `level=shard` to
|
||||
get detailed upgrade information of each individual shard.
|
|
@ -198,6 +198,11 @@ year.
|
|||
|
||||
|`year_month_day`|A formatter for a four digit year, two digit month of
|
||||
year, and two digit day of month.
|
||||
|
||||
|`epoch_second`|A formatter for the number of seconds since the epoch.
|
||||
|
||||
|`epoch_millis`|A formatter for the number of milliseconds since
|
||||
the epoch.
|
||||
|=======================================================================
|
||||
|
||||
[float]
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
[[mapping-parent-field]]
|
||||
=== `_parent`
|
||||
|
||||
TIP: It is highly recommended to reindex all indices with a `_parent` field created before version 2.x.
|
||||
The reason for this is to gain from all the optimizations added with the 2.0 release.
|
||||
|
||||
The parent field mapping is defined on a child mapping, and points to
|
||||
the parent type this child relates to. For example, in case of a `blog`
|
||||
type and a `blog_tag` type child document, the mapping for `blog_tag`
|
||||
|
@ -20,8 +23,34 @@ should be:
|
|||
The mapping is automatically stored and indexed (meaning it can be
|
||||
searched on using the `_parent` field notation).
|
||||
|
||||
==== Field data loading
|
||||
==== Limitations
|
||||
|
||||
Contrary to other fields the fielddata loading is not `lazy`, but `eager`. The reason for this is that when this
|
||||
field has been enabled it is going to be used in parent/child queries, which heavily relies on field data to perform
|
||||
efficiently. This can already be observed during indexing after refresh either automatically or manually has been executed.
|
||||
The `_parent.type` setting can only point to a type that doesn't exist yet.
|
||||
This means that a type can't become a parent type after it has been created.
|
||||
|
||||
The `_parent.type` setting can't point to itself. This means self referential
|
||||
parent/child isn't supported.
|
||||
|
||||
Parent/child queries (`has_child` & `has_parent`) can't be used in index aliases.
|
||||
|
||||
==== Global ordinals
|
||||
|
||||
Parent-child uses <<global-ordinals,global ordinals>> to speed up joins and global ordinals need to be rebuilt after any change to a shard.
|
||||
The more parent id values are stored in a shard, the longer it takes to rebuild global ordinals for the `_parent` field.
|
||||
|
||||
Global ordinals, by default, are built lazily: the first parent-child query or aggregation after a refresh will trigger building of global ordinals.
|
||||
This can introduce a significant latency spike for your users. You can use <<fielddata-loading,eager_global_ordinals>> to shift the cost of building global ordinals
|
||||
from query time to refresh time, by mapping the _parent field as follows:
|
||||
|
||||
==== Memory usage
|
||||
|
||||
The only on heap memory used by parent/child is the global ordinals for the `_parent` field.
|
||||
|
||||
How much memory is used for the global ordinals for the `_parent` field in the fielddata cache
|
||||
can be checked via the <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
|
||||
APIs, eg:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XGET "http://localhost:9200/_stats/fielddata?pretty&human&fielddata_fields=_parent"
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -79,7 +79,7 @@ format>> used to parse the provided timestamp value. For example:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
Note, the default format is `dateOptionalTime`. The timestamp value will
|
||||
Note, the default format is `epoch_millis||dateOptionalTime`. The timestamp value will
|
||||
first be parsed as a number and if it fails the format will be tried.
|
||||
|
||||
[float]
|
||||
|
|
|
@ -10,11 +10,13 @@ field. Example:
|
|||
{
|
||||
"example" : {
|
||||
"transform" : {
|
||||
"script" : "if (ctx._source['title']?.startsWith('t')) ctx._source['suggest'] = ctx._source['content']",
|
||||
"params" : {
|
||||
"variable" : "not used but an example anyway"
|
||||
},
|
||||
"lang": "groovy"
|
||||
"script" : {
|
||||
"inline": "if (ctx._source['title']?.startsWith('t')) ctx._source['suggest'] = ctx._source['content']",
|
||||
"params" : {
|
||||
"variable" : "not used but an example anyway"
|
||||
},
|
||||
"lang": "groovy"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"title": { "type": "string" },
|
||||
|
|
|
@ -349,7 +349,7 @@ date type:
|
|||
Defaults to the property/field name.
|
||||
|
||||
|`format` |The <<mapping-date-format,date
|
||||
format>>. Defaults to `dateOptionalTime`.
|
||||
format>>. Defaults to `epoch_millis||dateOptionalTime`.
|
||||
|
||||
|`store` |Set to `true` to store actual field in the index, `false` to not
|
||||
store it. Defaults to `false` (note, the JSON document itself is stored,
|
||||
|
|
|
@ -42,8 +42,8 @@ and will use the matching format as its format attribute. The date
|
|||
format itself is explained
|
||||
<<mapping-date-format,here>>.
|
||||
|
||||
The default formats are: `dateOptionalTime` (ISO) and
|
||||
`yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z`.
|
||||
The default formats are: `dateOptionalTime` (ISO),
|
||||
`yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z` and `epoch_millis`.
|
||||
|
||||
*Note:* `dynamic_date_formats` are used *only* for dynamically added
|
||||
date fields, not for `date` fields that you specify in your mapping.
|
||||
|
|
|
@ -4,6 +4,11 @@
|
|||
This section discusses the changes that you need to be aware of when migrating
|
||||
your application to Elasticsearch 2.0.
|
||||
|
||||
=== Networking
|
||||
|
||||
Elasticsearch now binds to the loopback interface by default (usually 127.0.0.1
|
||||
or ::1). The setting `network.host` can be specified to change this behavior.
|
||||
|
||||
=== Indices API
|
||||
|
||||
The <<alias-retrieving, get alias api>> will, by default produce an error response
|
||||
|
@ -404,6 +409,12 @@ The `count` search type has been deprecated. All benefits from this search type
|
|||
now be achieved by using the `query_then_fetch` search type (which is the
|
||||
default) and setting `size` to `0`.
|
||||
|
||||
=== The count api internally uses the search api
|
||||
|
||||
The count api is now a shortcut to the search api with `size` set to 0. As a
|
||||
result, a total failure will result in an exception being returned rather
|
||||
than a normal response with `count` set to `0` and shard failures.
|
||||
|
||||
=== JSONP support
|
||||
|
||||
JSONP callback support has now been removed. CORS should be used to access Elasticsearch
|
||||
|
@ -620,3 +631,19 @@ anymore, it will only highlight fields that were queried.
|
|||
The `match` query with type set to `match_phrase_prefix` is not supported by the
|
||||
postings highlighter. No highlighted snippets will be returned.
|
||||
|
||||
[float]
|
||||
=== Parent/child
|
||||
|
||||
Parent/child has been rewritten completely to reduce memory usage and to execute
|
||||
`has_child` and `has_parent` queries faster and more efficiently. The `_parent` field
|
||||
uses doc values by default. The refactored and improved implementation is only active
|
||||
for indices created on or after version 2.0.
|
||||
|
||||
In order to benefit from all performance and memory improvements we recommend reindexing all
|
||||
indices that have the `_parent` field and were created before the cluster was upgraded to 2.0.
|
||||
|
||||
The following breaks in backwards compatibility have been made on indices with the `_parent` field
|
||||
created on or after clusters with version 2.0:
|
||||
* The `type` option on the `_parent` field can only point to a parent type that doesn't exist yet,
|
||||
so this means that an existing type/mapping can no longer become a parent type.
|
||||
* The `has_child` and `has_parent` queries can no longer be used in alias filters.
|
||||
|
|
|
@ -8,15 +8,14 @@ configuration, for example, the
|
|||
network settings allows to set common settings that will be shared among
|
||||
all network based modules (unless explicitly overridden in each module).
|
||||
|
||||
The `network.bind_host` setting allows to control the host different
|
||||
network components will bind on. By default, the bind host will be
|
||||
`anyLocalAddress` (typically `0.0.0.0` or `::0`).
|
||||
The `network.bind_host` setting allows to control the host different network
|
||||
components will bind on. By default, the bind host will be `anyLoopbackAddress`
|
||||
(typically `127.0.0.1` or `::1`).
|
||||
|
||||
The `network.publish_host` setting allows to control the host the node
|
||||
will publish itself within the cluster so other nodes will be able to
|
||||
connect to it. Of course, this can't be the `anyLocalAddress`, and by
|
||||
default, it will be the first non loopback address (if possible), or the
|
||||
local address.
|
||||
The `network.publish_host` setting allows to control the host the node will
|
||||
publish itself within the cluster so other nodes will be able to connect to it.
|
||||
Of course, this can't be the `anyLocalAddress`, and by default, it will be the
|
||||
first loopback address (if possible), or the local address.
|
||||
|
||||
The `network.host` setting is a simple setting to automatically set both
|
||||
`network.bind_host` and `network.publish_host` to the same host value.
|
||||
|
|
|
@ -293,6 +293,7 @@ deprecated[1.5.0,Rivers have been deprecated. See https://www.elastic.co/blog/d
|
|||
* https://github.com/karmi/elasticsearch-paramedic[Paramedic Plugin] (by Karel Minařík)
|
||||
* https://github.com/polyfractal/elasticsearch-segmentspy[SegmentSpy Plugin] (by Zachary Tong)
|
||||
* https://github.com/xyu/elasticsearch-whatson[Whatson Plugin] (by Xiao Yu)
|
||||
* https://github.com/lmenezes/elasticsearch-kopf[Kopf Plugin] (by lmenezes)
|
||||
|
||||
[float]
|
||||
[[repository-plugins]]
|
||||
|
|
|
@ -29,7 +29,7 @@ GET /_search
|
|||
{
|
||||
"script_fields": {
|
||||
"my_field": {
|
||||
"script": "1 + my_var",
|
||||
"inline": "1 + my_var",
|
||||
"params": {
|
||||
"my_var": 2
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ GET /_search
|
|||
}
|
||||
-----------------------------------
|
||||
|
||||
Save the contents of the script as a file called `config/scripts/my_script.groovy`
|
||||
Save the contents of the `inline` field as a file called `config/scripts/my_script.groovy`
|
||||
on every data node in the cluster:
|
||||
|
||||
[source,js]
|
||||
|
@ -54,7 +54,7 @@ GET /_search
|
|||
{
|
||||
"script_fields": {
|
||||
"my_field": {
|
||||
"script_file": "my_script",
|
||||
"file": "my_script",
|
||||
"params": {
|
||||
"my_var": 2
|
||||
}
|
||||
|
@ -67,9 +67,9 @@ GET /_search
|
|||
|
||||
|
||||
Additional `lang` plugins are provided to allow to execute scripts in
|
||||
different languages. All places where a `script` parameter can be used, a `lang` parameter
|
||||
(on the same level) can be provided to define the language of the
|
||||
script. The following are the supported scripting languages:
|
||||
different languages. All places where a script can be used, a `lang` parameter
|
||||
can be provided to define the language of the script. The following are the
|
||||
supported scripting languages:
|
||||
|
||||
[cols="<,<,<",options="header",]
|
||||
|=======================================================================
|
||||
|
@ -120,7 +120,7 @@ curl -XPOST localhost:9200/_search -d '{
|
|||
{
|
||||
"script_score": {
|
||||
"lang": "groovy",
|
||||
"script_file": "calculate-score",
|
||||
"file": "calculate-score",
|
||||
"params": {
|
||||
"my_modifier": 8
|
||||
}
|
||||
|
@ -162,8 +162,8 @@ curl -XPOST localhost:9200/_scripts/groovy/indexedCalculateScore -d '{
|
|||
This will create a document with id: `indexedCalculateScore` and type: `groovy` in the
|
||||
`.scripts` index. The type of the document is the language used by the script.
|
||||
|
||||
This script can be accessed at query time by appending `_id` to
|
||||
the script parameter and passing the script id. So `script` becomes `script_id`.:
|
||||
This script can be accessed at query time by using the `id` script parameter and passing
|
||||
the script id:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
@ -178,7 +178,7 @@ curl -XPOST localhost:9200/_search -d '{
|
|||
"functions": [
|
||||
{
|
||||
"script_score": {
|
||||
"script_id": "indexedCalculateScore",
|
||||
"id": "indexedCalculateScore",
|
||||
"lang" : "groovy",
|
||||
"params": {
|
||||
"my_modifier": 8
|
||||
|
|
|
@ -3,32 +3,48 @@
|
|||
|
||||
[partintro]
|
||||
--
|
||||
*elasticsearch* provides a full Query DSL based on JSON to define
|
||||
queries. In general, there are basic queries such as
|
||||
<<query-dsl-term-query,term>> or
|
||||
<<query-dsl-prefix-query,prefix>>. There are
|
||||
also compound queries like the
|
||||
<<query-dsl-bool-query,bool>> query.
|
||||
|
||||
While queries have scoring capabilities, in some contexts they will
|
||||
only be used to filter the result set, such as in the
|
||||
<<query-dsl-filtered-query,filtered>> or
|
||||
<<query-dsl-constant-score-query,constant_score>>
|
||||
queries.
|
||||
Elasticsearch provides a full Query DSL based on JSON to define queries.
|
||||
Think of the Query DSL as an AST of queries, consisting of two types of
|
||||
clauses:
|
||||
|
||||
Think of the Query DSL as an AST of queries.
|
||||
Some queries can be used by themselves like the
|
||||
<<query-dsl-term-query,term>> query but other queries can contain
|
||||
queries (like the <<query-dsl-bool-query,bool>> query), and each
|
||||
of these composite queries can contain *any* query of the list of
|
||||
queries, resulting in the ability to build quite
|
||||
complex (and interesting) queries.
|
||||
Leaf query clauses::
|
||||
|
||||
Queries can be used in different APIs. For example,
|
||||
within a <<search-request-query,search query>>, or
|
||||
as an <<search-aggregations-bucket-filter-aggregation,aggregation filter>>.
|
||||
This section explains the queries that can form the AST one can use.
|
||||
Leaf query clauses look for a particular value in a particular field, such as the
|
||||
<<query-dsl-match-query,`match`>>, <<query-dsl-term-query,`term`>> or
|
||||
<<query-dsl-range-query,`range`>> queries. These queries can be used
|
||||
by themselves.
|
||||
|
||||
Compound query clauses::
|
||||
|
||||
Compound query clauses wrap other leaf *or* compound queries and are used to combine
|
||||
multiple queries in a logical fashion (such as the
|
||||
<<query-dsl-bool-query,`bool`>> or <<query-dsl-dis-max-query,`dis_max`>> query),
|
||||
or to alter their behaviour (such as the <<query-dsl-not-query,`not`>> or
|
||||
<<query-dsl-constant-score-query,`constant_score`>> query).
|
||||
|
||||
Query clauses behave differently depending on whether they are used in
|
||||
<<query-filter-context,query context or filter context>>.
|
||||
--
|
||||
|
||||
include::query-dsl/index.asciidoc[]
|
||||
include::query-dsl/query_filter_context.asciidoc[]
|
||||
|
||||
include::query-dsl/match-all-query.asciidoc[]
|
||||
|
||||
include::query-dsl/full-text-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/term-level-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/compound-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/joining-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/geo-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/special-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/span-queries.asciidoc[]
|
||||
|
||||
include::query-dsl/minimum-should-match.asciidoc[]
|
||||
|
||||
include::query-dsl/multi-term-rewrite.asciidoc[]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-and-query]]
|
||||
== And Query
|
||||
=== And Query
|
||||
|
||||
deprecated[2.0.0, Use the `bool` query instead]
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-bool-query]]
|
||||
== Bool Query
|
||||
=== Bool Query
|
||||
|
||||
A query that matches documents matching boolean combinations of other
|
||||
queries. The bool query maps to Lucene `BooleanQuery`. It is built using
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-boosting-query]]
|
||||
== Boosting Query
|
||||
=== Boosting Query
|
||||
|
||||
The `boosting` query can be used to effectively demote results that
|
||||
match a given query. Unlike the "NOT" clause in bool query, this still
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
[[query-dsl-common-terms-query]]
|
||||
== Common Terms Query
|
||||
=== Common Terms Query
|
||||
|
||||
The `common` terms query is a modern alternative to stopwords which
|
||||
improves the precision and recall of search results (by taking stopwords
|
||||
into account), without sacrificing performance.
|
||||
|
||||
[float]
|
||||
=== The problem
|
||||
==== The problem
|
||||
|
||||
Every term in a query has a cost. A search for `"The brown fox"`
|
||||
requires three term queries, one for each of `"the"`, `"brown"` and
|
||||
|
@ -25,7 +25,7 @@ and `"not happy"`) and we lose recall (eg text like `"The The"` or
|
|||
`"To be or not to be"` would simply not exist in the index).
|
||||
|
||||
[float]
|
||||
=== The solution
|
||||
==== The solution
|
||||
|
||||
The `common` terms query divides the query terms into two groups: more
|
||||
important (ie _low frequency_ terms) and less important (ie _high
|
||||
|
@ -63,7 +63,7 @@ site, common terms like `"clip"` or `"video"` will automatically behave
|
|||
as stopwords without the need to maintain a manual list.
|
||||
|
||||
[float]
|
||||
=== Examples
|
||||
==== Examples
|
||||
|
||||
In this example, words that have a document frequency greater than 0.1%
|
||||
(eg `"this"` and `"is"`) will be treated as _common terms_.
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
[[compound-queries]]
|
||||
== Compound queries
|
||||
|
||||
Compound queries wrap other compound or leaf queries, either to combine their
|
||||
results and scores, to change their behaviour, or to switch from query to
|
||||
filter context.
|
||||
|
||||
The queries in this group are:
|
||||
|
||||
<<query-dsl-constant-score-query,`constant_score` query>>::
|
||||
|
||||
A query which wraps another query, but executes it in filter context. All
|
||||
matching documents are given the same ``constant'' `_score`.
|
||||
|
||||
<<query-dsl-bool-query,`bool` query>>::
|
||||
|
||||
The default query for combining multiple leaf or compound query clauses, as
|
||||
`must`, `should`, `must_not`, or `filter` clauses. The `must` and `should`
|
||||
clauses have their scores combined -- the more matching clauses, the better --
|
||||
while the `must_not` and `filter` clauses are executed in filter context.
|
||||
|
||||
<<query-dsl-dis-max-query,`dis_max` query>>::
|
||||
|
||||
A query which accepts multiple queries, and returns any documents which match
|
||||
any of the query clauses. While the `bool` query combines the scores from all
|
||||
matching queries, the `dis_max` query uses the score of the single
|
||||
best-matching query clause.
|
||||
|
||||
<<query-dsl-function-score-query,`function_score` query>>::
|
||||
|
||||
Modify the scores returned by the main query with functions to take into
|
||||
account factors like popularity, recency, distance, or custom algorithms
|
||||
implemented with scripting.
|
||||
|
||||
<<query-dsl-boosting-query,`boosting` query>>::
|
||||
|
||||
Return documents which match a `positive` query, but reduce the score of
|
||||
documents which also match a `negative` query.
|
||||
|
||||
<<query-dsl-indices-query,`indices` query>>::
|
||||
|
||||
Execute one query for the specified indices, and another for other indices.
|
||||
|
||||
<<query-dsl-and-query,`and`>>, <<query-dsl-or-query,`or`>>, <<query-dsl-not-query,`not`>>::
|
||||
|
||||
Synonyms for the `bool` query.
|
||||
|
||||
<<query-dsl-filtered-query,`filtered` query>>::
|
||||
|
||||
Combine a query clause in query context with another in filter context. deprecated[2.0.0,Use the `bool` query instead]
|
||||
|
||||
<<query-dsl-limit-query,`limit` query>>::
|
||||
|
||||
Limits the number of documents examined per shard. deprecated[1.6.0]
|
||||
|
||||
|
||||
include::constant-score-query.asciidoc[]
|
||||
include::bool-query.asciidoc[]
|
||||
include::dis-max-query.asciidoc[]
|
||||
include::function-score-query.asciidoc[]
|
||||
include::boosting-query.asciidoc[]
|
||||
include::indices-query.asciidoc[]
|
||||
include::and-query.asciidoc[]
|
||||
include::not-query.asciidoc[]
|
||||
include::or-query.asciidoc[]
|
||||
include::filtered-query.asciidoc[]
|
||||
include::limit-query.asciidoc[]
|
||||
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-constant-score-query]]
|
||||
== Constant Score Query
|
||||
=== Constant Score Query
|
||||
|
||||
A query that wraps another query and simply returns a
|
||||
constant score equal to the query boost for every document in the
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-dis-max-query]]
|
||||
== Dis Max Query
|
||||
=== Dis Max Query
|
||||
|
||||
A query that generates the union of documents produced by its
|
||||
subqueries, and that scores each document with the maximum score for
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-exists-query]]
|
||||
== Exists Query
|
||||
=== Exists Query
|
||||
|
||||
Returns documents that have at least one non-`null` value in the original field:
|
||||
|
||||
|
@ -42,7 +42,7 @@ These documents would *not* match the above query:
|
|||
<3> The `user` field is missing completely.
|
||||
|
||||
[float]
|
||||
==== `null_value` mapping
|
||||
===== `null_value` mapping
|
||||
|
||||
If the field mapping includes the `null_value` setting (see <<mapping-core-types>>)
|
||||
then explicit `null` values are replaced with the specified `null_value`. For
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-filtered-query]]
|
||||
== Filtered Query
|
||||
=== Filtered Query
|
||||
|
||||
deprecated[2.0.0, Use the `bool` query instead with a `must` clause for the query and a `filter` clause for the filter]
|
||||
|
||||
|
@ -47,7 +47,7 @@ curl -XGET localhost:9200/_search -d '
|
|||
<1> The `filtered` query is passed as the value of the `query`
|
||||
parameter in the search request.
|
||||
|
||||
=== Filtering without a query
|
||||
==== Filtering without a query
|
||||
|
||||
If a `query` is not specified, it defaults to the
|
||||
<<query-dsl-match-all-query,`match_all` query>>. This means that the
|
||||
|
@ -71,7 +71,7 @@ curl -XGET localhost:9200/_search -d '
|
|||
<1> No `query` has been specified, so this request applies just the filter,
|
||||
returning all documents created since yesterday.
|
||||
|
||||
==== Multiple filters
|
||||
===== Multiple filters
|
||||
|
||||
Multiple filters can be applied by wrapping them in a
|
||||
<<query-dsl-bool-query,`bool` query>>, for example:
|
||||
|
@ -95,7 +95,7 @@ Multiple filters can be applied by wrapping them in a
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
==== Filter strategy
|
||||
===== Filter strategy
|
||||
|
||||
You can control how the filter and query are executed with the `strategy`
|
||||
parameter:
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
[[full-text-queries]]
|
||||
== Full text queries
|
||||
|
||||
The high-level full text queries are usually used for running full text
|
||||
queries on full text fields like the body of an email. They understand how the
|
||||
field being queried is <<analysis,analyzed>> and will apply each field's
|
||||
`analyzer` (or `search_analyzer`) to the query string before executing.
|
||||
|
||||
The queries in this group are:
|
||||
|
||||
<<query-dsl-match-query,`match` query>>::
|
||||
|
||||
The standard query for performing full text queries, including fuzzy matching
|
||||
and phrase or proximity queries.
|
||||
|
||||
<<query-dsl-multi-match-query,`multi_match` query>>::
|
||||
|
||||
The multi-field version of the `match` query.
|
||||
|
||||
<<query-dsl-common-terms-query,`common_terms` query>>::
|
||||
|
||||
A more specialized query which gives more preference to uncommon words.
|
||||
|
||||
<<query-dsl-query-string-query,`query_string` query>>::
|
||||
|
||||
Supports the compact Lucene <<query-string-syntax,query string syntax>>,
|
||||
allowing you to specify AND|OR|NOT conditions and multi-field search
|
||||
within a single query string. For expert users only.
|
||||
|
||||
<<query-dsl-simple-query-string-query,`simple_query_string`>>::
|
||||
|
||||
A simpler, more robust version of the `query_string` syntax suitable
|
||||
for exposing directly to users.
|
||||
|
||||
include::match-query.asciidoc[]
|
||||
|
||||
include::multi-match-query.asciidoc[]
|
||||
|
||||
include::common-terms-query.asciidoc[]
|
||||
|
||||
include::query-string-query.asciidoc[]
|
||||
|
||||
include::simple-query-string-query.asciidoc[]
|
||||
|
|
@ -1,15 +1,13 @@
|
|||
[[query-dsl-function-score-query]]
|
||||
== Function Score Query
|
||||
=== Function Score Query
|
||||
|
||||
The `function_score` allows you to modify the score of documents that are
|
||||
retrieved by a query. This can be useful if, for example, a score
|
||||
function is computationally expensive and it is sufficient to compute
|
||||
the score on a filtered set of documents.
|
||||
|
||||
=== Using function score
|
||||
|
||||
To use `function_score`, the user has to define a query and one or
|
||||
several functions, that compute a new score for each document returned
|
||||
more functions, that compute a new score for each document returned
|
||||
by the query.
|
||||
|
||||
`function_score` can be used with only one function like this:
|
||||
|
@ -91,11 +89,9 @@ query. The parameter `boost_mode` defines how:
|
|||
By default, modifying the score does not change which documents match. To exclude
|
||||
documents that do not meet a certain score threshold the `min_score` parameter can be set to the desired score threshold.
|
||||
|
||||
==== Score functions
|
||||
|
||||
The `function_score` query provides several types of score functions.
|
||||
|
||||
===== Script score
|
||||
==== Script score
|
||||
|
||||
The `script_score` function allows you to wrap another query and customize
|
||||
the scoring of it optionally with a computation derived from other numeric
|
||||
|
@ -120,12 +116,14 @@ script, and provide parameters to it:
|
|||
[source,js]
|
||||
--------------------------------------------------
|
||||
"script_score": {
|
||||
"lang": "lang",
|
||||
"params": {
|
||||
"param1": value1,
|
||||
"param2": value2
|
||||
},
|
||||
"script": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
|
||||
"script": {
|
||||
"lang": "lang",
|
||||
"params": {
|
||||
"param1": value1,
|
||||
"param2": value2
|
||||
},
|
||||
"inline": "_score * doc['my_numeric_field'].value / pow(param1, param2)"
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
@ -133,7 +131,7 @@ Note that unlike the `custom_score` query, the
|
|||
score of the query is multiplied with the result of the script scoring. If
|
||||
you wish to inhibit this, set `"boost_mode": "replace"`
|
||||
|
||||
===== Weight
|
||||
==== Weight
|
||||
|
||||
The `weight` score allows you to multiply the score by the provided
|
||||
`weight`. This can sometimes be desired since boost value set on
|
||||
|
@ -145,7 +143,7 @@ not.
|
|||
"weight" : number
|
||||
--------------------------------------------------
|
||||
|
||||
===== Random
|
||||
==== Random
|
||||
|
||||
The `random_score` generates scores using a hash of the `_uid` field,
|
||||
with a `seed` for variation. If `seed` is not specified, the current
|
||||
|
@ -161,7 +159,7 @@ be a memory intensive operation since the values are unique.
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
===== Field Value factor
|
||||
==== Field Value factor
|
||||
|
||||
The `field_value_factor` function allows you to use a field from a document to
|
||||
influence the score. It's similar to using the `script_score` function, however,
|
||||
|
@ -205,7 +203,7 @@ is an illegal operation, and an exception will be thrown. Be sure to limit the
|
|||
values of the field with a range filter to avoid this, or use `log1p` and
|
||||
`ln1p`.
|
||||
|
||||
===== Decay functions
|
||||
==== Decay functions
|
||||
|
||||
Decay functions score a document with a function that decays depending
|
||||
on the distance of a numeric field value of the document from a user
|
||||
|
@ -358,7 +356,7 @@ Example:
|
|||
|
||||
|
||||
|
||||
==== Detailed example
|
||||
===== Detailed example
|
||||
|
||||
Suppose you are searching for a hotel in a certain town. Your budget is
|
||||
limited. Also, you would like the hotel to be close to the town center,
|
||||
|
@ -478,7 +476,7 @@ image::https://f.cloud.github.com/assets/4320215/768161/082975c0-e899-11e2-86f7-
|
|||
|
||||
image::https://f.cloud.github.com/assets/4320215/768162/0b606884-e899-11e2-907b-aefc77eefef6.png[width="700px"]
|
||||
|
||||
===== Linear' decay, keyword `linear`
|
||||
===== Linear decay, keyword `linear`
|
||||
|
||||
When choosing `linear` as the decay function in the above example, the
|
||||
contour and surface plot of the multiplier looks like this:
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
[[query-dsl-fuzzy-query]]
|
||||
== Fuzzy Query
|
||||
=== Fuzzy Query
|
||||
|
||||
The fuzzy query uses similarity based on Levenshtein edit distance for
|
||||
`string` fields, and a `+/-` margin on numeric and date fields.
|
||||
|
||||
=== String fields
|
||||
==== String fields
|
||||
|
||||
The `fuzzy` query generates all possible matching terms that are within the
|
||||
maximum edit distance specified in `fuzziness` and then checks the term
|
||||
|
@ -38,7 +38,7 @@ Or with more advanced settings:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Parameters
|
||||
===== Parameters
|
||||
|
||||
[horizontal]
|
||||
`fuzziness`::
|
||||
|
@ -62,7 +62,7 @@ are both set to `0`. This could cause every term in the index to be examined!
|
|||
|
||||
|
||||
[float]
|
||||
=== Numeric and date fields
|
||||
==== Numeric and date fields
|
||||
|
||||
Performs a <<query-dsl-range-query>> ``around'' the value using the
|
||||
`fuzziness` value as a `+/-` range, where:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-geo-bounding-box-query]]
|
||||
== Geo Bounding Box Query
|
||||
=== Geo Bounding Box Query
|
||||
|
||||
A query allowing to filter hits based on a point location using a
|
||||
bounding box. Assuming the following indexed document:
|
||||
|
@ -45,13 +45,13 @@ Then the following simple query can be executed with a
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Accepted Formats
|
||||
==== Accepted Formats
|
||||
|
||||
In much the same way the geo_point type can accept different
|
||||
representation of the geo point, the filter can accept it as well:
|
||||
|
||||
[float]
|
||||
==== Lat Lon As Properties
|
||||
===== Lat Lon As Properties
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
@ -79,7 +79,7 @@ representation of the geo point, the filter can accept it as well:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Lat Lon As Array
|
||||
===== Lat Lon As Array
|
||||
|
||||
Format in `[lon, lat]`, note, the order of lon/lat here in order to
|
||||
conform with http://geojson.org/[GeoJSON].
|
||||
|
@ -104,7 +104,7 @@ conform with http://geojson.org/[GeoJSON].
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Lat Lon As String
|
||||
===== Lat Lon As String
|
||||
|
||||
Format in `lat,lon`.
|
||||
|
||||
|
@ -128,7 +128,7 @@ Format in `lat,lon`.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Geohash
|
||||
===== Geohash
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
@ -150,7 +150,7 @@ Format in `lat,lon`.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Vertices
|
||||
==== Vertices
|
||||
|
||||
The vertices of the bounding box can either be set by `top_left` and
|
||||
`bottom_right` or by `top_right` and `bottom_left` parameters. More
|
||||
|
@ -182,20 +182,20 @@ values separately.
|
|||
|
||||
|
||||
[float]
|
||||
=== geo_point Type
|
||||
==== geo_point Type
|
||||
|
||||
The filter *requires* the `geo_point` type to be set on the relevant
|
||||
field.
|
||||
|
||||
[float]
|
||||
=== Multi Location Per Document
|
||||
==== Multi Location Per Document
|
||||
|
||||
The filter can work with multiple locations / points per document. Once
|
||||
a single location / point matches the filter, the document will be
|
||||
included in the filter
|
||||
|
||||
[float]
|
||||
=== Type
|
||||
==== Type
|
||||
|
||||
The type of the bounding box execution by default is set to `memory`,
|
||||
which means in memory checks if the doc falls within the bounding box
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-geo-distance-query]]
|
||||
== Geo Distance Query
|
||||
=== Geo Distance Query
|
||||
|
||||
Filters documents that include only hits that exist within a specific
|
||||
distance from a geo point. Assuming the following indexed json:
|
||||
|
@ -40,13 +40,13 @@ filter:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Accepted Formats
|
||||
==== Accepted Formats
|
||||
|
||||
In much the same way the `geo_point` type can accept different
|
||||
representation of the geo point, the filter can accept it as well:
|
||||
|
||||
[float]
|
||||
==== Lat Lon As Properties
|
||||
===== Lat Lon As Properties
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
@ -69,7 +69,7 @@ representation of the geo point, the filter can accept it as well:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Lat Lon As Array
|
||||
===== Lat Lon As Array
|
||||
|
||||
Format in `[lon, lat]`, note, the order of lon/lat here in order to
|
||||
conform with http://geojson.org/[GeoJSON].
|
||||
|
@ -92,7 +92,7 @@ conform with http://geojson.org/[GeoJSON].
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Lat Lon As String
|
||||
===== Lat Lon As String
|
||||
|
||||
Format in `lat,lon`.
|
||||
|
||||
|
@ -114,7 +114,7 @@ Format in `lat,lon`.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Geohash
|
||||
===== Geohash
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
@ -134,7 +134,7 @@ Format in `lat,lon`.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Options
|
||||
==== Options
|
||||
|
||||
The following are options allowed on the filter:
|
||||
|
||||
|
@ -160,13 +160,13 @@ The following are options allowed on the filter:
|
|||
|
||||
|
||||
[float]
|
||||
=== geo_point Type
|
||||
==== geo_point Type
|
||||
|
||||
The filter *requires* the `geo_point` type to be set on the relevant
|
||||
field.
|
||||
|
||||
[float]
|
||||
=== Multi Location Per Document
|
||||
==== Multi Location Per Document
|
||||
|
||||
The `geo_distance` filter can work with multiple locations / points per
|
||||
document. Once a single location / point matches the filter, the
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-geo-distance-range-query]]
|
||||
== Geo Distance Range Query
|
||||
=== Geo Distance Range Query
|
||||
|
||||
Filters documents that exist within a range from a specific point:
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-geo-polygon-query]]
|
||||
== Geo Polygon Query
|
||||
=== Geo Polygon Query
|
||||
|
||||
A query allowing to include hits that only fall within a polygon of
|
||||
points. Here is an example:
|
||||
|
@ -27,10 +27,10 @@ points. Here is an example:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Allowed Formats
|
||||
==== Allowed Formats
|
||||
|
||||
[float]
|
||||
==== Lat Long as Array
|
||||
===== Lat Long as Array
|
||||
|
||||
Format in `[lon, lat]`, note, the order of lon/lat here in order to
|
||||
conform with http://geojson.org/[GeoJSON].
|
||||
|
@ -58,7 +58,7 @@ conform with http://geojson.org/[GeoJSON].
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Lat Lon as String
|
||||
===== Lat Lon as String
|
||||
|
||||
Format in `lat,lon`.
|
||||
|
||||
|
@ -85,7 +85,7 @@ Format in `lat,lon`.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== Geohash
|
||||
===== Geohash
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
@ -110,7 +110,7 @@ Format in `lat,lon`.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== geo_point Type
|
||||
==== geo_point Type
|
||||
|
||||
The filter *requires* the
|
||||
<<mapping-geo-point-type,geo_point>> type to be
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
[[geo-queries]]
|
||||
== Geo queries
|
||||
|
||||
Elasticsearch supports two types of geo data:
|
||||
<<mapping-geo-point-type,`geo_point`>> fields which support lat/lon pairs, and
|
||||
<<mapping-geo-shape-type,`geo_shape`>> fields, which support points,
|
||||
lines, circles, polygons, multi-polygons etc.
|
||||
|
||||
The queries in this group are:
|
||||
|
||||
<<query-dsl-geo-shape-query,`geo_shape`>> query::
|
||||
|
||||
Find documents with geo-shapes which either intersect, are contained by, or
|
||||
do not intersect with the specified geo-shape.
|
||||
|
||||
<<query-dsl-geo-bounding-box-query,`geo_bounding_box`>> query::
|
||||
|
||||
Finds documents with geo-points that fall into the specified rectangle.
|
||||
|
||||
<<query-dsl-geo-distance-query,`geo_distance`>> query::
|
||||
|
||||
Finds documents with geo-points within the specified distance of a central
|
||||
point.
|
||||
|
||||
<<query-dsl-geo-distance-range-query,`geo_distance_range`>> query::
|
||||
|
||||
Like the `geo_distance` query, but the range starts at a specified distance
|
||||
from the central point.
|
||||
|
||||
<<query-dsl-geo-polygon-query,`geo_polygon`>> query::
|
||||
|
||||
Find documents with geo-points within the specified polygon.
|
||||
|
||||
<<query-dsl-geohash-cell-query,`geohash_cell`>> query::
|
||||
|
||||
Find geo-points whose geohash intersects with the geohash of the specified
|
||||
point.
|
||||
|
||||
|
||||
include::geo-shape-query.asciidoc[]
|
||||
|
||||
include::geo-bounding-box-query.asciidoc[]
|
||||
|
||||
include::geo-distance-query.asciidoc[]
|
||||
|
||||
include::geo-distance-range-query.asciidoc[]
|
||||
|
||||
include::geo-polygon-query.asciidoc[]
|
||||
|
||||
include::geohash-cell-query.asciidoc[]
|
|
@ -1,26 +1,21 @@
|
|||
[[query-dsl-geo-shape-query]]
|
||||
== GeoShape Filter
|
||||
=== GeoShape Query
|
||||
|
||||
Filter documents indexed using the `geo_shape` type.
|
||||
|
||||
Requires the <<mapping-geo-shape-type,geo_shape
|
||||
Mapping>>.
|
||||
Requires the <<mapping-geo-shape-type,geo_shape Mapping>>.
|
||||
|
||||
The `geo_shape` query uses the same grid square representation as the
|
||||
geo_shape mapping to find documents that have a shape that intersects
|
||||
with the query shape. It will also use the same PrefixTree configuration
|
||||
as defined for the field mapping.
|
||||
|
||||
[float]
|
||||
==== Filter Format
|
||||
|
||||
The Filter supports two ways of defining the Filter shape, either by
|
||||
The query supports two ways of defining the query shape, either by
|
||||
providing a whole shape definition, or by referencing the name of a shape
|
||||
pre-indexed in another index. Both formats are defined below with
|
||||
examples.
|
||||
|
||||
[float]
|
||||
===== Provided Shape Definition
|
||||
==== Inline Shape Definition
|
||||
|
||||
Similar to the `geo_shape` type, the `geo_shape` Filter uses
|
||||
http://www.geojson.org[GeoJSON] to represent shapes.
|
||||
|
@ -64,8 +59,7 @@ The following query will find the point using the Elasticsearch's
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
===== Pre-Indexed Shape
|
||||
==== Pre-Indexed Shape
|
||||
|
||||
The Filter also supports using a shape which has already been indexed in
|
||||
another index and/or index type. This is particularly useful for when
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-geohash-cell-query]]
|
||||
== Geohash Cell Query
|
||||
=== Geohash Cell Query
|
||||
|
||||
The `geohash_cell` query provides access to a hierarchy of geohashes.
|
||||
By defining a geohash cell, only <<mapping-geo-point-type,geopoints>>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-has-child-query]]
|
||||
== Has Child Query
|
||||
=== Has Child Query
|
||||
|
||||
The `has_child` filter accepts a query and the child type to run against, and
|
||||
results in parent documents that have child docs matching the query. Here is
|
||||
|
@ -20,7 +20,7 @@ an example:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Scoring capabilities
|
||||
==== Scoring capabilities
|
||||
|
||||
The `has_child` also has scoring support. The
|
||||
supported score types are `min`, `max`, `sum`, `avg` or `none`. The default is
|
||||
|
@ -46,7 +46,7 @@ inside the `has_child` query:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Min/Max Children
|
||||
==== Min/Max Children
|
||||
|
||||
The `has_child` query allows you to specify that a minimum and/or maximum
|
||||
number of children are required to match for the parent doc to be considered
|
||||
|
@ -72,21 +72,3 @@ a match:
|
|||
|
||||
The `min_children` and `max_children` parameters can be combined with
|
||||
the `score_mode` parameter.
|
||||
|
||||
[float]
|
||||
=== Memory Considerations
|
||||
|
||||
In order to support parent-child joins, all of the (string) parent IDs
|
||||
must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
|
||||
Additionally, every child document is mapped to its parent using a long
|
||||
value (approximately). It is advisable to keep the string parent ID short
|
||||
in order to reduce memory usage.
|
||||
|
||||
You can check how much memory is being used by the `_parent` field in the fielddata cache
|
||||
using the <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
|
||||
APIS, eg:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XGET "http://localhost:9200/_stats/fielddata?pretty&human&fielddata_fields=_parent"
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-has-parent-query]]
|
||||
== Has Parent Query
|
||||
=== Has Parent Query
|
||||
|
||||
The `has_parent` query accepts a query and a parent type. The query is
|
||||
executed in the parent document space, which is specified by the parent
|
||||
|
@ -22,7 +22,7 @@ in the same manner as the `has_child` query.
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Scoring capabilities
|
||||
==== Scoring capabilities
|
||||
|
||||
The `has_parent` also has scoring support. The
|
||||
supported score types are `score` or `none`. The default is `none` and
|
||||
|
@ -47,23 +47,3 @@ matching parent document. The score type can be specified with the
|
|||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Memory Considerations
|
||||
|
||||
In order to support parent-child joins, all of the (string) parent IDs
|
||||
must be resident in memory (in the <<index-modules-fielddata,field data cache>>).
|
||||
Additionally, every child document is mapped to its parent using a long
|
||||
value (approximately). It is advisable to keep the string parent ID short
|
||||
in order to reduce memory usage.
|
||||
|
||||
You can check how much memory is being used by the `_parent` field in the fielddata cache
|
||||
using the <<indices-stats,indices stats>> or <<cluster-nodes-stats,nodes stats>>
|
||||
APIS, eg:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XGET "http://localhost:9200/_stats/fielddata?pretty&human&fielddata_fields=_parent"
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-ids-query]]
|
||||
== Ids Query
|
||||
=== Ids Query
|
||||
|
||||
Filters documents that only have the provided ids. Note, this query
|
||||
uses the <<mapping-uid-field,_uid>> field.
|
||||
|
|
|
@ -1,99 +0,0 @@
|
|||
include::match-query.asciidoc[]
|
||||
|
||||
include::multi-match-query.asciidoc[]
|
||||
|
||||
include::and-query.asciidoc[]
|
||||
|
||||
include::bool-query.asciidoc[]
|
||||
|
||||
include::boosting-query.asciidoc[]
|
||||
|
||||
include::common-terms-query.asciidoc[]
|
||||
|
||||
include::constant-score-query.asciidoc[]
|
||||
|
||||
include::dis-max-query.asciidoc[]
|
||||
|
||||
include::exists-query.asciidoc[]
|
||||
|
||||
include::filtered-query.asciidoc[]
|
||||
|
||||
include::function-score-query.asciidoc[]
|
||||
|
||||
include::fuzzy-query.asciidoc[]
|
||||
|
||||
include::geo-shape-query.asciidoc[]
|
||||
|
||||
include::geo-bounding-box-query.asciidoc[]
|
||||
|
||||
include::geo-distance-query.asciidoc[]
|
||||
|
||||
include::geo-distance-range-query.asciidoc[]
|
||||
|
||||
include::geohash-cell-query.asciidoc[]
|
||||
|
||||
include::geo-polygon-query.asciidoc[]
|
||||
|
||||
include::has-child-query.asciidoc[]
|
||||
|
||||
include::has-parent-query.asciidoc[]
|
||||
|
||||
include::ids-query.asciidoc[]
|
||||
|
||||
include::indices-query.asciidoc[]
|
||||
|
||||
include::limit-query.asciidoc[]
|
||||
|
||||
include::match-all-query.asciidoc[]
|
||||
|
||||
include::missing-query.asciidoc[]
|
||||
|
||||
include::mlt-query.asciidoc[]
|
||||
|
||||
include::nested-query.asciidoc[]
|
||||
|
||||
include::not-query.asciidoc[]
|
||||
|
||||
include::or-query.asciidoc[]
|
||||
|
||||
include::prefix-query.asciidoc[]
|
||||
|
||||
include::query-string-query.asciidoc[]
|
||||
|
||||
include::simple-query-string-query.asciidoc[]
|
||||
|
||||
include::range-query.asciidoc[]
|
||||
|
||||
include::regexp-query.asciidoc[]
|
||||
|
||||
include::span-containing-query.asciidoc[]
|
||||
|
||||
include::span-first-query.asciidoc[]
|
||||
|
||||
include::span-multi-term-query.asciidoc[]
|
||||
|
||||
include::span-near-query.asciidoc[]
|
||||
|
||||
include::span-not-query.asciidoc[]
|
||||
|
||||
include::span-or-query.asciidoc[]
|
||||
|
||||
include::span-term-query.asciidoc[]
|
||||
|
||||
include::span-within-query.asciidoc[]
|
||||
|
||||
include::term-query.asciidoc[]
|
||||
|
||||
include::terms-query.asciidoc[]
|
||||
|
||||
include::wildcard-query.asciidoc[]
|
||||
|
||||
include::minimum-should-match.asciidoc[]
|
||||
|
||||
include::multi-term-rewrite.asciidoc[]
|
||||
|
||||
include::script-query.asciidoc[]
|
||||
|
||||
include::template-query.asciidoc[]
|
||||
|
||||
include::type-query.asciidoc[]
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-indices-query]]
|
||||
== Indices Query
|
||||
=== Indices Query
|
||||
|
||||
The `indices` query can be used when executed across multiple indices,
|
||||
allowing to have a query that executes only when executed on an index
|
||||
|
@ -29,9 +29,9 @@ documents), and `all` (to match all). Defaults to `all`.
|
|||
`query` is mandatory, as well as `indices` (or `index`).
|
||||
|
||||
[TIP]
|
||||
===================================================================
|
||||
====================================================================
|
||||
The fields order is important: if the `indices` are provided before `query`
|
||||
or `no_match_query`, the related queries get parsed only against the indices
|
||||
that they are going to be executed on. This is useful to avoid parsing queries
|
||||
when it is not necessary and prevent potential mapping errors.
|
||||
===================================================================
|
||||
====================================================================
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
[[joining-queries]]
|
||||
== Joining queries
|
||||
|
||||
Performing full SQL-style joins in a distributed system like Elasticsearch is
|
||||
prohibitively expensive. Instead, Elasticsearch offers two forms of join
|
||||
which are designed to scale horizontally.
|
||||
|
||||
<<query-dsl-nested-query,`nested` query>>::
|
||||
|
||||
Documents may contain fields of type <<mapping-nested-type,`nested`>>. These
|
||||
fields are used to index arrays of objects, where each object can be queried
|
||||
(with the `nested` query) as an independent document.
|
||||
|
||||
<<query-dsl-has-child-query,`has_child`>> and <<query-dsl-has-parent-query,`has_parent`>> queries::
|
||||
|
||||
A <<mapping-parent-field,parent-child relationship>> can exist between two
|
||||
document types within a single index. The `has_child` query returns parent
|
||||
documents whose child documents match the specified query, while the
|
||||
`has_parent` query returns child documents whose parent document matches the
|
||||
specified query.
|
||||
|
||||
Also see the <<query-dsl-terms-lookup,terms-lookup mechanism>> in the `terms`
|
||||
query, which allows you to build a `terms` query from values contained in
|
||||
another document.
|
||||
|
||||
include::nested-query.asciidoc[]
|
||||
|
||||
include::has-child-query.asciidoc[]
|
||||
|
||||
include::has-parent-query.asciidoc[]
|
||||
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-limit-query]]
|
||||
== Limit Query
|
||||
=== Limit Query
|
||||
|
||||
deprecated[1.6.0, Use <<search-request-body,terminate_after>> instead]
|
||||
|
||||
|
|
|
@ -1,20 +1,17 @@
|
|||
[[query-dsl-match-all-query]]
|
||||
== Match All Query
|
||||
|
||||
A query that matches all documents. Maps to Lucene `MatchAllDocsQuery`.
|
||||
The simplest query, which matches all documents, giving them all a `_score`
|
||||
of `1.0`.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"match_all" : { }
|
||||
}
|
||||
{ "match_all": {} }
|
||||
--------------------------------------------------
|
||||
|
||||
Which can also have boost associated with it:
|
||||
The `_score` can be changed with the `boost` parameter:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"match_all" : { "boost" : 1.2 }
|
||||
}
|
||||
{ "match_all": { "boost" : 1.2 }}
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-match-query]]
|
||||
== Match Query
|
||||
=== Match Query
|
||||
|
||||
A family of `match` queries that accept text/numerics/dates, analyzes
|
||||
it, and constructs a query out of it. For example:
|
||||
|
@ -16,10 +16,8 @@ it, and constructs a query out of it. For example:
|
|||
Note, `message` is the name of a field, you can substitute the name of
|
||||
any field (including `_all`) instead.
|
||||
|
||||
[float]
|
||||
=== Types of Match Queries
|
||||
There are three types of `match` query: `boolean`, `phrase`, and `phrase_prefix`:
|
||||
|
||||
[float]
|
||||
[[query-dsl-match-query-boolean]]
|
||||
==== boolean
|
||||
|
||||
|
@ -40,7 +38,6 @@ data-type mismatches, such as trying to query a numeric field with a text
|
|||
query string. Defaults to `false`.
|
||||
|
||||
[[query-dsl-match-query-fuzziness]]
|
||||
[float]
|
||||
===== Fuzziness
|
||||
|
||||
`fuzziness` allows _fuzzy matching_ based on the type of field being queried.
|
||||
|
@ -69,7 +66,6 @@ change in structure, `message` is the field name):
|
|||
--------------------------------------------------
|
||||
|
||||
[[query-dsl-match-query-zero]]
|
||||
[float]
|
||||
===== Zero terms query
|
||||
If the analyzer used removes all tokens in a query like a `stop` filter
|
||||
does, the default behavior is to match no documents at all. In order to
|
||||
|
@ -90,7 +86,6 @@ change that the `zero_terms_query` option can be used, which accepts
|
|||
--------------------------------------------------
|
||||
|
||||
[[query-dsl-match-query-cutoff]]
|
||||
[float]
|
||||
===== Cutoff frequency
|
||||
|
||||
The match query supports a `cutoff_frequency` that allows
|
||||
|
@ -132,7 +127,6 @@ that when trying it out on test indexes with low document numbers you
|
|||
should follow the advice in {defguide}/relevance-is-broken.html[Relevance is broken].
|
||||
|
||||
[[query-dsl-match-query-phrase]]
|
||||
[float]
|
||||
==== phrase
|
||||
|
||||
The `match_phrase` query analyzes the text and creates a `phrase` query
|
||||
|
@ -181,9 +175,8 @@ definition, or the default search analyzer, for example:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
[[query-dsl-match-query-phrase-prefix]]
|
||||
===== match_phrase_prefix
|
||||
==== match_phrase_prefix
|
||||
|
||||
The `match_phrase_prefix` is the same as `match_phrase`, except that it
|
||||
allows for prefix matches on the last term in the text. For example:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-missing-query]]
|
||||
== Missing Query
|
||||
=== Missing Query
|
||||
|
||||
Returns documents that have only `null` values or no value in the original field:
|
||||
|
||||
|
@ -42,7 +42,7 @@ These documents would *not* match the above filter:
|
|||
<3> This field has one non-`null` value.
|
||||
|
||||
[float]
|
||||
=== `null_value` mapping
|
||||
==== `null_value` mapping
|
||||
|
||||
If the field mapping includes a `null_value` (see <<mapping-core-types>>) then explicit `null` values
|
||||
are replaced with the specified `null_value`. For instance, if the `user` field were mapped
|
||||
|
@ -75,7 +75,7 @@ no values in the `user` field and thus would match the `missing` filter:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
==== `existence` and `null_value` parameters
|
||||
===== `existence` and `null_value` parameters
|
||||
|
||||
When the field being queried has a `null_value` mapping, then the behaviour of
|
||||
the `missing` filter can be altered with the `existence` and `null_value`
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-mlt-query]]
|
||||
== More Like This Query
|
||||
=== More Like This Query
|
||||
|
||||
The More Like This Query (MLT Query) finds documents that are "like" a given
|
||||
set of documents. In order to do so, MLT selects a set of representative terms
|
||||
|
@ -87,7 +87,7 @@ present in the index, the syntax is similar to <<docs-termvectors-artificial-doc
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
=== How it Works
|
||||
==== How it Works
|
||||
|
||||
Suppose we wanted to find all documents similar to a given input document.
|
||||
Obviously, the input document itself should be its best match for that type of
|
||||
|
@ -139,14 +139,14 @@ curl -s -XPUT 'http://localhost:9200/imdb/' -d '{
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
=== Parameters
|
||||
==== Parameters
|
||||
|
||||
The only required parameter is `like`, all other parameters have sensible
|
||||
defaults. There are three types of parameters: one to specify the document
|
||||
input, the other one for term selection and for query formation.
|
||||
|
||||
[float]
|
||||
=== Document Input Parameters
|
||||
==== Document Input Parameters
|
||||
|
||||
[horizontal]
|
||||
`like`:: coming[2.0]
|
||||
|
@ -179,7 +179,7 @@ A list of documents following the same syntax as the <<docs-multi-get,Multi GET
|
|||
|
||||
[float]
|
||||
[[mlt-query-term-selection]]
|
||||
=== Term Selection Parameters
|
||||
==== Term Selection Parameters
|
||||
|
||||
[horizontal]
|
||||
`max_query_terms`::
|
||||
|
@ -219,7 +219,7 @@ The analyzer that is used to analyze the free form text. Defaults to the
|
|||
analyzer associated with the first field in `fields`.
|
||||
|
||||
[float]
|
||||
=== Query Formation Parameters
|
||||
==== Query Formation Parameters
|
||||
|
||||
[horizontal]
|
||||
`minimum_should_match`::
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-multi-match-query]]
|
||||
== Multi Match Query
|
||||
=== Multi Match Query
|
||||
|
||||
The `multi_match` query builds on the <<query-dsl-match-query,`match` query>>
|
||||
to allow multi-field queries:
|
||||
|
@ -17,7 +17,7 @@ to allow multi-field queries:
|
|||
<2> The fields to be queried.
|
||||
|
||||
[float]
|
||||
=== `fields` and per-field boosting
|
||||
==== `fields` and per-field boosting
|
||||
|
||||
Fields can be specified with wildcards, eg:
|
||||
|
||||
|
@ -47,7 +47,7 @@ Individual fields can be boosted with the caret (`^`) notation:
|
|||
|
||||
[[multi-match-types]]
|
||||
[float]
|
||||
=== Types of `multi_match` query:
|
||||
==== Types of `multi_match` query:
|
||||
|
||||
The way the `multi_match` query is executed internally depends on the `type`
|
||||
parameter, which can be set to:
|
||||
|
@ -70,7 +70,7 @@ parameter, which can be set to:
|
|||
combines the `_score` from each field. See <<type-phrase>>.
|
||||
|
||||
[[type-best-fields]]
|
||||
=== `best_fields`
|
||||
==== `best_fields`
|
||||
|
||||
The `best_fields` type is most useful when you are searching for multiple
|
||||
words best found in the same field. For instance ``brown fox'' in a single
|
||||
|
@ -121,7 +121,7 @@ and `cutoff_frequency`, as explained in <<query-dsl-match-query, match query>>.
|
|||
[IMPORTANT]
|
||||
[[operator-min]]
|
||||
.`operator` and `minimum_should_match`
|
||||
==================================================
|
||||
===================================================
|
||||
|
||||
The `best_fields` and `most_fields` types are _field-centric_ -- they generate
|
||||
a `match` query *per field*. This means that the `operator` and
|
||||
|
@ -153,10 +153,10 @@ to match.
|
|||
|
||||
See <<type-cross-fields>> for a better solution.
|
||||
|
||||
==================================================
|
||||
===================================================
|
||||
|
||||
[[type-most-fields]]
|
||||
=== `most_fields`
|
||||
==== `most_fields`
|
||||
|
||||
The `most_fields` type is most useful when querying multiple fields that
|
||||
contain the same text analyzed in different ways. For instance, the main
|
||||
|
@ -203,7 +203,7 @@ and `cutoff_frequency`, as explained in <<query-dsl-match-query,match query>>, b
|
|||
*see <<operator-min>>*.
|
||||
|
||||
[[type-phrase]]
|
||||
=== `phrase` and `phrase_prefix`
|
||||
==== `phrase` and `phrase_prefix`
|
||||
|
||||
The `phrase` and `phrase_prefix` types behave just like <<type-best-fields>>,
|
||||
but they use a `match_phrase` or `match_phrase_prefix` query instead of a
|
||||
|
@ -240,7 +240,7 @@ in <<query-dsl-match-query>>. Type `phrase_prefix` additionally accepts
|
|||
`max_expansions`.
|
||||
|
||||
[[type-cross-fields]]
|
||||
=== `cross_fields`
|
||||
==== `cross_fields`
|
||||
|
||||
The `cross_fields` type is particularly useful with structured documents where
|
||||
multiple fields *should* match. For instance, when querying the `first_name`
|
||||
|
@ -317,7 +317,7 @@ Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`,
|
|||
`zero_terms_query` and `cutoff_frequency`, as explained in
|
||||
<<query-dsl-match-query, match query>>.
|
||||
|
||||
==== `cross_field` and analysis
|
||||
===== `cross_field` and analysis
|
||||
|
||||
The `cross_field` type can only work in term-centric mode on fields that have
|
||||
the same analyzer. Fields with the same analyzer are grouped together as in
|
||||
|
@ -411,7 +411,7 @@ which will be executed as:
|
|||
blended("will", fields: [first, first.edge, last.edge, last])
|
||||
blended("smith", fields: [first, first.edge, last.edge, last])
|
||||
|
||||
==== `tie_breaker`
|
||||
===== `tie_breaker`
|
||||
|
||||
By default, each per-term `blended` query will use the best score returned by
|
||||
any field in a group, then these scores are added together to give the final
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-nested-query]]
|
||||
== Nested Query
|
||||
=== Nested Query
|
||||
|
||||
Nested query allows to query nested objects / docs (see
|
||||
<<mapping-nested-type,nested mapping>>). The
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-not-query]]
|
||||
== Not Query
|
||||
=== Not Query
|
||||
|
||||
A query that filters out matched documents using a query. For example:
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-or-query]]
|
||||
== Or Query
|
||||
=== Or Query
|
||||
|
||||
deprecated[2.0.0, Use the `bool` query instead]
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-prefix-query]]
|
||||
== Prefix Query
|
||||
=== Prefix Query
|
||||
|
||||
Matches documents that have fields containing terms with a specified
|
||||
prefix (*not analyzed*). The prefix query maps to Lucene `PrefixQuery`.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-query-string-query]]
|
||||
== Query String Query
|
||||
=== Query String Query
|
||||
|
||||
A query that uses a query parser in order to parse its content. Here is
|
||||
an example:
|
||||
|
@ -89,7 +89,7 @@ rewritten using the
|
|||
parameter.
|
||||
|
||||
[float]
|
||||
=== Default Field
|
||||
==== Default Field
|
||||
|
||||
When not explicitly specifying the field to search on in the query
|
||||
string syntax, the `index.query.default_field` will be used to derive
|
||||
|
@ -99,7 +99,7 @@ So, if `_all` field is disabled, it might make sense to change it to set
|
|||
a different default field.
|
||||
|
||||
[float]
|
||||
=== Multi Field
|
||||
==== Multi Field
|
||||
|
||||
The `query_string` query can also run against multiple fields. Fields can be
|
||||
provided via the `"fields"` parameter (example below).
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[[query-string-syntax]]
|
||||
|
||||
=== Query string syntax
|
||||
==== Query string syntax
|
||||
|
||||
The query string ``mini-language'' is used by the
|
||||
<<query-dsl-query-string-query>> and by the
|
||||
|
@ -14,7 +14,7 @@ phrase, in the same order.
|
|||
Operators allow you to customize the search -- the available options are
|
||||
explained below.
|
||||
|
||||
==== Field names
|
||||
===== Field names
|
||||
|
||||
As mentioned in <<query-dsl-query-string-query>>, the `default_field` is searched for the
|
||||
search terms, but it is possible to specify other fields in the query syntax:
|
||||
|
@ -46,7 +46,7 @@ search terms, but it is possible to specify other fields in the query syntax:
|
|||
|
||||
_exists_:title
|
||||
|
||||
==== Wildcards
|
||||
===== Wildcards
|
||||
|
||||
Wildcard searches can be run on individual terms, using `?` to replace
|
||||
a single character, and `*` to replace zero or more characters:
|
||||
|
@ -58,12 +58,12 @@ perform very badly -- just think how many terms need to be queried to
|
|||
match the query string `"a* b* c*"`.
|
||||
|
||||
[WARNING]
|
||||
======
|
||||
=======
|
||||
Allowing a wildcard at the beginning of a word (eg `"*ing"`) is particularly
|
||||
heavy, because all terms in the index need to be examined, just in case
|
||||
they match. Leading wildcards can be disabled by setting
|
||||
`allow_leading_wildcard` to `false`.
|
||||
======
|
||||
=======
|
||||
|
||||
Wildcarded terms are not analyzed by default -- they are lowercased
|
||||
(`lowercase_expanded_terms` defaults to `true`) but no further analysis
|
||||
|
@ -72,7 +72,7 @@ is missing some of its letters. However, by setting `analyze_wildcard` to
|
|||
`true`, an attempt will be made to analyze wildcarded words before searching
|
||||
the term list for matching terms.
|
||||
|
||||
==== Regular expressions
|
||||
===== Regular expressions
|
||||
|
||||
Regular expression patterns can be embedded in the query string by
|
||||
wrapping them in forward-slashes (`"/"`):
|
||||
|
@ -82,7 +82,7 @@ wrapping them in forward-slashes (`"/"`):
|
|||
The supported regular expression syntax is explained in <<regexp-syntax>>.
|
||||
|
||||
[WARNING]
|
||||
======
|
||||
=======
|
||||
The `allow_leading_wildcard` parameter does not have any control over
|
||||
regular expressions. A query string such as the following would force
|
||||
Elasticsearch to visit every term in the index:
|
||||
|
@ -90,9 +90,9 @@ Elasticsearch to visit every term in the index:
|
|||
/.*n/
|
||||
|
||||
Use with caution!
|
||||
======
|
||||
=======
|
||||
|
||||
==== Fuzziness
|
||||
===== Fuzziness
|
||||
|
||||
We can search for terms that are
|
||||
similar to, but not exactly like our search terms, using the ``fuzzy''
|
||||
|
@ -112,7 +112,7 @@ sufficient to catch 80% of all human misspellings. It can be specified as:
|
|||
|
||||
quikc~1
|
||||
|
||||
==== Proximity searches
|
||||
===== Proximity searches
|
||||
|
||||
While a phrase query (eg `"john smith"`) expects all of the terms in exactly
|
||||
the same order, a proximity query allows the specified words to be further
|
||||
|
@ -127,7 +127,7 @@ query string, the more relevant that document is considered to be. When
|
|||
compared to the above example query, the phrase `"quick fox"` would be
|
||||
considered more relevant than `"quick brown fox"`.
|
||||
|
||||
==== Ranges
|
||||
===== Ranges
|
||||
|
||||
Ranges can be specified for date, numeric or string fields. Inclusive ranges
|
||||
are specified with square brackets `[min TO max]` and exclusive ranges with
|
||||
|
@ -168,20 +168,20 @@ Ranges with one side unbounded can use the following syntax:
|
|||
age:<=10
|
||||
|
||||
[NOTE]
|
||||
===================================================================
|
||||
====================================================================
|
||||
To combine an upper and lower bound with the simplified syntax, you
|
||||
would need to join two clauses with an `AND` operator:
|
||||
|
||||
age:(>=10 AND <20)
|
||||
age:(+>=10 +<20)
|
||||
|
||||
===================================================================
|
||||
====================================================================
|
||||
|
||||
The parsing of ranges in query strings can be complex and error prone. It is
|
||||
much more reliable to use an explicit <<query-dsl-range-query,`range` query>>.
|
||||
|
||||
|
||||
==== Boosting
|
||||
===== Boosting
|
||||
|
||||
Use the _boost_ operator `^` to make one term more relevant than another.
|
||||
For instance, if we want to find all documents about foxes, but we are
|
||||
|
@ -196,7 +196,7 @@ Boosts can also be applied to phrases or to groups:
|
|||
|
||||
"john smith"^2 (foo bar)^4
|
||||
|
||||
==== Boolean operators
|
||||
===== Boolean operators
|
||||
|
||||
By default, all terms are optional, as long as one term matches. A search
|
||||
for `foo bar baz` will find any document that contains one or more of
|
||||
|
@ -256,7 +256,7 @@ would look like this:
|
|||
|
||||
****
|
||||
|
||||
==== Grouping
|
||||
===== Grouping
|
||||
|
||||
Multiple terms or clauses can be grouped together with parentheses, to form
|
||||
sub-queries:
|
||||
|
@ -268,7 +268,7 @@ of a sub-query:
|
|||
|
||||
status:(active OR pending) title:(full text search)^2
|
||||
|
||||
==== Reserved characters
|
||||
===== Reserved characters
|
||||
|
||||
If you need to use any of the characters which function as operators in your
|
||||
query itself (and not as operators), then you should escape them with
|
||||
|
@ -290,7 +290,7 @@ index is actually `"wifi"`. Escaping the space will protect it from
|
|||
being touched by the query string parser: `"wi\ fi"`.
|
||||
****
|
||||
|
||||
==== Empty Query
|
||||
===== Empty Query
|
||||
|
||||
If the query string is empty or contains only whitespace, the query will
|
||||
yield an empty result set.
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
[[query-filter-context]]
|
||||
== Query and filter context
|
||||
|
||||
The behaviour of a query clause depends on whether it is used in _query context_ or
|
||||
in _filter context_:
|
||||
|
||||
Query context::
|
||||
+
|
||||
--
|
||||
A query clause used in query context answers the question ``__How well does this
|
||||
document match this query clause?__'' Besides deciding whether or not the
|
||||
document matches, the query clause also calculates a `_score` representing how
|
||||
well the document matches, relative to other documents.
|
||||
|
||||
Query context is in effect whenever a query clause is passed to a `query` parameter,
|
||||
such as the `query` parameter in the <<search-request-query,`search`>> API.
|
||||
--
|
||||
|
||||
Filter context::
|
||||
+
|
||||
--
|
||||
In _filter_ context, a query clause answers the question ``__Does this document
|
||||
match this query clause?__'' The answer is a simple Yes or No -- no scores are
|
||||
calculated. Filter context is mostly used for filtering structured data, e.g.
|
||||
|
||||
* __Does this +timestamp+ fall into the range 2015 to 2016?__
|
||||
* __Is the +status+ field set to ++"published"++?__
|
||||
|
||||
Frequently used filters will be cached automatically by Elasticsearch, to
|
||||
speed up performance.
|
||||
|
||||
Filter context is in effect whenever a query clause is passed to a `filter`
|
||||
parameter, such as the `filter` or `must_not` parameters in the
|
||||
<<query-dsl-bool-query,`bool`>> query, the `filter` parameter in the
|
||||
<<query-dsl-constant-score-query,`constant_score`>> query, or the
|
||||
<<search-aggregations-bucket-filter-aggregation,`filter`>> aggregation.
|
||||
--
|
||||
|
||||
Below is an example of query clauses being used in query and filter context
|
||||
in the `search` API. This query will match documents where all of the following
|
||||
conditions are met:
|
||||
|
||||
* The `title` field contains the word `search`.
|
||||
* The `content` field contains the word `elasticsearch`.
|
||||
* The `status` field contains the exact word `published`.
|
||||
* The `publish_date` field contains a date from 1 Jan 2015 onwards.
|
||||
|
||||
[source,json]
|
||||
------------------------------------
|
||||
GET _search
|
||||
{
|
||||
"query": { <1>
|
||||
"bool": { <2>
|
||||
"must": [
|
||||
{ "match": { "title": "Search" }}, <2>
|
||||
{ "match": { "content": "Elasticsearch" }} <2>
|
||||
],
|
||||
"filter": [ <3>
|
||||
{ "term": { "status": "published" }}, <4>
|
||||
{ "range": { "publish_date": { "gte": "2015-01-01" }}} <4>
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
------------------------------------
|
||||
<1> The `query` parameter indicates query context.
|
||||
<2> The `bool` and two `match` clauses are used in query context,
|
||||
which means that they are used to score how well each document
|
||||
matches.
|
||||
<3> The `filter` parameter indicates filter context.
|
||||
<4> The `term` and `range` clauses are used in filter context.
|
||||
They will filter out documents which do not match, but they will
|
||||
not affect the score for matching documents.
|
||||
|
||||
TIP: Use query clauses in query context for conditions which should affect the
|
||||
score of matching documents (i.e. how well does the document match), and use
|
||||
all other query clauses in filter context.
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-range-query]]
|
||||
== Range Query
|
||||
=== Range Query
|
||||
|
||||
Matches documents with fields that have terms within a certain range.
|
||||
The type of the Lucene query depends on the field type, for `string`
|
||||
|
@ -30,7 +30,7 @@ The `range` query accepts the following parameters:
|
|||
`boost`:: Sets the boost value of the query, defaults to `1.0`
|
||||
|
||||
[float]
|
||||
=== Date options
|
||||
==== Date options
|
||||
|
||||
When applied to `date` fields, the `range` filter also accepts a `time_zone` parameter.
|
||||
The `time_zone` parameter will be applied to your input lower and upper bounds and will
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-regexp-query]]
|
||||
== Regexp Query
|
||||
=== Regexp Query
|
||||
|
||||
The `regexp` query allows you to use regular expression term queries.
|
||||
See <<regexp-syntax>> for details of the supported regular expression language.
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
[[regexp-syntax]]
|
||||
=== Regular expression syntax
|
||||
==== Regular expression syntax
|
||||
|
||||
Regular expression queries are supported by the `regexp` and the `query_string`
|
||||
queries. The Lucene regular expression engine
|
||||
is not Perl-compatible but supports a smaller range of operators.
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
=====
|
||||
We will not attempt to explain regular expressions, but
|
||||
just explain the supported operators.
|
||||
====
|
||||
=====
|
||||
|
||||
==== Standard operators
|
||||
===== Standard operators
|
||||
|
||||
Anchoring::
|
||||
+
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-script-query]]
|
||||
== Script Query
|
||||
=== Script Query
|
||||
|
||||
A query that allows you to define
|
||||
<<modules-scripting,scripts>> as filters. For
|
||||
|
@ -20,7 +20,7 @@ example:
|
|||
----------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Custom Parameters
|
||||
==== Custom Parameters
|
||||
|
||||
Scripts are compiled and cached for faster execution. If the same script
|
||||
can be used, just with different parameters provided, it is preferable
|
||||
|
@ -34,9 +34,11 @@ to use the ability to pass parameters to the script itself, for example:
|
|||
},
|
||||
"filter" : {
|
||||
"script" : {
|
||||
"script" : "doc['num1'].value > param1"
|
||||
"params" : {
|
||||
"param1" : 5
|
||||
"script" : {
|
||||
"inline" : "doc['num1'].value > param1",
|
||||
"params" : {
|
||||
"param1" : 5
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-simple-query-string-query]]
|
||||
== Simple Query String Query
|
||||
=== Simple Query String Query
|
||||
|
||||
A query that uses the SimpleQueryParser to parse its content. Unlike the
|
||||
regular `query_string` query, the `simple_query_string` query will never
|
||||
|
@ -57,7 +57,7 @@ Defaults to `ROOT`.
|
|||
|=======================================================================
|
||||
|
||||
[float]
|
||||
==== Simple Query String Syntax
|
||||
===== Simple Query String Syntax
|
||||
The `simple_query_string` supports the following special characters:
|
||||
|
||||
* `+` signifies AND operation
|
||||
|
@ -73,7 +73,7 @@ In order to search for any of these special characters, they will need to
|
|||
be escaped with `\`.
|
||||
|
||||
[float]
|
||||
=== Default Field
|
||||
==== Default Field
|
||||
When not explicitly specifying the field to search on in the query
|
||||
string syntax, the `index.query.default_field` will be used to derive
|
||||
which field to search on. It defaults to `_all` field.
|
||||
|
@ -82,7 +82,7 @@ So, if `_all` field is disabled, it might make sense to change it to set
|
|||
a different default field.
|
||||
|
||||
[float]
|
||||
=== Multi Field
|
||||
==== Multi Field
|
||||
The fields parameter can also include pattern based field names,
|
||||
allowing them to automatically expand to the relevant fields (dynamically
|
||||
introduced fields included). For example:
|
||||
|
@ -98,7 +98,7 @@ introduced fields included). For example:
|
|||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
=== Flags
|
||||
==== Flags
|
||||
`simple_query_string` supports multiple flags to specify which parsing features
|
||||
should be enabled. It is specified as a `|`-delimited string with the
|
||||
`flags` parameter:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-span-containing-query]]
|
||||
== Span Containing Query
|
||||
=== Span Containing Query
|
||||
|
||||
Returns matches which enclose another span query. The span containing
|
||||
query maps to Lucene `SpanContainingQuery`. Here is an example:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-span-first-query]]
|
||||
== Span First Query
|
||||
=== Span First Query
|
||||
|
||||
Matches spans near the beginning of a field. The span first query maps
|
||||
to Lucene `SpanFirstQuery`. Here is an example:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-span-multi-term-query]]
|
||||
== Span Multi Term Query
|
||||
=== Span Multi Term Query
|
||||
|
||||
The `span_multi` query allows you to wrap a `multi term query` (one of wildcard,
|
||||
fuzzy, prefix, term, range or regexp query) as a `span query`, so
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-span-near-query]]
|
||||
== Span Near Query
|
||||
=== Span Near Query
|
||||
|
||||
Matches spans which are near one another. One can specify _slop_, the
|
||||
maximum number of intervening unmatched positions, as well as whether
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[query-dsl-span-not-query]]
|
||||
== Span Not Query
|
||||
=== Span Not Query
|
||||
|
||||
Removes matches which overlap with another span query. The span not
|
||||
query maps to Lucene `SpanNotQuery`. Here is an example:
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue