Merge remote-tracking branch 'origin/master' into feature/synced_flush
|
@ -34,6 +34,10 @@ h2. Getting Started
|
||||||
|
|
||||||
First of all, DON'T PANIC. It will take 5 minutes to get the gist of what Elasticsearch is all about.
|
First of all, DON'T PANIC. It will take 5 minutes to get the gist of what Elasticsearch is all about.
|
||||||
|
|
||||||
|
h3. Requirements
|
||||||
|
|
||||||
|
You need to have a recent version of Java installed. See the "Setup":http://www.elastic.co/guide/en/elasticsearch/reference/current/setup.html#jvm-version page for more information.
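As a quick, illustrative check (not part of the official setup docs), you can print the installed Java version from a terminal:

<pre>
java -version
</pre>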
|
||||||
|
|
||||||
h3. Installation
|
h3. Installation
|
||||||
|
|
||||||
* "Download":https://www.elastic.co/downloads/elasticsearch and unzip the Elasticsearch official distribution.
|
* "Download":https://www.elastic.co/downloads/elasticsearch and unzip the Elasticsearch official distribution.
|
||||||
|
|
|
@ -137,13 +137,11 @@ set JVM_SS=256
|
||||||
|
|
||||||
if "%DATA_DIR%" == "" set DATA_DIR=%ES_HOME%\data
|
if "%DATA_DIR%" == "" set DATA_DIR=%ES_HOME%\data
|
||||||
|
|
||||||
if "%WORK_DIR%" == "" set WORK_DIR=%ES_HOME%
|
|
||||||
|
|
||||||
if "%CONF_DIR%" == "" set CONF_DIR=%ES_HOME%\config
|
if "%CONF_DIR%" == "" set CONF_DIR=%ES_HOME%\config
|
||||||
|
|
||||||
if "%CONF_FILE%" == "" set CONF_FILE=%ES_HOME%\config\elasticsearch.yml
|
if "%CONF_FILE%" == "" set CONF_FILE=%ES_HOME%\config\elasticsearch.yml
|
||||||
|
|
||||||
set ES_PARAMS=-Delasticsearch;-Des.path.home="%ES_HOME%";-Des.default.config="%CONF_FILE%";-Des.default.path.home="%ES_HOME%";-Des.default.path.logs="%LOG_DIR%";-Des.default.path.data="%DATA_DIR%";-Des.default.path.work="%WORK_DIR%";-Des.default.path.conf="%CONF_DIR%"
|
set ES_PARAMS=-Delasticsearch;-Des.path.home="%ES_HOME%";-Des.default.config="%CONF_FILE%";-Des.default.path.home="%ES_HOME%";-Des.default.path.logs="%LOG_DIR%";-Des.default.path.data="%DATA_DIR%";-Des.default.path.conf="%CONF_DIR%"
|
||||||
|
|
||||||
set JVM_OPTS=%JAVA_OPTS: =;%
|
set JVM_OPTS=%JAVA_OPTS: =;%
|
||||||
|
|
||||||
|
|
|
@ -1,373 +1,99 @@
|
||||||
##################### Elasticsearch Configuration Example #####################
|
# ======================== Elasticsearch Configuration =========================
|
||||||
|
|
||||||
# This file contains an overview of various configuration settings,
|
|
||||||
# targeted at operations staff. Application developers should
|
|
||||||
# consult the guide at <http://elasticsearch.org/guide>.
|
|
||||||
#
|
#
|
||||||
# The installation procedure is covered at
|
# NOTE: Elasticsearch comes with reasonable defaults for most settings.
|
||||||
# <http://elasticsearch.org/guide/en/elasticsearch/reference/current/setup.html>.
|
# Before you set out to tweak and tune the configuration, make sure you
|
||||||
|
# understand what you are trying to accomplish and the consequences.
|
||||||
#
|
#
|
||||||
# Elasticsearch comes with reasonable defaults for most settings,
|
# The primary way of configuring a node is via this file. This template lists
|
||||||
# so you can try it out without bothering with configuration.
|
# the most important settings you may want to configure for a production cluster.
|
||||||
#
|
#
|
||||||
# Most of the time, these defaults are just fine for running a production
|
# Please see the documentation for further information on configuration options:
|
||||||
# cluster. If you're fine-tuning your cluster, or wondering about the
|
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/setup-configuration.html>
|
||||||
# effect of certain configuration option, please _do ask_ on the
|
|
||||||
# mailing list or IRC channel [http://elasticsearch.org/community].
|
|
||||||
|
|
||||||
# Any element in the configuration can be replaced with environment variables
|
|
||||||
# by placing them in ${...} notation. For example:
|
|
||||||
#
|
#
|
||||||
#node.rack: ${RACK_ENV_VAR}
|
# ---------------------------------- Cluster -----------------------------------
|
||||||
|
|
||||||
# For information on supported formats and syntax for the config file, see
|
|
||||||
# <http://elasticsearch.org/guide/en/elasticsearch/reference/current/setup-configuration.html>
|
|
||||||
|
|
||||||
|
|
||||||
################################### Cluster ###################################
|
|
||||||
|
|
||||||
# Cluster name identifies your cluster for auto-discovery. If you're running
|
|
||||||
# multiple clusters on the same network, make sure you're using unique names.
|
|
||||||
#
|
#
|
||||||
#cluster.name: elasticsearch
|
# Use a descriptive name for your cluster:
|
||||||
|
|
||||||
|
|
||||||
#################################### Node #####################################
|
|
||||||
|
|
||||||
# Node names are generated dynamically on startup, so you're relieved
|
|
||||||
# from configuring them manually. You can tie this node to a specific name:
|
|
||||||
#
|
#
|
||||||
#node.name: "Franz Kafka"
|
# cluster.name: my-application
|
||||||
|
|
||||||
# Every node can be configured to allow or deny being eligible as the master,
|
|
||||||
# and to allow or deny to store the data.
|
|
||||||
#
|
#
|
||||||
# Allow this node to be eligible as a master node (enabled by default):
|
# ------------------------------------ Node ------------------------------------
|
||||||
#
|
#
|
||||||
#node.master: true
|
# Use a descriptive name for the node:
|
||||||
#
|
#
|
||||||
# Allow this node to store data (enabled by default):
|
# node.name: node-1
|
||||||
#
|
#
|
||||||
#node.data: true
|
# Add custom attributes to the node:
|
||||||
|
|
||||||
# You can exploit these settings to design advanced cluster topologies.
|
|
||||||
#
|
#
|
||||||
# 1. You want this node to never become a master node, only to hold data.
|
# node.rack: r1
|
||||||
# This will be the "workhorse" of your cluster.
|
|
||||||
#
|
#
|
||||||
#node.master: false
|
# ----------------------------------- Paths ------------------------------------
|
||||||
#node.data: true
|
|
||||||
#
|
#
|
||||||
# 2. You want this node to only serve as a master: to not store any data and
|
# Path to directory where to store the data (separate multiple locations by comma):
|
||||||
# to have free resources. This will be the "coordinator" of your cluster.
|
|
||||||
#
|
|
||||||
#node.master: true
|
|
||||||
#node.data: false
|
|
||||||
#
|
|
||||||
# 3. You want this node to be neither master nor data node, but
|
|
||||||
# to act as a "search load balancer" (fetching data from nodes,
|
|
||||||
# aggregating results, etc.)
|
|
||||||
#
|
|
||||||
#node.master: false
|
|
||||||
#node.data: false
|
|
||||||
|
|
||||||
# Use the Cluster Health API [http://localhost:9200/_cluster/health], the
|
|
||||||
# Node Info API [http://localhost:9200/_nodes] or GUI tools
|
|
||||||
# such as <http://www.elasticsearch.org/overview/marvel/>,
|
|
||||||
# <http://github.com/karmi/elasticsearch-paramedic>,
|
|
||||||
# <http://github.com/lukas-vlcek/bigdesk> and
|
|
||||||
# <http://mobz.github.com/elasticsearch-head> to inspect the cluster state.
|
|
||||||
|
|
||||||
# A node can have generic attributes associated with it, which can later be used
|
|
||||||
# for customized shard allocation filtering, or allocation awareness. An attribute
|
|
||||||
# is a simple key value pair, similar to node.key: value, here is an example:
|
|
||||||
#
|
|
||||||
#node.rack: rack314
|
|
||||||
|
|
||||||
# By default, multiple nodes are allowed to start from the same installation location
|
|
||||||
# to disable it, set the following:
|
|
||||||
#node.max_local_storage_nodes: 1
|
|
||||||
|
|
||||||
|
|
||||||
#################################### Index ####################################
|
|
||||||
|
|
||||||
# You can set a number of options (such as shard/replica options, mapping
|
|
||||||
# or analyzer definitions, translog settings, ...) for indices globally,
|
|
||||||
# in this file.
|
|
||||||
#
|
|
||||||
# Note, that it makes more sense to configure index settings specifically for
|
|
||||||
# a certain index, either when creating it or by using the index templates API.
|
|
||||||
#
|
|
||||||
# See <http://elasticsearch.org/guide/en/elasticsearch/reference/current/index-modules.html> and
|
|
||||||
# <http://elasticsearch.org/guide/en/elasticsearch/reference/current/indices-create-index.html>
|
|
||||||
# for more information.
|
|
||||||
|
|
||||||
# Set the number of shards (splits) of an index (5 by default):
|
|
||||||
#
|
|
||||||
#index.number_of_shards: 5
|
|
||||||
|
|
||||||
# Set the number of replicas (additional copies) of an index (1 by default):
|
|
||||||
#
|
|
||||||
#index.number_of_replicas: 1
|
|
||||||
|
|
||||||
# Note, that for development on a local machine, with small indices, it usually
|
|
||||||
# makes sense to "disable" the distributed features:
|
|
||||||
#
|
|
||||||
#index.number_of_shards: 1
|
|
||||||
#index.number_of_replicas: 0
|
|
||||||
|
|
||||||
# These settings directly affect the performance of index and search operations
|
|
||||||
# in your cluster. Assuming you have enough machines to hold shards and
|
|
||||||
# replicas, the rule of thumb is:
|
|
||||||
#
|
|
||||||
# 1. Having more *shards* enhances the _indexing_ performance and allows to
|
|
||||||
# _distribute_ a big index across machines.
|
|
||||||
# 2. Having more *replicas* enhances the _search_ performance and improves the
|
|
||||||
# cluster _availability_.
|
|
||||||
#
|
|
||||||
# The "number_of_shards" is a one-time setting for an index.
|
|
||||||
#
|
|
||||||
# The "number_of_replicas" can be increased or decreased anytime,
|
|
||||||
# by using the Index Update Settings API.
|
|
||||||
#
|
|
||||||
# Elasticsearch takes care about load balancing, relocating, gathering the
|
|
||||||
# results from nodes, etc. Experiment with different settings to fine-tune
|
|
||||||
# your setup.
|
|
||||||
|
|
||||||
# Use the Index Status API (<http://localhost:9200/A/_status>) to inspect
|
|
||||||
# the index status.
|
|
||||||
|
|
||||||
|
|
||||||
#################################### Paths ####################################
|
|
||||||
|
|
||||||
# Path to directory containing configuration (this file and logging.yml):
|
|
||||||
#
|
|
||||||
#path.conf: /path/to/conf
|
|
||||||
|
|
||||||
# Path to directory where to store index data allocated for this node.
|
|
||||||
#
|
#
|
||||||
# path.data: /path/to/data
|
# path.data: /path/to/data
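#
# For example (illustration only), multiple comma-separated locations:
#
# path.data: /path/to/data1,/path/to/data2
#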
|
||||||
#
|
#
|
||||||
# Can optionally include more than one location, causing data to be striped across
|
|
||||||
# the locations (a la RAID 0) on a file level, favouring locations with most free
|
|
||||||
# space on creation. For example:
|
|
||||||
#
|
|
||||||
#path.data: /path/to/data1,/path/to/data2
|
|
||||||
|
|
||||||
# Path to temporary files:
|
|
||||||
#
|
|
||||||
#path.work: /path/to/work
|
|
||||||
|
|
||||||
# Path to log files:
|
# Path to log files:
|
||||||
#
|
#
|
||||||
# path.logs: /path/to/logs
|
# path.logs: /path/to/logs
|
||||||
|
|
||||||
# Path to where plugins are installed:
|
|
||||||
#
|
#
|
||||||
#path.plugins: /path/to/plugins
|
# ----------------------------------- Memory -----------------------------------
|
||||||
|
|
||||||
|
|
||||||
#################################### Plugin ###################################
|
|
||||||
|
|
||||||
# If a plugin listed here is not installed for current node, the node will not start.
|
|
||||||
#
|
#
|
||||||
#plugin.mandatory: mapper-attachments,lang-groovy
|
# Lock the memory on startup:
|
||||||
|
|
||||||
|
|
||||||
################################### Memory ####################################
|
|
||||||
|
|
||||||
# Elasticsearch performs poorly when JVM starts swapping: you should ensure that
|
|
||||||
# it _never_ swaps.
|
|
||||||
#
|
|
||||||
# Set this property to true to lock the memory:
|
|
||||||
#
|
#
|
||||||
# bootstrap.mlockall: true
|
# bootstrap.mlockall: true
|
||||||
|
|
||||||
# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set
|
|
||||||
# to the same value, and that the machine has enough memory to allocate
|
|
||||||
# for Elasticsearch, leaving enough memory for the operating system itself.
|
|
||||||
#
|
#
|
||||||
# You should also make sure that the Elasticsearch process is allowed to lock
|
# Make sure that the `ES_HEAP_SIZE` environment variable is set to about half the memory
|
||||||
# the memory, eg. by using `ulimit -l unlimited`.
|
# available on the system and that the owner of the process is allowed to use this limit.
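#
# For example (illustration only, not part of the original template): on a machine
# with 16GB of RAM you might export ES_HEAP_SIZE=8g before starting Elasticsearch.
#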
|
||||||
|
|
||||||
|
|
||||||
############################## Network And HTTP ###############################
|
|
||||||
|
|
||||||
# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens
|
|
||||||
# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node
|
|
||||||
# communication. (the range means that if the port is busy, it will automatically
|
|
||||||
# try the next port).
|
|
||||||
|
|
||||||
# Set the bind address specifically (IPv4 or IPv6):
|
|
||||||
#
|
#
|
||||||
#network.bind_host: 192.168.0.1
|
# Elasticsearch performs poorly when the system is swapping the memory.
|
||||||
|
|
||||||
# Set the address other nodes will use to communicate with this node. If not
|
|
||||||
# set, it is automatically derived. It must point to an actual IP address.
|
|
||||||
#
|
#
|
||||||
#network.publish_host: 192.168.0.1
|
# ---------------------------------- Network -----------------------------------
|
||||||
|
#
|
||||||
# Set both 'bind_host' and 'publish_host':
|
# Set the bind address to a specific IP (IPv4 or IPv6):
|
||||||
#
|
#
|
||||||
# network.host: 192.168.0.1
|
# network.host: 192.168.0.1
|
||||||
|
|
||||||
# Set a custom port for the node to node communication (9300 by default):
|
|
||||||
#
|
#
|
||||||
#transport.tcp.port: 9300
|
# Set a custom port for HTTP:
|
||||||
|
|
||||||
# Enable compression for all communication between nodes (disabled by default):
|
|
||||||
#
|
|
||||||
#transport.tcp.compress: true
|
|
||||||
|
|
||||||
# Set a custom port to listen for HTTP traffic:
|
|
||||||
#
|
#
|
||||||
# http.port: 9200
|
# http.port: 9200
|
||||||
|
|
||||||
# Set a custom allowed content length:
|
|
||||||
#
|
#
|
||||||
#http.max_content_length: 100mb
|
# For more information, see the documentation at:
|
||||||
|
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-network.html>
|
||||||
# Disable HTTP completely:
|
|
||||||
#
|
#
|
||||||
#http.enabled: false
|
# ---------------------------------- Gateway -----------------------------------
|
||||||
|
|
||||||
|
|
||||||
################################### Gateway ###################################
|
|
||||||
|
|
||||||
# The gateway allows for persisting the cluster state between full cluster
|
|
||||||
# restarts. Every change to the state (such as adding an index) will be stored
|
|
||||||
# in the gateway, and when the cluster starts up for the first time,
|
|
||||||
# it will read its state from the gateway.
|
|
||||||
|
|
||||||
# For more information, see
|
|
||||||
# <http://elasticsearch.org/guide/en/elasticsearch/reference/current/modules-gateway.html>.
|
|
||||||
|
|
||||||
# Settings below control how and when to start the initial recovery process on
|
|
||||||
# a full cluster restart (to reuse as much local data as possible when using shared
|
|
||||||
# gateway).
|
|
||||||
|
|
||||||
# Allow recovery process after N nodes in a cluster are up:
|
|
||||||
#
|
#
|
||||||
#gateway.recover_after_nodes: 1
|
# Block initial recovery after a full cluster restart until N nodes are started:
|
||||||
|
|
||||||
# Set the timeout to initiate the recovery process, once the N nodes
|
|
||||||
# from previous setting are up (accepts time value):
|
|
||||||
#
|
#
|
||||||
#gateway.recover_after_time: 5m
|
# gateway.recover_after_nodes: 3
|
||||||
|
|
||||||
# Set how many nodes are expected in this cluster. Once these N nodes
|
|
||||||
# are up (and recover_after_nodes is met), begin recovery process immediately
|
|
||||||
# (without waiting for recover_after_time to expire):
|
|
||||||
#
|
#
|
||||||
#gateway.expected_nodes: 2
|
# For more information, see the documentation at:
|
||||||
|
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-gateway.html>
|
||||||
|
|
||||||
############################# Recovery Throttling #############################
|
|
||||||
|
|
||||||
# These settings allow to control the process of shards allocation between
|
|
||||||
# nodes during initial recovery, replica allocation, rebalancing,
|
|
||||||
# or when adding and removing nodes.
|
|
||||||
|
|
||||||
# Set the number of concurrent recoveries happening on a node:
|
|
||||||
#
|
#
|
||||||
# 1. During the initial recovery
|
# --------------------------------- Discovery ----------------------------------
|
||||||
#
|
#
|
||||||
#cluster.routing.allocation.node_initial_primaries_recoveries: 4
|
# Elasticsearch nodes will find each other via multicast, by default.
|
||||||
#
|
#
|
||||||
# 2. During adding/removing nodes, rebalancing, etc
|
# To use the unicast discovery, disable the multicast discovery:
|
||||||
#
|
|
||||||
#cluster.routing.allocation.node_concurrent_recoveries: 2
|
|
||||||
|
|
||||||
# Set to throttle throughput when recovering (eg. 100mb, by default 20mb):
|
|
||||||
#
|
|
||||||
#indices.recovery.max_bytes_per_sec: 20mb
|
|
||||||
|
|
||||||
# Set to limit the number of open concurrent streams when
|
|
||||||
# recovering a shard from a peer:
|
|
||||||
#
|
|
||||||
#indices.recovery.concurrent_streams: 5
|
|
||||||
|
|
||||||
|
|
||||||
################################## Discovery ##################################
|
|
||||||
|
|
||||||
# Discovery infrastructure ensures nodes can be found within a cluster
|
|
||||||
# and master node is elected. Multicast discovery is the default.
|
|
||||||
|
|
||||||
# Set to ensure a node sees N other master eligible nodes to be considered
|
|
||||||
# operational within the cluster. This should be set to a quorum/majority of
|
|
||||||
# the master-eligible nodes in the cluster.
|
|
||||||
#
|
|
||||||
#discovery.zen.minimum_master_nodes: 1
|
|
||||||
|
|
||||||
# Set the time to wait for ping responses from other nodes when discovering.
|
|
||||||
# Set this option to a higher value on a slow or congested network
|
|
||||||
# to minimize discovery failures:
|
|
||||||
#
|
|
||||||
#discovery.zen.ping.timeout: 3s
|
|
||||||
|
|
||||||
# For more information, see
|
|
||||||
# <http://elasticsearch.org/guide/en/elasticsearch/reference/current/modules-discovery-zen.html>
|
|
||||||
|
|
||||||
# Unicast discovery allows to explicitly control which nodes will be used
|
|
||||||
# to discover the cluster. It can be used when multicast is not present,
|
|
||||||
# or to restrict the cluster communication-wise.
|
|
||||||
#
|
|
||||||
# 1. Disable multicast discovery (enabled by default):
|
|
||||||
#
|
#
|
||||||
# discovery.zen.ping.multicast.enabled: false
|
# discovery.zen.ping.multicast.enabled: false
|
||||||
#
|
#
|
||||||
# 2. Configure an initial list of master nodes in the cluster
|
# Pass an initial list of hosts to perform discovery when a new node is started:
|
||||||
# to perform discovery when new nodes (master or data) are started:
|
|
||||||
#
|
#
|
||||||
#discovery.zen.ping.unicast.hosts: ["host1", "host2:port"]
|
# discovery.zen.ping.unicast.hosts: ["host1", "host2"]
|
||||||
|
|
||||||
# EC2 discovery allows to use AWS EC2 API in order to perform discovery.
|
|
||||||
#
|
#
|
||||||
# You have to install the cloud-aws plugin for enabling the EC2 discovery.
|
# Prevent the "split brain" by configuring the majority of nodes (total number of nodes / 2 + 1):
|
||||||
#
|
#
|
||||||
# For more information, see
|
# discovery.zen.minimum_master_nodes: 3
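#
# For example (illustration only): with 5 master-eligible nodes the majority is
# 5 / 2 + 1 = 3, which is the value shown above.
#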
|
||||||
# <http://elasticsearch.org/guide/en/elasticsearch/reference/current/modules-discovery-ec2.html>
|
|
||||||
#
|
#
|
||||||
# See <http://elasticsearch.org/tutorials/elasticsearch-on-ec2/>
|
# For more information, see the documentation at:
|
||||||
# for a step-by-step tutorial.
|
# <http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery.html>
|
||||||
|
|
||||||
# GCE discovery allows to use Google Compute Engine API in order to perform discovery.
|
|
||||||
#
|
#
|
||||||
# You have to install the cloud-gce plugin for enabling the GCE discovery.
|
# ---------------------------------- Various -----------------------------------
|
||||||
#
|
#
|
||||||
# For more information, see <https://github.com/elasticsearch/elasticsearch-cloud-gce>.
|
# Disable starting multiple nodes on a single system:
|
||||||
|
|
||||||
# Azure discovery allows to use Azure API in order to perform discovery.
|
|
||||||
#
|
#
|
||||||
# You have to install the cloud-azure plugin for enabling the Azure discovery.
|
# node.max_local_storage_nodes: 1
|
||||||
#
|
#
|
||||||
# For more information, see <https://github.com/elasticsearch/elasticsearch-cloud-azure>.
|
# Require explicit names when deleting indices:
|
||||||
|
#
|
||||||
################################## Slow Log ##################################
|
# action.destructive_requires_name: true
|
||||||
|
|
||||||
# Shard level query and fetch threshold logging.
|
|
||||||
|
|
||||||
#index.search.slowlog.threshold.query.warn: 10s
|
|
||||||
#index.search.slowlog.threshold.query.info: 5s
|
|
||||||
#index.search.slowlog.threshold.query.debug: 2s
|
|
||||||
#index.search.slowlog.threshold.query.trace: 500ms
|
|
||||||
|
|
||||||
#index.search.slowlog.threshold.fetch.warn: 1s
|
|
||||||
#index.search.slowlog.threshold.fetch.info: 800ms
|
|
||||||
#index.search.slowlog.threshold.fetch.debug: 500ms
|
|
||||||
#index.search.slowlog.threshold.fetch.trace: 200ms
|
|
||||||
|
|
||||||
#index.indexing.slowlog.threshold.index.warn: 10s
|
|
||||||
#index.indexing.slowlog.threshold.index.info: 5s
|
|
||||||
#index.indexing.slowlog.threshold.index.debug: 2s
|
|
||||||
#index.indexing.slowlog.threshold.index.trace: 500ms
|
|
||||||
|
|
||||||
################################## GC Logging ################################
|
|
||||||
|
|
||||||
#monitor.jvm.gc.young.warn: 1000ms
|
|
||||||
#monitor.jvm.gc.young.info: 700ms
|
|
||||||
#monitor.jvm.gc.young.debug: 400ms
|
|
||||||
|
|
||||||
#monitor.jvm.gc.old.warn: 10s
|
|
||||||
#monitor.jvm.gc.old.info: 5s
|
|
||||||
#monitor.jvm.gc.old.debug: 2s
|
|
||||||
|
|
|
@ -57,7 +57,9 @@ Once it's done it will print all the remaining steps.
|
||||||
- Python 3k for script execution
|
- Python 3k for script execution
|
||||||
- Boto for S3 Upload ($ apt-get install python-boto)
|
- Boto for S3 Upload ($ apt-get install python-boto)
|
||||||
- RPM for RPM building ($ apt-get install rpm)
|
- RPM for RPM building ($ apt-get install rpm)
|
||||||
- S3 keys exported via ENV Variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
- S3 keys exported via ENV variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
||||||
|
- GPG data exported via ENV variables (GPG_KEY_ID, GPG_PASSPHRASE, optionally GPG_KEYRING)
|
||||||
|
- S3 target repository via ENV variables (S3_BUCKET_SYNC_TO, optionally S3_BUCKET_SYNC_FROM)
|
||||||
"""
|
"""
|
||||||
env = os.environ
|
env = os.environ
|
||||||
|
|
||||||
|
@ -246,10 +248,13 @@ def build_release(run_tests=False, dry_run=True, cpus=1, bwc_version=None):
|
||||||
print('Running Backwards compatibility tests against version [%s]' % (bwc_version))
|
print('Running Backwards compatibility tests against version [%s]' % (bwc_version))
|
||||||
run_mvn('clean', 'test -Dtests.filter=@backwards -Dtests.bwc.version=%s -Dtests.bwc=true -Dtests.jvms=1' % bwc_version)
|
run_mvn('clean', 'test -Dtests.filter=@backwards -Dtests.bwc.version=%s -Dtests.bwc=true -Dtests.jvms=1' % bwc_version)
|
||||||
run_mvn('clean test-compile -Dforbidden.test.signatures="org.apache.lucene.util.LuceneTestCase\$AwaitsFix @ Please fix all bugs before release"')
|
run_mvn('clean test-compile -Dforbidden.test.signatures="org.apache.lucene.util.LuceneTestCase\$AwaitsFix @ Please fix all bugs before release"')
|
||||||
run_mvn('clean %s -DskipTests' % (target))
|
gpg_args = '-Dgpg.key="%s" -Dgpg.passphrase="%s" -Ddeb.sign=true' % (env.get('GPG_KEY_ID'), env.get('GPG_PASSPHRASE'))
|
||||||
|
if env.get('GPG_KEYRING'):
|
||||||
|
gpg_args += ' -Dgpg.keyring="%s"' % env.get('GPG_KEYRING')
|
||||||
|
run_mvn('clean %s -DskipTests %s' % (target, gpg_args))
|
||||||
success = False
|
success = False
|
||||||
try:
|
try:
|
||||||
run_mvn('-DskipTests rpm:rpm')
|
run_mvn('-DskipTests rpm:rpm %s' % (gpg_args))
|
||||||
success = True
|
success = True
|
||||||
finally:
|
finally:
|
||||||
if not success:
|
if not success:
|
||||||
|
@ -502,6 +507,14 @@ def publish_artifacts(artifacts, base='elasticsearch/elasticsearch', dry_run=Tru
|
||||||
# requires boto to be installed but it is not available on python3k yet so we use a dedicated tool
|
# requires boto to be installed but it is not available on python3k yet so we use a dedicated tool
|
||||||
run('python %s/upload-s3.py --file %s ' % (location, os.path.abspath(artifact)))
|
run('python %s/upload-s3.py --file %s ' % (location, os.path.abspath(artifact)))
|
||||||
|
|
||||||
|
def publish_repositories(version, dry_run=True):
|
||||||
|
if dry_run:
|
||||||
|
print('Skipping package repository update')
|
||||||
|
else:
|
||||||
|
print('Triggering repository update - calling dev-tools/build_repositories.sh %s' % version)
|
||||||
|
# src_branch is a version like 1.5/1.6/2.0/etc.. so we can use this
|
||||||
|
run('dev-tools/build_repositories.sh %s' % src_branch)
|
||||||
|
|
||||||
def print_sonatype_notice():
|
def print_sonatype_notice():
|
||||||
settings = os.path.join(os.path.expanduser('~'), '.m2/settings.xml')
|
settings = os.path.join(os.path.expanduser('~'), '.m2/settings.xml')
|
||||||
if os.path.isfile(settings):
|
if os.path.isfile(settings):
|
||||||
|
@ -536,6 +549,16 @@ def check_s3_credentials():
|
||||||
if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None):
|
if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None):
|
||||||
raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3')
|
raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3')
|
||||||
|
|
||||||
|
def check_gpg_credentials():
|
||||||
|
if not env.get('GPG_KEY_ID', None) or not env.get('GPG_PASSPHRASE', None):
|
||||||
|
raise RuntimeError('Could not find "GPG_KEY_ID" / "GPG_PASSPHRASE" in the env variables please export in order to sign the packages (also make sure that GPG_KEYRING is set when not in ~/.gnupg)')
|
||||||
|
|
||||||
|
def check_command_exists(name, cmd):
|
||||||
|
try:
|
||||||
|
subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
raise RuntimeError('Could not run command %s - please make sure it is installed' % (name))
|
||||||
|
|
||||||
VERSION_FILE = 'src/main/java/org/elasticsearch/Version.java'
|
VERSION_FILE = 'src/main/java/org/elasticsearch/Version.java'
|
||||||
POM_FILE = 'pom.xml'
|
POM_FILE = 'pom.xml'
|
||||||
|
|
||||||
|
@ -629,8 +652,15 @@ if __name__ == '__main__':
|
||||||
if os.path.exists(LOG):
|
if os.path.exists(LOG):
|
||||||
raise RuntimeError('please remove old release log %s first' % LOG)
|
raise RuntimeError('please remove old release log %s first' % LOG)
|
||||||
|
|
||||||
|
check_gpg_credentials()
|
||||||
|
check_command_exists('gpg', 'gpg --version')
|
||||||
|
check_command_exists('expect', 'expect -v')
|
||||||
|
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
check_s3_credentials()
|
check_s3_credentials()
|
||||||
|
check_command_exists('createrepo', 'createrepo --version')
|
||||||
|
check_command_exists('s3cmd', 's3cmd --version')
|
||||||
|
check_command_exists('apt-ftparchive', 'apt-ftparchive --version')
|
||||||
print('WARNING: dryrun is set to "false" - this will push and publish the release')
|
print('WARNING: dryrun is set to "false" - this will push and publish the release')
|
||||||
input('Press Enter to continue...')
|
input('Press Enter to continue...')
|
||||||
|
|
||||||
|
@ -687,6 +717,8 @@ if __name__ == '__main__':
|
||||||
merge_tag_push(remote, src_branch, release_version, dry_run)
|
merge_tag_push(remote, src_branch, release_version, dry_run)
|
||||||
print(' publish artifacts to S3 -- dry_run: %s' % dry_run)
|
print(' publish artifacts to S3 -- dry_run: %s' % dry_run)
|
||||||
publish_artifacts(artifacts_and_checksum, dry_run=dry_run)
|
publish_artifacts(artifacts_and_checksum, dry_run=dry_run)
|
||||||
|
print(' Updating package repositories -- dry_run: %s' % dry_run)
|
||||||
|
publish_repositories(src_branch, dry_run=dry_run)
|
||||||
cherry_pick_command = '.'
|
cherry_pick_command = '.'
|
||||||
if version_head_hash:
|
if version_head_hash:
|
||||||
cherry_pick_command = ' and cherry-pick the documentation changes: \'git cherry-pick %s\' to the development branch' % (version_head_hash)
|
cherry_pick_command = ' and cherry-pick the documentation changes: \'git cherry-pick %s\' to the development branch' % (version_head_hash)
|
||||||
|
|
|
@ -0,0 +1,247 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Licensed to Elasticsearch under one or more contributor
|
||||||
|
# license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright
|
||||||
|
# ownership. Elasticsearch licenses this file to you under
|
||||||
|
# the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on
|
||||||
|
# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||||
|
# either express or implied. See the License for the specific
|
||||||
|
# language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
# This tool uploads the debian and RPM packages to the specified S3 buckets
|
||||||
|
# The packages get signed as well
|
||||||
|
# It requires syncing the existing repository contents first
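#
# Example invocation (illustrative; assumes the required environment variables
# documented below have been exported first):
#
#   dev-tools/build_repositories.sh 1.5
#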
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
###################
|
||||||
|
## environment variables
|
||||||
|
##
|
||||||
|
## required
|
||||||
|
##
|
||||||
|
## GPG_PASSPHRASE: Passphrase of your GPG key
|
||||||
|
## GPG_KEY_ID: Key id of your GPG key
|
||||||
|
## AWS_ACCESS_KEY_ID: AWS access key id
|
||||||
|
## AWS_SECRET_ACCESS_KEY: AWS secret access key
|
||||||
|
## S3_BUCKET_SYNC_TO Bucket to write packages to, should be set to packages.elasticsearch.org for a regular release
|
||||||
|
##
|
||||||
|
##
|
||||||
|
## optional
|
||||||
|
##
|
||||||
|
## S3_BUCKET_SYNC_FROM Bucket to read packages from, defaults to packages.elasticsearch.org
|
||||||
|
## KEEP_DIRECTORIES Allows to keep all the generated directory structures for debugging
|
||||||
|
## GPG_KEYRING Configure GPG keyring home, defaults to ~/.gnupg/
|
||||||
|
##
|
||||||
|
###################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###################
|
||||||
|
## configuration
|
||||||
|
###################
|
||||||
|
|
||||||
|
# No trailing slashes!
|
||||||
|
if [ -z $S3_BUCKET_SYNC_FROM ] ; then
|
||||||
|
S3_BUCKET_SYNC_FROM="packages.elasticsearch.org"
|
||||||
|
fi
|
||||||
|
if [ ! -z $GPG_KEYRING ] ; then
|
||||||
|
GPG_HOMEDIR="--homedir ${GPG_KEYRING}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
###################
|
||||||
|
## parameters
|
||||||
|
###################
|
||||||
|
|
||||||
|
# Must be major and minor version, i.e. 1.5 instead of 1.5.0
|
||||||
|
version=$1
|
||||||
|
|
||||||
|
###################
|
||||||
|
## prerequisites
|
||||||
|
###################
|
||||||
|
|
||||||
|
if [ "$#" != "1" ] || [ "x$1" == "x-h" ] || [ "x$1" == "x--help" ] ; then
|
||||||
|
echo "Usage: $0 version"
|
||||||
|
echo
|
||||||
|
echo " version: The elasticsearch major and minor version, i.e. 1.5"
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Checking for correct environment"
|
||||||
|
|
||||||
|
error=""
|
||||||
|
|
||||||
|
if [ -z "$GPG_PASSPHRASE" ] ; then
|
||||||
|
echo "Environment variable GPG_PASSPHRASE is not set"
|
||||||
|
error="true"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "$S3_BUCKET_SYNC_TO" ] ; then
|
||||||
|
echo "Environment variable S3_BUCKET_SYNC_TO is not set"
|
||||||
|
error="true"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "$GPG_KEY_ID" ] ; then
|
||||||
|
echo "Environment variable GPG_KEY_ID is not set"
|
||||||
|
error="true"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "$AWS_ACCESS_KEY_ID" ] ; then
|
||||||
|
echo "Environment variable AWS_ACCESS_KEY_ID is not set"
|
||||||
|
error="true"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "$AWS_SECRET_ACCESS_KEY" ] ; then
|
||||||
|
echo "Environment variable AWS_SECRET_ACCESS_KEY is not set"
|
||||||
|
error="true"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "x$error" == "xtrue" ] ; then
|
||||||
|
echo "Please set all of the above environment variables first. Exiting..."
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Checking for available command line tools:"
|
||||||
|
|
||||||
|
check_for_command() {
|
||||||
|
echo -n " $1"
|
||||||
|
if [ -z "`which $1`" ]; then
|
||||||
|
echo "NO"
|
||||||
|
error="true"
|
||||||
|
else
|
||||||
|
echo "ok"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
error=""
|
||||||
|
check_for_command "createrepo"
|
||||||
|
check_for_command "s3cmd"
|
||||||
|
check_for_command "apt-ftparchive"
|
||||||
|
check_for_command "gpg"
|
||||||
|
check_for_command "expect" # needed for the RPM plugin
|
||||||
|
|
||||||
|
if [ "x$error" == "xtrue" ] ; then
|
||||||
|
echo "Please install all of the above tools first. Exiting..."
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
###################
|
||||||
|
## setup
|
||||||
|
###################
|
||||||
|
tempdir=`mktemp -d /tmp/elasticsearch-repo.XXXX`
|
||||||
|
mkdir -p $tempdir
|
||||||
|
|
||||||
|
# create custom s3cmd conf, in case s3cmd does not support --aws-secret-key like on ubuntu
|
||||||
|
( cat <<EOF
|
||||||
|
[default]
|
||||||
|
access_key = $AWS_ACCESS_KEY_ID
|
||||||
|
secret_key = $AWS_SECRET_ACCESS_KEY
|
||||||
|
EOF
|
||||||
|
) > $tempdir/.s3cmd
|
||||||
|
s3cmd="s3cmd -c $tempdir/.s3cmd"
|
||||||
|
|
||||||
|
###################
|
||||||
|
## RPM
|
||||||
|
###################
|
||||||
|
|
||||||
|
centosdir=$tempdir/repository/elasticsearch/$version/centos
|
||||||
|
mkdir -p $centosdir
|
||||||
|
|
||||||
|
echo "RPM: Syncing repository for version $version into $centosdir"
|
||||||
|
$s3cmd sync s3://$S3_BUCKET_SYNC_FROM/elasticsearch/$version/centos/ $centosdir
|
||||||
|
|
||||||
|
rpm=target/rpm/elasticsearch/RPMS/noarch/elasticsearch*.rpm
|
||||||
|
echo "RPM: Copying $rpm into $centosdor"
|
||||||
|
cp $rpm $centosdir
|
||||||
|
|
||||||
|
echo "RPM: Running createrepo in $centosdir"
|
||||||
|
createrepo --update $centosdir
|
||||||
|
|
||||||
|
echo "RPM: Resigning repomd.xml"
|
||||||
|
rm -f $centosdir/repodata/repomd.xml.asc
|
||||||
|
gpg $GPG_HOMEDIR --passphrase "$GPG_PASSPHRASE" -a -b -o $centosdir/repodata/repomd.xml.asc $centosdir/repodata/repomd.xml
|
||||||
|
|
||||||
|
echo "RPM: Syncing back repository for $version into S3 bucket $S3_BUCKET_SYNC_TO"
|
||||||
|
$s3cmd sync -P $centosdir/ s3://$S3_BUCKET_SYNC_TO/elasticsearch/$version/centos/
|
||||||
|
|
||||||
|
###################
|
||||||
|
## DEB
|
||||||
|
###################
|
||||||
|
|
||||||
|
deb=target/releases/elasticsearch*.deb
|
||||||
|
|
||||||
|
echo "DEB: Creating repository directory structure"
|
||||||
|
|
||||||
|
if [ -z $tempdir ] ; then
|
||||||
|
echo "DEB: Could not create tempdir directory name, exiting"
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
debbasedir=$tempdir/repository/elasticsearch/$version/debian
|
||||||
|
mkdir -p $debbasedir
|
||||||
|
|
||||||
|
|
||||||
|
echo "DEB: Syncing debian repository of version $version to $debbasedir"
|
||||||
|
# sync all former versions into directory
|
||||||
|
$s3cmd sync s3://$S3_BUCKET_SYNC_FROM/elasticsearch/$version/debian/ $debbasedir
|
||||||
|
|
||||||
|
# create directories for a new release, in case syncing did not create this structure yet
|
||||||
|
mkdir -p $debbasedir/dists/stable/main/binary-all
|
||||||
|
mkdir -p $debbasedir/dists/stable/main/binary-i386
|
||||||
|
mkdir -p $debbasedir/dists/stable/main/binary-amd64
|
||||||
|
mkdir -p $debbasedir/.cache
|
||||||
|
mkdir -p $debbasedir/pool/main
|
||||||
|
|
||||||
|
# create the apt-ftparchive release configuration for this version
|
||||||
|
( cat <<EOF
|
||||||
|
APT::FTPArchive::Release::Origin "Elasticsearch";
|
||||||
|
APT::FTPArchive::Release::Label "Elasticsearch ${version}.x";
|
||||||
|
APT::FTPArchive::Release::Suite "stable";
|
||||||
|
APT::FTPArchive::Release::Codename "stable";
|
||||||
|
APT::FTPArchive::Release::Architectures "i386 amd64";
|
||||||
|
APT::FTPArchive::Release::Components "main";
|
||||||
|
APT::FTPArchive::Release::Description "Elasticsearch repo for all ${version}.x packages";
|
||||||
|
EOF
|
||||||
|
) > $tempdir/elasticsearch-$version-releases.conf
|
||||||
|
|
||||||
|
# create packages file using apt-ftparchive
|
||||||
|
mkdir -p $debbasedir/dists/stable/main/binary-all
|
||||||
|
mkdir -p $debbasedir/pool/main/e/elasticsearch
|
||||||
|
|
||||||
|
echo "DEB: Copying $deb to elasticsearch repo directory"
|
||||||
|
cp $deb $debbasedir/pool/main/e/elasticsearch
|
||||||
|
|
||||||
|
echo "DEB: Creating new Packages and Release files"
|
||||||
|
cd $debbasedir
|
||||||
|
apt-ftparchive packages pool > dists/stable/main/binary-all/Packages
|
||||||
|
cat dists/stable/main/binary-all/Packages | gzip -9 > dists/stable/main/binary-all/Packages.gz
|
||||||
|
cp dists/stable/main/binary-all/Packages* dists/stable/main/binary-i386/
|
||||||
|
cp dists/stable/main/binary-all/Packages* dists/stable/main/binary-amd64/
|
||||||
|
apt-ftparchive -c $tempdir/elasticsearch-$version-releases.conf release $debbasedir/dists/stable/ > $debbasedir/dists/stable/Release
|
||||||
|
|
||||||
|
echo "DEB: Signing newly created release file at $debbasedir/dists/stable/Release.gpg"
|
||||||
|
rm -f $debbasedir/dists/stable/Release.gpg
|
||||||
|
gpg $GPG_HOMEDIR --passphrase "$GPG_PASSPHRASE" -a -b -o $debbasedir/dists/stable/Release.gpg $debbasedir/dists/stable/Release
|
||||||
|
|
||||||
|
# upload to S3
|
||||||
|
echo "DEB: Uploading to S3 bucket to $S3_BUCKET_SYNC_TO"
|
||||||
|
$s3cmd sync -P $debbasedir/ s3://$S3_BUCKET_SYNC_TO/elasticsearch/$version/debian/
|
||||||
|
|
||||||
|
# back to original dir
|
||||||
|
cd -
|
||||||
|
|
||||||
|
# delete directories unless configured otherwise
|
||||||
|
if [ -z $KEEP_DIRECTORIES ] ; then
|
||||||
|
echo "Done! Deleting repository directories at $tempdir"
|
||||||
|
rm -fr $tempdir
|
||||||
|
else
|
||||||
|
echo "Done! Keeping repository directories at $tempdir"
|
||||||
|
fi
|
|
@ -39,9 +39,6 @@ org.apache.lucene.index.IndexReader#decRef()
|
||||||
org.apache.lucene.index.IndexReader#incRef()
|
org.apache.lucene.index.IndexReader#incRef()
|
||||||
org.apache.lucene.index.IndexReader#tryIncRef()
|
org.apache.lucene.index.IndexReader#tryIncRef()
|
||||||
|
|
||||||
@defaultMessage QueryWrapperFilter is cacheable by default - use Queries#wrap instead
|
|
||||||
org.apache.lucene.search.QueryWrapperFilter#<init>(org.apache.lucene.search.Query)
|
|
||||||
|
|
||||||
@defaultMessage Pass the precision step from the mappings explicitly instead
|
@defaultMessage Pass the precision step from the mappings explicitly instead
|
||||||
org.apache.lucene.search.NumericRangeQuery#newDoubleRange(java.lang.String,java.lang.Double,java.lang.Double,boolean,boolean)
|
org.apache.lucene.search.NumericRangeQuery#newDoubleRange(java.lang.String,java.lang.Double,java.lang.Double,boolean,boolean)
|
||||||
org.apache.lucene.search.NumericRangeQuery#newFloatRange(java.lang.String,java.lang.Float,java.lang.Float,boolean,boolean)
|
org.apache.lucene.search.NumericRangeQuery#newFloatRange(java.lang.String,java.lang.Float,java.lang.Float,boolean,boolean)
|
||||||
|
|
|
@ -82,7 +82,7 @@ See the {client}/php-api/current/index.html[official Elasticsearch PHP client].
|
||||||
|
|
||||||
* https://github.com/searchbox-io/Jest[Jest]:
|
* https://github.com/searchbox-io/Jest[Jest]:
|
||||||
Java Rest client.
|
Java Rest client.
|
||||||
* There is of course the http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/index.html[native ES Java client]
|
* There is of course the {client}/java-api/current/index.html[native ES Java client]
|
||||||
|
|
||||||
[[community-javascript]]
|
[[community-javascript]]
|
||||||
=== JavaScript
|
=== JavaScript
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
= Community Supported Clients
|
= Community Supported Clients
|
||||||
|
|
||||||
:client: http://www.elasticsearch.org/guide/en/elasticsearch/client
|
:client: http://www.elastic.co/guide/en/elasticsearch/client
|
||||||
|
|
||||||
|
|
||||||
include::clients.asciidoc[]
|
include::clients.asciidoc[]
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
= Groovy API
|
= Groovy API
|
||||||
:ref: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current
|
:ref: http://www.elastic.co/guide/en/elasticsearch/reference/current
|
||||||
:java: http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current
|
:java: http://www.elastic.co/guide/en/elasticsearch/client/java-api/current
|
||||||
|
|
||||||
[preface]
|
[preface]
|
||||||
== Preface
|
== Preface
|
||||||
|
|
|
@ -31,7 +31,7 @@ import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStat
|
||||||
[source,java]
|
[source,java]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
// sr is here your SearchResponse object
|
// sr is here your SearchResponse object
|
||||||
Stats agg = sr.getAggregations().get("agg");
|
ExtendedStats agg = sr.getAggregations().get("agg");
|
||||||
double min = agg.getMin();
|
double min = agg.getMin();
|
||||||
double max = agg.getMax();
|
double max = agg.getMax();
|
||||||
double avg = agg.getAvg();
|
double avg = agg.getAvg();
|
||||||
|
|
|
@ -99,3 +99,22 @@ By default, `BulkProcessor`:
|
||||||
* does not set flushInterval
|
* does not set flushInterval
|
||||||
* sets concurrentRequests to 1
|
* sets concurrentRequests to 1
|
||||||
|
|
||||||
|
When all documents are loaded into the `BulkProcessor` it can be closed by using the `awaitClose` or `close` methods:
|
||||||
|
|
||||||
|
[source,java]
|
||||||
|
--------------------------------------------------
|
||||||
|
bulkProcessor.awaitClose(10, TimeUnit.MINUTES);
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
[source,java]
|
||||||
|
--------------------------------------------------
|
||||||
|
bulkProcessor.close();
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
Both methods flush any remaining documents and disable all other scheduled flushes if they were scheduled by setting
|
||||||
|
`flushInterval`. If concurrent requests were enabled, the `awaitClose` method waits for up to the specified timeout for
|
||||||
|
all bulk requests to complete and then returns `true`; if the specified waiting time elapses before all bulk requests complete,
|
||||||
|
`false` is returned. The `close` method doesn't wait for any remaining bulk requests to complete and exits immediately.
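As a minimal, illustrative sketch (assuming a `bulkProcessor` that was built with concurrent requests enabled), the returned value can be checked like this:

[source,java]
--------------------------------------------------
// awaitClose may throw InterruptedException; it returns true if all bulk requests
// completed within the timeout, and false if the waiting time elapsed first.
boolean terminated = bulkProcessor.awaitClose(10, TimeUnit.MINUTES);
if (!terminated) {
    // some bulk requests may not have completed; handle or log as needed
}
--------------------------------------------------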
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[[java-api]]
|
[[java-api]]
|
||||||
= Java API
|
= Java API
|
||||||
:ref: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current
|
:ref: http://www.elastic.co/guide/en/elasticsearch/reference/current
|
||||||
|
|
||||||
[preface]
|
[preface]
|
||||||
== Preface
|
== Preface
|
||||||
|
|
|
@ -234,7 +234,7 @@ QueryBuilder qb = matchAllQuery();
|
||||||
|
|
||||||
|
|
||||||
[[mlt]]
|
[[mlt]]
|
||||||
=== More Like This (Field) Query (mlt and mlt_field)
|
=== More Like This Query (mlt)
|
||||||
|
|
||||||
See:
|
See:
|
||||||
* {ref}/query-dsl-mlt-query.html[More Like This Query]
|
* {ref}/query-dsl-mlt-query.html[More Like This Query]
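As a hedged sketch (field names and text are invented for illustration), building such a query with the Java API could look like:

[source,java]
--------------------------------------------------
import static org.elasticsearch.index.query.QueryBuilders.*;

QueryBuilder qb = moreLikeThisQuery("name.first", "name.last") // fields to match on (assumed names)
                      .likeText("text like this one")          // text to find similar documents for
                      .minTermFreq(1)                          // ignore terms with a lower frequency
                      .maxQueryTerms(12);                      // maximum number of selected terms
--------------------------------------------------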
|
||||||
|
|
|
@ -1,138 +0,0 @@
|
||||||
= elasticsearch-js
|
|
||||||
|
|
||||||
== Overview
|
|
||||||
|
|
||||||
Official low-level client for Elasticsearch. Its goal is to provide common
|
|
||||||
ground for all Elasticsearch-related code in JavaScript; because of this it tries
|
|
||||||
to be opinion-free and very extendable.
|
|
||||||
|
|
||||||
The full documentation is available at http://elasticsearch.github.io/elasticsearch-js
|
|
||||||
|
|
||||||
|
|
||||||
=== Getting the Node.js module
|
|
||||||
|
|
||||||
To install the module into an existing Node.js project use npm:
|
|
||||||
|
|
||||||
[source,sh]
|
|
||||||
------------------------------------
|
|
||||||
npm install elasticsearch
|
|
||||||
------------------------------------
|
|
||||||
|
|
||||||
=== Getting the browser client
|
|
||||||
|
|
||||||
For a browser-based projects, builds for modern browsers are available http://elasticsearch.github.io/elasticsearch-js#browser-builds[here]. Download one of the archives and extract it, inside you'll find three files, pick the one that best matches your environment:
|
|
||||||
|
|
||||||
* elasticsearch.jquery.js - for projects that already use jQuery
|
|
||||||
* elasticsearch.angular.js - for Angular projects
|
|
||||||
* elasticsearch.js - generic build for all other projects
|
|
||||||
|
|
||||||
Each of the library specific builds tie into the AJAX and Promise creation facilities provided by their respective libraries. This is an example of how Elasticsearch.js can be extended to provide a more opinionated approach when appropriate.
|
|
||||||
|
|
||||||
=== Setting up the client
|
|
||||||
|
|
||||||
Now you are ready to get busy! First thing you'll need to do is create an instance of `elasticsearch.Client`. Here are several examples of configuration parameters you can use when creating that instance. For a full list of configuration options see http://elasticsearch.github.io/elasticsearch-js/index.html#configuration[the configuration docs].
|
|
||||||
|
|
||||||
[source,javascript]
|
|
||||||
------------------------------------
|
|
||||||
var elasticsearch = require('elasticsearch');
|
|
||||||
|
|
||||||
// Connect to localhost:9200 and use the default settings
|
|
||||||
var client = new elasticsearch.Client();
|
|
||||||
|
|
||||||
// Connect the client to two nodes, requests will be
|
|
||||||
// load-balanced between them using round-robin
|
|
||||||
var client = elasticsearch.Client({
|
|
||||||
hosts: [
|
|
||||||
'elasticsearch1:9200',
|
|
||||||
'elasticsearch2:9200'
|
|
||||||
]
|
|
||||||
});
|
|
||||||
|
|
||||||
// Connect to the this host's cluster, sniff
|
|
||||||
// for the rest of the cluster right away, and
|
|
||||||
// again every 5 minutes
|
|
||||||
var client = elasticsearch.Client({
|
|
||||||
host: 'elasticsearch1:9200',
|
|
||||||
sniffOnStart: true,
|
|
||||||
sniffInterval: 300000
|
|
||||||
});
|
|
||||||
|
|
||||||
// Connect to this host using https, basic auth,
|
|
||||||
// a path prefix, and static query string values
|
|
||||||
var client = new elasticsearch.Client({
|
|
||||||
host: 'https://user:password@elasticsearch1/search?app=blog'
|
|
||||||
});
|
|
||||||
------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
=== Setting up the client in the browser
|
|
||||||
|
|
||||||
The params accepted by the `Client` constructor are the same in the browser versions of the client, but how you access the Client constructor is different based on the build you are using. Below is an example of instantiating a client in each build.
|
|
||||||
|
|
||||||
[source,javascript]
|
|
||||||
------------------------------------
|
|
||||||
// elasticsearch.js adds the elasticsearch namespace to the window
|
|
||||||
var client = elasticsearch.Client({ ... });
|
|
||||||
|
|
||||||
// elasticsearch.jquery.js adds the es namespace to the jQuery object
|
|
||||||
var client = jQuery.es.Client({ ... });
|
|
||||||
|
|
||||||
// elasticsearch.angular.js creates an elasticsearch
|
|
||||||
// module, which provides an esFactory
|
|
||||||
var app = angular.module('app', ['elasticsearch']);
|
|
||||||
app.service('es', function (esFactory) {
|
|
||||||
return esFactory({ ... });
|
|
||||||
});
|
|
||||||
------------------------------------
|
|
||||||
|
|
||||||
=== Using the client instance to make API calls.
|
|
||||||
|
|
||||||
Once you create the client, making API calls is simple.
|
|
||||||
|
|
||||||
[source,javascript]
|
|
||||||
------------------------------------
|
|
||||||
// get the current status of the entire cluster.
|
|
||||||
// Note: params are always optional, you can just send a callback
|
|
||||||
client.cluster.health(function (err, resp) {
|
|
||||||
if (err) {
|
|
||||||
console.error(err.message);
|
|
||||||
} else {
|
|
||||||
console.dir(resp);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// index a document
|
|
||||||
client.index({
|
|
||||||
index: 'blog',
|
|
||||||
type: 'post',
|
|
||||||
id: 1,
|
|
||||||
body: {
|
|
||||||
title: 'JavaScript Everywhere!',
|
|
||||||
content: 'It all started when...',
|
|
||||||
date: '2013-12-17'
|
|
||||||
}
|
|
||||||
}, function (err, resp) {
|
|
||||||
// ...
|
|
||||||
});
|
|
||||||
|
|
||||||
// search for documents (and also promises!!)
|
|
||||||
client.search({
|
|
||||||
index: 'users',
|
|
||||||
size: 50,
|
|
||||||
body: {
|
|
||||||
query: {
|
|
||||||
match: {
|
|
||||||
profile: 'elasticsearch'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}).then(function (resp) {
|
|
||||||
var hits = resp.body.hits;
|
|
||||||
});
|
|
||||||
------------------------------------
|
|
||||||
|
|
||||||
== Copyright and License
|
|
||||||
|
|
||||||
This software is Copyright (c) 2013-2015 by Elasticsearch BV.
|
|
||||||
|
|
||||||
This is free software, licensed under The Apache License Version 2.0.
|
|
|
@ -82,7 +82,7 @@ curl -XPUT localhost:9200/test/ -d '
|
||||||
"type" : "pattern_capture",
|
"type" : "pattern_capture",
|
||||||
"preserve_original" : 1,
|
"preserve_original" : 1,
|
||||||
"patterns" : [
|
"patterns" : [
|
||||||
"(\\w+)",
|
"([^@]+)",
|
||||||
"(\\p{L}+)",
|
"(\\p{L}+)",
|
||||||
"(\\d+)",
|
"(\\d+)",
|
||||||
"@(.+)"
|
"@(.+)"
|
||||||
|
@ -108,9 +108,10 @@ When the above analyzer is used on an email address like:
|
||||||
john-smith_123@foo-bar.com
|
john-smith_123@foo-bar.com
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
it would produce the following tokens: [ `john-smith_123`,
|
it would produce the following tokens:
|
||||||
`foo-bar.com`, `john`, `smith_123`, `smith`, `123`, `foo`,
|
|
||||||
`foo-bar.com`, `bar`, `com` ]
|
john-smith_123@foo-bar.com, john-smith_123,
|
||||||
|
john, smith, 123, foo-bar.com, foo, bar, com
|
||||||
|
|
||||||
Multiple patterns are required to allow overlapping captures, but also
|
Multiple patterns are required to allow overlapping captures, but also
|
||||||
means that patterns are less dense and easier to understand.
|
means that patterns are less dense and easier to understand.
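To see these tokens for yourself, one option (a hedged sketch; the analyzer name `my_email_analyzer` is an assumption, since the full analyzer definition is not shown in this excerpt) is the Analyze API:

[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/test/_analyze?analyzer=my_email_analyzer&pretty' \
     -d 'john-smith_123@foo-bar.com'
--------------------------------------------------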
|
||||||
|
|
|
@ -78,3 +78,9 @@ Advance settings include:
|
||||||
# see http://en.wikipedia.org/wiki/Zero-width_joiner
|
# see http://en.wikipedia.org/wiki/Zero-width_joiner
|
||||||
\\u200D => ALPHANUM
|
\\u200D => ALPHANUM
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
NOTE: Using a tokenizer like the `standard` tokenizer may interfere with
|
||||||
|
the `catenate_*` and `preserve_original` parameters, as the original
|
||||||
|
string may already have lost punctuation during tokenization. Instead,
|
||||||
|
you may want to use the `whitespace` tokenizer.
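As an illustration of that note, a hedged sketch of an analyzer that pairs the `whitespace` tokenizer with a `word_delimiter` filter (index, analyzer and filter names are invented for the example) could look like:

[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/my_index' -d '
{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "my_analyzer" : {
          "type" : "custom",
          "tokenizer" : "whitespace",
          "filter" : ["my_word_delimiter"]
        }
      },
      "filter" : {
        "my_word_delimiter" : {
          "type" : "word_delimiter",
          "preserve_original" : true,
          "catenate_all" : true
        }
      }
    }
  }
}'
--------------------------------------------------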
|
||||||
|
|
||||||
|
|
|
@ -46,5 +46,3 @@ include::cluster/nodes-stats.asciidoc[]
|
||||||
include::cluster/nodes-info.asciidoc[]
|
include::cluster/nodes-info.asciidoc[]
|
||||||
|
|
||||||
include::cluster/nodes-hot-threads.asciidoc[]
|
include::cluster/nodes-hot-threads.asciidoc[]
|
||||||
|
|
||||||
include::cluster/nodes-shutdown.asciidoc[]
|
|
||||||
|
|
|
@ -1,57 +0,0 @@
|
||||||
[[cluster-nodes-shutdown]]
|
|
||||||
== Nodes Shutdown
|
|
||||||
|
|
||||||
The nodes shutdown API allows to shutdown one or more (or all) nodes in
|
|
||||||
the cluster. Here is an example of shutting the `_local` node the
|
|
||||||
request is directed to:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_cluster/nodes/_local/_shutdown'
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
Specific node(s) can be shutdown as well using their respective node ids
|
|
||||||
(or other selective options as explained
|
|
||||||
<<cluster-nodes,here>> .):
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_cluster/nodes/nodeId1,nodeId2/_shutdown'
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
The master (of the cluster) can also be shutdown using:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_cluster/nodes/_master/_shutdown'
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
Finally, all nodes can be shutdown using one of the options below:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_shutdown'
|
|
||||||
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_cluster/nodes/_shutdown'
|
|
||||||
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_cluster/nodes/_all/_shutdown'
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
[float]
|
|
||||||
[[delay]]
|
|
||||||
=== Delay
|
|
||||||
|
|
||||||
By default, the shutdown will be executed after a 1 second delay (`1s`).
|
|
||||||
The delay can be customized by setting the `delay` parameter in a time
|
|
||||||
value format. For example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
$ curl -XPOST 'http://localhost:9200/_cluster/nodes/_local/_shutdown?delay=10s'
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Disable Shutdown
|
|
||||||
|
|
||||||
The shutdown API can be disabled by setting `action.disable_shutdown` in
|
|
||||||
the node configuration.
|
|
|
@ -153,9 +153,6 @@ due to forced awareness or allocation filtering.
|
||||||
`indices.cache.filter.size`::
|
`indices.cache.filter.size`::
|
||||||
See <<index-modules-cache>>
|
See <<index-modules-cache>>
|
||||||
|
|
||||||
`indices.cache.filter.expire` (time)::
|
|
||||||
See <<index-modules-cache>>
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
==== TTL interval
|
==== TTL interval
|
||||||
|
|
||||||
|
|
|
@ -104,13 +104,13 @@ java -version
|
||||||
echo $JAVA_HOME
|
echo $JAVA_HOME
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Once we have Java set up, we can then download and run Elasticsearch. The binaries are available from http://www.elasticsearch.org/download[`www.elasticsearch.org/download`] along with all the releases that have been made in the past. For each release, you have a choice among a `zip` or `tar` archive, or a `DEB` or `RPM` package. For simplicity, let's use the tar file.
|
Once we have Java set up, we can then download and run Elasticsearch. The binaries are available from http://www.elastic.co/downloads[`www.elastic.co/downloads`] along with all the releases that have been made in the past. For each release, you have a choice among a `zip` or `tar` archive, or a `DEB` or `RPM` package. For simplicity, let's use the tar file.
|
||||||
|
|
||||||
Let's download the Elasticsearch {version} tar as follows (Windows users should download the zip package):
|
Let's download the Elasticsearch {version} tar as follows (Windows users should download the zip package):
|
||||||
|
|
||||||
["source","sh",subs="attributes,callouts"]
|
["source","sh",subs="attributes,callouts"]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
curl -L -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-{version}.tar.gz
|
curl -L -O https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-{version}.tar.gz
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Then extract it as follows (Windows users should unzip the zip package):
|
Then extract it as follows (Windows users should unzip the zip package):
|
||||||
|
@ -868,7 +868,7 @@ In the previous section, we skipped over a little detail called the document sco
|
||||||
All queries in Elasticsearch trigger computation of the relevance scores. In cases where we do not need the relevance scores, Elasticsearch provides another query capability in the form of <<query-dsl-filters,filters>>. Filters are similar in concept to queries except that they are optimized for much faster execution speeds for two primary reasons:
|
All queries in Elasticsearch trigger computation of the relevance scores. In cases where we do not need the relevance scores, Elasticsearch provides another query capability in the form of <<query-dsl-filters,filters>>. Filters are similar in concept to queries except that they are optimized for much faster execution speeds for two primary reasons:
|
||||||
|
|
||||||
* Filters do not score so they are faster to execute than queries
|
* Filters do not score so they are faster to execute than queries
|
||||||
* Filters can be http://www.elasticsearch.org/blog/all-about-elasticsearch-filter-bitsets/[cached in memory] allowing repeated search executions to be significantly faster than queries
|
* Filters can be http://www.elastic.co/blog/all-about-elasticsearch-filter-bitsets/[cached in memory] allowing repeated search executions to be significantly faster than queries
|
||||||
|
|
||||||
To understand filters, let's first introduce the <<query-dsl-filtered-query,`filtered` query>>, which allows you to combine a query (like `match_all`, `match`, `bool`, etc.) together with a filter. As an example, let's introduce the <<query-dsl-range-filter,`range` filter>>, which allows us to filter documents by a range of values. This is generally used for numeric or date filtering.
|
To understand filters, let's first introduce the <<query-dsl-filtered-query,`filtered` query>>, which allows you to combine a query (like `match_all`, `match`, `bool`, etc.) together with a filter. As an example, let's introduce the <<query-dsl-range-filter,`range` filter>>, which allows us to filter documents by a range of values. This is generally used for numeric or date filtering.
|
||||||
|
|
||||||
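As an illustrative sketch (the index name, field name, and values below are assumptions, not part of the original text), a `filtered` query combining `match_all` with a `range` filter might look like this:

[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "range": {
          "balance": { "gte": 20000, "lte": 30000 }
        }
      }
    }
  }
}'
--------------------------------------------------

Here the `match_all` query matches every document, and the `range` filter narrows the results to documents whose `balance` falls between the two bounds, without affecting scoring.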
|
|
(binary diff: 11 image files added, roughly 63–72 KiB each)
|
@ -15,29 +15,6 @@ all the relevant modules settings can be provided when creating an index
|
||||||
There are specific index level settings that are not associated with any
|
There are specific index level settings that are not associated with any
|
||||||
specific module. These include:
|
specific module. These include:
|
||||||
|
|
||||||
[[index-compound-format]]`index.compound_format`::
|
|
||||||
|
|
||||||
experimental[]
|
|
||||||
Should the compound file format be used (boolean setting).
|
|
||||||
The compound format was created to reduce the number of open
|
|
||||||
file handles when using file based storage. However, by default it is set
|
|
||||||
to `false` as the non-compound format gives better performance. It is important
|
|
||||||
that OS is configured to give Elasticsearch ``enough'' file handles.
|
|
||||||
See <<file-descriptors>>.
|
|
||||||
+
|
|
||||||
Alternatively, `compound_format` can be set to a number between `0` and
|
|
||||||
`1`, where `0` means `false`, `1` means `true` and a number inbetween
|
|
||||||
represents a percentage: if the merged segment is less than this
|
|
||||||
percentage of the total index, then it is written in compound format,
|
|
||||||
otherwise it is written in non-compound format.
|
|
||||||
|
|
||||||
[[index-compound-on-flush]]`index.compound_on_flush`::
|
|
||||||
|
|
||||||
experimental[]
|
|
||||||
Should a new segment (create by indexing, not by merging) be written
|
|
||||||
in compound format or non-compound format? Defaults to `true`.
|
|
||||||
This is a dynamic setting.
|
|
||||||
|
|
||||||
`index.refresh_interval`::
|
`index.refresh_interval`::
|
||||||
A time setting controlling how often the
|
A time setting controlling how often the
|
||||||
refresh operation will be executed. Defaults to `1s`. Can be set to `-1`
|
refresh operation will be executed. Defaults to `1s`. Can be set to `-1`
|
||||||
|
|
|
@ -143,6 +143,21 @@ settings API. By default, Elasticsearch will retrieve information
|
||||||
about the disk usage of the nodes every 30 seconds. This can also be
|
about the disk usage of the nodes every 30 seconds. This can also be
|
||||||
changed by setting the `cluster.info.update.interval` setting.
|
changed by setting the `cluster.info.update.interval` setting.
|
||||||
|
|
||||||
|
An example of updating the low watermark to no more than 80% of the disk size, a
|
||||||
|
high watermark of at least 50 gigabytes free, and updating the information about
|
||||||
|
the cluster every minute:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
curl -XPUT localhost:9200/_cluster/settings -d '{
|
||||||
|
"transient" : {
|
||||||
|
"cluster.routing.allocation.disk.watermark.low" : "80%",
|
||||||
|
"cluster.routing.allocation.disk.watermark.high" : "50gb",
|
||||||
|
"cluster.info.update.interval" : "1m"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
By default, Elasticsearch will take into account shards that are currently being
|
By default, Elasticsearch will take into account shards that are currently being
|
||||||
relocated to the target node when computing a node's disk usage. This can be
|
relocated to the target node when computing a node's disk usage. This can be
|
||||||
changed by setting the `cluster.routing.allocation.disk.include_relocations`
|
changed by setting the `cluster.routing.allocation.disk.include_relocations`
|
||||||
|
|
|
@ -32,7 +32,7 @@ mapping specified in the <<indices-create-index,`create-index`>> or
|
||||||
`_default_` mapping.
|
`_default_` mapping.
|
||||||
|
|
||||||
The default mapping definition is a plain mapping definition that is
|
The default mapping definition is a plain mapping definition that is
|
||||||
embedded within ElasticSearch:
|
embedded within Elasticsearch:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -46,11 +46,8 @@ Pretty short, isn't it? Basically, everything is `_default_`ed, including the
|
||||||
dynamic nature of the root object mapping which allows new fields to be added
|
dynamic nature of the root object mapping which allows new fields to be added
|
||||||
automatically.
|
automatically.
|
||||||
|
|
||||||
The built-in default mapping definition can be overridden in several ways. A
|
The default mapping can be overridden by specifying the `_default_` type when
|
||||||
`_default_` mapping can be specified when creating a new index, or the global
|
creating a new index.
|
||||||
`_default_` mapping (for all indices) can be configured by creating a file
|
|
||||||
called `config/default-mapping.json`. (This location can be changed with
|
|
||||||
the `index.mapper.default_mapping_location` setting.)
|
|
||||||
|
|
||||||
Dynamic creation of mappings for unmapped types can be completely
|
Dynamic creation of mappings for unmapped types can be completely
|
||||||
disabled by setting `index.mapper.dynamic` to `false`.
|
disabled by setting `index.mapper.dynamic` to `false`.
|
||||||
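As a hedged sketch of both points above (the index name and the exact settings used here are assumptions for illustration), a `_default_` mapping can be supplied when the index is created, and dynamic mapping can be switched off via `index.mapper.dynamic`:

[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/my_index' -d '{
  "settings": {
    "index.mapper.dynamic": false
  },
  "mappings": {
    "_default_": {
      "_all": { "enabled": false }
    }
  }
}'
--------------------------------------------------

Every type subsequently created in `my_index` would inherit the `_default_` definition (here, `_all` disabled), and documents of unmapped types would be rejected because dynamic mapping is off.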
|
|
|
@ -1,7 +1,7 @@
|
||||||
[[index-modules-merge]]
|
[[index-modules-merge]]
|
||||||
== Merge
|
== Merge
|
||||||
|
|
||||||
experimental[]
|
experimental[All of the settings exposed in the `merge` module are expert only and may be removed in the future]
|
||||||
|
|
||||||
A shard in elasticsearch is a Lucene index, and a Lucene index is broken
|
A shard in elasticsearch is a Lucene index, and a Lucene index is broken
|
||||||
down into segments. Segments are internal storage elements in the index
|
down into segments. Segments are internal storage elements in the index
|
||||||
|
@ -72,12 +72,6 @@ This policy has the following settings:
|
||||||
Higher values favor selecting merges that reclaim deletions. A value of
|
Higher values favor selecting merges that reclaim deletions. A value of
|
||||||
`0.0` means deletions don't impact merge selection. Defaults to `2.0`.
|
`0.0` means deletions don't impact merge selection. Defaults to `2.0`.
|
||||||
|
|
||||||
`index.compound_format`::
|
|
||||||
|
|
||||||
Should the index be stored in compound format or not. Defaults to `false`.
|
|
||||||
See <<index-compound-format,`index.compound_format`>> in
|
|
||||||
<<index-modules-settings>>.
|
|
||||||
|
|
||||||
For normal merging, this policy first computes a "budget" of how many
|
For normal merging, this policy first computes a "budget" of how many
|
||||||
segments are allowed to be in the index. If the index is over-budget,
|
segments are allowed to be in the index. If the index is over-budget,
|
||||||
then the policy sorts segments by decreasing size (proportionally considering percent
|
then the policy sorts segments by decreasing size (proportionally considering percent
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
[[index-modules-store]]
|
[[index-modules-store]]
|
||||||
== Store
|
== Store
|
||||||
|
|
||||||
experimental[]
|
|
||||||
|
|
||||||
The store module allows you to control how index data is stored.
|
The store module allows you to control how index data is stored.
|
||||||
|
|
||||||
The index can either be stored in-memory (no persistence) or on-disk
|
The index can either be stored in-memory (no persistence) or on-disk
|
||||||
|
@ -20,6 +18,7 @@ heap space* using the "Memory" (see below) storage type. It translates
|
||||||
to the fact that there is no need for extra large JVM heaps (with their
|
to the fact that there is no need for extra large JVM heaps (with their
|
||||||
own consequences) for storing the index in memory.
|
own consequences) for storing the index in memory.
|
||||||
|
|
||||||
|
experimental[All of the settings exposed in the `store` module are expert only and may be removed in the future]
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[file-system]]
|
[[file-system]]
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[[elasticsearch-reference]]
|
[[elasticsearch-reference]]
|
||||||
= Reference
|
= Reference
|
||||||
|
|
||||||
:version: 1.5.1
|
:version: 1.5.2
|
||||||
:branch: 1.5
|
:branch: 1.5
|
||||||
:jdk: 1.8.0_25
|
:jdk: 1.8.0_25
|
||||||
:defguide: https://www.elastic.co/guide/en/elasticsearch/guide/current
|
:defguide: https://www.elastic.co/guide/en/elasticsearch/guide/current
|
||||||
|
|
|
@ -78,7 +78,7 @@ compound:: Whether the segment is stored in a compound file. When true, this
|
||||||
|
|
||||||
To add additional information that can be used for debugging, use the `verbose` flag.
|
To add additional information that can be used for debugging, use the `verbose` flag.
|
||||||
|
|
||||||
NOTE: The format of additional verbose information is experimental and can change at any time.
|
experimental[The format of the additional verbose information is experimental and can change at any time]
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
|
@ -61,9 +61,6 @@ settings API:
|
||||||
`index.refresh_interval`::
|
`index.refresh_interval`::
|
||||||
The async refresh interval of a shard.
|
The async refresh interval of a shard.
|
||||||
|
|
||||||
`index.index_concurrency`::
|
|
||||||
experimental[] Defaults to `8`.
|
|
||||||
|
|
||||||
`index.translog.flush_threshold_ops`::
|
`index.translog.flush_threshold_ops`::
|
||||||
When to flush based on operations.
|
When to flush based on operations.
|
||||||
|
|
||||||
|
@ -151,14 +148,6 @@ settings API:
|
||||||
`index.translog.fs.type`::
|
`index.translog.fs.type`::
|
||||||
experimental[] Either `simple` or `buffered` (default).
|
experimental[] Either `simple` or `buffered` (default).
|
||||||
|
|
||||||
`index.compound_format`::
|
|
||||||
experimental[] See <<index-compound-format,`index.compound_format`>> in
|
|
||||||
<<index-modules-settings>>.
|
|
||||||
|
|
||||||
`index.compound_on_flush`::
|
|
||||||
experimental[] See <<index-compound-on-flush,`index.compound_on_flush>> in
|
|
||||||
<<index-modules-settings>>.
|
|
||||||
|
|
||||||
<<index-modules-slowlog>>::
|
<<index-modules-slowlog>>::
|
||||||
All the settings for slow log.
|
All the settings for slow log.
|
||||||
|
|
||||||
|
|
|
@ -71,8 +71,6 @@ include::mapping/date-format.asciidoc[]
|
||||||
|
|
||||||
include::mapping/dynamic-mapping.asciidoc[]
|
include::mapping/dynamic-mapping.asciidoc[]
|
||||||
|
|
||||||
include::mapping/conf-mappings.asciidoc[]
|
|
||||||
|
|
||||||
include::mapping/meta.asciidoc[]
|
include::mapping/meta.asciidoc[]
|
||||||
|
|
||||||
include::mapping/transform.asciidoc[]
|
include::mapping/transform.asciidoc[]
|
||||||
|
|
|
@ -1,19 +0,0 @@
|
||||||
[[mapping-conf-mappings]]
|
|
||||||
== Config Mappings
|
|
||||||
|
|
||||||
Creating new mappings can be done using the
|
|
||||||
<<indices-put-mapping,Put Mapping>>
|
|
||||||
API. When a document is indexed with no mapping associated with it in
|
|
||||||
the specific index, the
|
|
||||||
<<mapping-dynamic-mapping,dynamic / default
|
|
||||||
mapping>> feature will kick in and automatically create mapping
|
|
||||||
definition for it.
|
|
||||||
|
|
||||||
Mappings can also be provided on the node level, meaning that each index
|
|
||||||
created will automatically be started with all the mappings defined
|
|
||||||
within a certain location.
|
|
||||||
|
|
||||||
Mappings can be defined within files called `[mapping_name].json` and be
|
|
||||||
placed either under `config/mappings/_default` location, or under
|
|
||||||
`config/mappings/[index_name]` (for mappings that should be associated
|
|
||||||
only with a specific index).
|
|
|
@ -21,12 +21,8 @@ embedded within the distribution:
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Pretty short, isn't it? Basically, everything is defaulted, especially the
|
Pretty short, isn't it? Basically, everything is defaulted, especially the
|
||||||
dynamic nature of the root object mapping. The default mapping
|
dynamic nature of the root object mapping. The default mapping can be
|
||||||
definition can be overridden in several manners. The simplest manner is
|
overridden by specifying the `_default_` type when creating a new index.
|
||||||
to simply define a file called `default-mapping.json` and to place it
|
|
||||||
under the `config` directory (which can be configured to exist in a
|
|
||||||
different location). It can also be explicitly set using the
|
|
||||||
`index.mapper.default_mapping_location` setting.
|
|
||||||
|
|
||||||
The dynamic creation of mappings for unmapped types can be completely
|
The dynamic creation of mappings for unmapped types can be completely
|
||||||
disabled by setting `index.mapper.dynamic` to `false`.
|
disabled by setting `index.mapper.dynamic` to `false`.
|
||||||
|
|
|
@ -20,22 +20,3 @@ example:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
[[include-exclude]]
|
|
||||||
==== Includes / Excludes
|
|
||||||
|
|
||||||
Allow to specify paths in the source that would be included / excluded
|
|
||||||
when it's stored, supporting `*` as wildcard annotation. For example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"my_type" : {
|
|
||||||
"_source" : {
|
|
||||||
"includes" : ["path1.*", "path2.*"],
|
|
||||||
"excludes" : ["path3.*"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -67,8 +67,3 @@ the fact that the following JSON document is perfectly fine:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Note also, that thanks to the fact that we used the `index_name` to use
|
|
||||||
the non plural form (`tag` instead of `tags`), we can actually refer to
|
|
||||||
the field using the `index_name` as well. For example, we can execute a
|
|
||||||
query using `tweet.tags:wow` or `tweet.tag:wow`. We could, of course,
|
|
||||||
name the field as `tag` and skip the `index_name` all together).
|
|
||||||
|
|
|
@ -426,6 +426,9 @@ and it can be retrieved from it).
|
||||||
in `_source`, have `include_in_all` enabled, or `store` be set to
|
in `_source`, have `include_in_all` enabled, or `store` be set to
|
||||||
`true` for this to be useful.
|
`true` for this to be useful.
|
||||||
|
|
||||||
|
|`doc_values` |Set to `true` to store field values in a column-stride fashion.
|
||||||
|
Automatically set to `true` when the fielddata format is `doc_values`.
|
||||||
|
|
||||||
|`boost` |The boost value. Defaults to `1.0`.
|
|`boost` |The boost value. Defaults to `1.0`.
|
||||||
|
|
||||||
|`null_value` |When there is a (JSON) null value for the field, use the
|
|`null_value` |When there is a (JSON) null value for the field, use the
|
||||||
|
|
|
@ -139,6 +139,10 @@ Nested fields may contain other nested fields. The `include_in_parent` object
|
||||||
refers to the direct parent of the field, while the `include_in_root`
|
refers to the direct parent of the field, while the `include_in_root`
|
||||||
parameter refers only to the topmost ``root'' object or document.
|
parameter refers only to the topmost ``root'' object or document.
|
||||||
|
|
||||||
|
NOTE: The `include_in_parent` and `include_in_root` options do not apply
|
||||||
|
to <<mapping-geo-shape-type,`geo_shape` fields>>, which are only ever
|
||||||
|
indexed inside the nested document.
|
||||||
|
|
||||||
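A minimal mapping sketch (the type and field names are invented for illustration) showing `include_in_parent` on a nested field:

[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/my_index/_mapping/blogpost' -d '{
  "blogpost": {
    "properties": {
      "comments": {
        "type": "nested",
        "include_in_parent": true,
        "properties": {
          "author": { "type": "string" },
          "stars":  { "type": "integer" }
        }
      }
    }
  }
}'
--------------------------------------------------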
Nested docs will automatically use the root doc `_all` field only.
|
Nested docs will automatically use the root doc `_all` field only.
|
||||||
|
|
||||||
.Internal Implementation
|
.Internal Implementation
|
||||||
|
|
|
@ -16,27 +16,6 @@ specifying the `tweet` type in the document itself:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Index / Search Analyzers
|
|
||||||
|
|
||||||
The root object allows to define type mapping level analyzers for index
|
|
||||||
and search that will be used with all different fields that do not
|
|
||||||
explicitly set analyzers on their own. Here is an example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"tweet" : {
|
|
||||||
"analyzer" : "standard",
|
|
||||||
"search_analyzer" : "standard_with_synonyms"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
The above simply explicitly defines both the `analyzer` and
|
|
||||||
`search_analyzer` that will be used. If `search_analyzer` is not specified,
|
|
||||||
it defaults to the value of `analyzer`.
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
==== dynamic_date_formats
|
==== dynamic_date_formats
|
||||||
|
|
||||||
|
|
|
@ -362,7 +362,7 @@ in the query string.
|
||||||
=== Percolator
|
=== Percolator
|
||||||
|
|
||||||
The percolator has been redesigned and because of this the dedicated `_percolator` index is no longer used by the percolator,
|
The percolator has been redesigned and because of this the dedicated `_percolator` index is no longer used by the percolator,
|
||||||
but instead the percolator works with a dedicated `.percolator` type. Read the http://www.elasticsearch.org/blog/percolator-redesign-blog-post/[redesigned percolator]
|
but instead the percolator works with a dedicated `.percolator` type. Read the http://www.elastic.co/blog/percolator-redesign-blog-post[redesigned percolator]
|
||||||
blog post for the reasons why the percolator has been redesigned.
|
blog post for the reasons why the percolator has been redesigned.
|
||||||
|
|
||||||
Elasticsearch will *not* delete the `_percolator` index when upgrading, only the percolate api will not use the queries
|
Elasticsearch will *not* delete the `_percolator` index when upgrading, only the percolate api will not use the queries
|
||||||
|
|
|
@ -139,6 +139,8 @@ equivalent to the former `pre_zone` option. Setting `time_zone` to a value like
|
||||||
being applied in the specified time zone. In addition, the `pre_zone_adjust_large_interval` option has been removed because we
|
being applied in the specified time zone. In addition, the `pre_zone_adjust_large_interval` option has been removed because we
|
||||||
now always return dates and bucket keys in UTC.
|
now always return dates and bucket keys in UTC.
|
||||||
|
|
||||||
|
Both the `histogram` and `date_histogram` aggregations now have a default `min_doc_count` of `0`, where previously it defaulted to `1`.
|
||||||
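To keep the old behaviour of omitting empty buckets, `min_doc_count` can be set back to `1` explicitly. A sketch (index, field, and interval are illustrative):

[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/my_index/_search' -d '{
  "aggs": {
    "prices": {
      "histogram": {
        "field": "price",
        "interval": 10,
        "min_doc_count": 1
      }
    }
  }
}'
--------------------------------------------------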
|
|
||||||
`include`/`exclude` filtering on the `terms` aggregation now uses the same syntax as regexp queries instead of the Java syntax. While simple
|
`include`/`exclude` filtering on the `terms` aggregation now uses the same syntax as regexp queries instead of the Java syntax. While simple
|
||||||
regexps should still work, more complex ones might need some rewriting. Also, the `flags` parameter is not supported anymore.
|
regexps should still work, more complex ones might need some rewriting. Also, the `flags` parameter is not supported anymore.
|
||||||
|
|
||||||
|
@ -270,7 +272,7 @@ to provide special features. They now have limited configuration options.
|
||||||
* `_field_names` configuration is limited to disabling the field.
|
* `_field_names` configuration is limited to disabling the field.
|
||||||
* `_size` configuration is limited to enabling the field.
|
* `_size` configuration is limited to enabling the field.
|
||||||
|
|
||||||
=== Boolean fields
|
==== Boolean fields
|
||||||
|
|
||||||
Boolean fields used to have a string fielddata with `F` meaning `false` and `T`
|
Boolean fields used to have a string fielddata with `F` meaning `false` and `T`
|
||||||
meaning `true`. They have been refactored to use numeric fielddata, with `0`
|
meaning `true`. They have been refactored to use numeric fielddata, with `0`
|
||||||
|
@ -302,6 +304,22 @@ the user-friendly representation of boolean fields: `false`/`true`:
|
||||||
]
|
]
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
==== Murmur3 Fields
|
||||||
|
Fields of type `murmur3` can no longer change `doc_values` or `index` setting.
|
||||||
|
They are always stored with doc values, and not indexed.
|
||||||
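A minimal mapping sketch (index, type, and field names are invented) showing the usual way a `murmur3` hash is attached as a sub-field of a string field:

[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/my_index/_mapping/my_type' -d '{
  "my_type": {
    "properties": {
      "url": {
        "type": "string",
        "fields": {
          "hash": { "type": "murmur3" }
        }
      }
    }
  }
}'
--------------------------------------------------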
|
|
||||||
|
==== Source field configuration
|
||||||
|
The `_source` field no longer supports `includes` and `excludes` parameters. When
|
||||||
|
`_source` is enabled, the entire original source will be stored.
|
||||||
|
|
||||||
|
==== Config based mappings
|
||||||
|
The ability to specify mappings in configuration files has been removed. To specify
|
||||||
|
default mappings that apply to multiple indexes, use index templates.
|
||||||
|
|
||||||
|
The following settings are no longer valid:
|
||||||
|
* `index.mapper.default_mapping_location`
|
||||||
|
* `index.mapper.default_percolator_mapping_location`
|
||||||
|
|
||||||
=== Codecs
|
=== Codecs
|
||||||
|
|
||||||
It is no longer possible to specify per-field postings and doc values formats
|
It is no longer possible to specify per-field postings and doc values formats
|
||||||
|
@ -341,6 +359,11 @@ Deprecated script parameters `id`, `file`, and `scriptField` have been removed
|
||||||
from all scriptable APIs. `script_id`, `script_file` and `script` should be used
|
from all scriptable APIs. `script_id`, `script_file` and `script` should be used
|
||||||
in their place.
|
in their place.
|
||||||
|
|
||||||
|
=== Groovy scripts sandbox
|
||||||
|
|
||||||
|
The groovy sandbox and related settings have been removed. Groovy is now a non
|
||||||
|
sandboxed scripting language, without any option to turn the sandbox on.
|
||||||
|
|
||||||
=== Plugins making use of scripts
|
=== Plugins making use of scripts
|
||||||
|
|
||||||
Plugins that make use of scripts must register their own script context through
|
Plugins that make use of scripts must register their own script context through
|
||||||
|
@ -377,6 +400,11 @@ be used separately to control whether `routing_nodes` should be returned.
|
||||||
|
|
||||||
=== Query DSL
|
=== Query DSL
|
||||||
|
|
||||||
|
Change to ranking behaviour: single-term queries on numeric fields now score in the same way as string fields (use of IDF, norms if enabled).
|
||||||
|
Previously, term queries on numeric fields were deliberately prevented from using the usual Lucene scoring logic and this behaviour was undocumented and, to some, unexpected.
|
||||||
|
If the introduction of scoring to numeric fields is undesirable for your query clauses, the fix is simple: wrap them in a `constant_score` or use a `filter` expression instead.
|
||||||
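For instance, a sketch of wrapping a numeric term clause in a `constant_score` so it no longer contributes a relevance-based score (field name and value are assumptions):

[source,js]
--------------------------------------------------
{
  "constant_score": {
    "filter": {
      "term": { "status_code": 200 }
    }
  }
}
--------------------------------------------------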
|
|
||||||
|
|
||||||
The `fuzzy_like_this` and `fuzzy_like_this_field` queries have been removed.
|
The `fuzzy_like_this` and `fuzzy_like_this_field` queries have been removed.
|
||||||
|
|
||||||
The `limit` filter is deprecated and becomes a no-op. You can achieve similar
|
The `limit` filter is deprecated and becomes a no-op. You can achieve similar
|
||||||
|
@ -389,3 +417,44 @@ favour or `bool`.
|
||||||
|
|
||||||
The `execution` option of the `terms` filter is now deprecated and ignored if
|
The `execution` option of the `terms` filter is now deprecated and ignored if
|
||||||
provided.
|
provided.
|
||||||
|
|
||||||
|
The `_cache` and `_cache_key` parameters of filters are deprecated in the REST
|
||||||
|
layer and removed in the Java API. In case they are specified they will be
|
||||||
|
ignored. Instead filters are always used as their own cache key and elasticsearch
|
||||||
|
makes decisions by itself about whether it should cache filters based on how
|
||||||
|
often they are used.
|
||||||
|
|
||||||
|
=== Snapshot and Restore
|
||||||
|
|
||||||
|
The obsolete parameters `expand_wildcards_open` and `expand_wildcards_close` are no longer
|
||||||
|
supported by the snapshot and restore operations. These parameters have been replaced by
|
||||||
|
a single `expand_wildcards` parameter. See <<multi-index,the multi-index docs>> for more.
|
||||||
|
|
||||||
|
=== `_shutdown` API
|
||||||
|
|
||||||
|
The `_shutdown` API has been removed without a replacement. Nodes should be managed via operating
|
||||||
|
systems and the provided start/stop scripts.
|
||||||
|
|
||||||
|
=== Analyze API
|
||||||
|
|
||||||
|
The Analyze API now returns 0 as the first token's position instead of 1.
|
||||||
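For example, a quick way to see the new numbering (the analyzer choice and sample text are arbitrary):

[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?analyzer=standard&pretty' -d 'quick brown fox'
--------------------------------------------------

The first token (`quick`) is now reported at position `0` rather than `1`.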
|
|
||||||
|
=== Multiple data.path striping
|
||||||
|
|
||||||
|
Previously, if the `data.path` setting listed multiple data paths, then a
|
||||||
|
shard would be ``striped'' across all paths by writing a whole file to each
|
||||||
|
path in turn (in accordance with the `index.store.distributor` setting). The
|
||||||
|
result was that the files from a single segment in a shard could be spread
|
||||||
|
across multiple disks, and the failure of any one disk could corrupt multiple
|
||||||
|
shards.
|
||||||
|
|
||||||
|
This striping is no longer supported. Instead, different shards may be
|
||||||
|
allocated to different paths, but all of the files in a single shard will be
|
||||||
|
written to the same path.
|
||||||
|
|
||||||
|
If striping is detected while starting Elasticsearch 2.0.0 or later, all of
|
||||||
|
the files belonging to the same shard will be migrated to the same path. If
|
||||||
|
there is not enough disk space to complete this migration, the upgrade will be
|
||||||
|
cancelled and can only be resumed once enough disk space is made available.
|
||||||
|
|
||||||
|
The `index.store.distributor` setting has also been removed.
|
||||||
|
|
|
@ -227,7 +227,7 @@ several attributes, for example:
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
curl -XPUT localhost:9200/test/_settings -d '{
|
curl -XPUT localhost:9200/test/_settings -d '{
|
||||||
"index.routing.allocation.include.group1" : "xxx"
|
"index.routing.allocation.include.group1" : "xxx",
|
||||||
"index.routing.allocation.include.group2" : "yyy",
|
"index.routing.allocation.include.group2" : "yyy",
|
||||||
"index.routing.allocation.exclude.group3" : "zzz",
|
"index.routing.allocation.exclude.group3" : "zzz",
|
||||||
"index.routing.allocation.require.group4" : "aaa"
|
"index.routing.allocation.require.group4" : "aaa"
|
||||||
|
|
|
@ -42,6 +42,9 @@ to `100mb`
|
||||||
|`http.max_initial_line_length` |The max length of an HTTP URL. Defaults
|
|`http.max_initial_line_length` |The max length of an HTTP URL. Defaults
|
||||||
to `4kb`
|
to `4kb`
|
||||||
|
|
||||||
|
|`http.max_header_size` | The max size of allowed headers. Defaults to `8kB`
|
||||||
|
|
||||||
|
|
||||||
|`http.compression` |Support for compression when possible (with
|
|`http.compression` |Support for compression when possible (with
|
||||||
Accept-Encoding). Defaults to `false`.
|
Accept-Encoding). Defaults to `false`.
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ plugin --install <org>/<user/component>/<version>
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
|
|
||||||
The plugins will be
|
The plugins will be
|
||||||
automatically downloaded in this case from `download.elasticsearch.org`,
|
automatically downloaded in this case from `download.elastic.co`,
|
||||||
and in case they don't exist there, from maven (central and sonatype).
|
and in case they don't exist there, from maven (central and sonatype).
|
||||||
|
|
||||||
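For instance, a hypothetical invocation pulling a community analysis plugin (the plugin name and version here are only placeholders; substitute the coordinates of the plugin you actually need):

[source,sh]
-----------------------------------
bin/plugin --install elasticsearch/elasticsearch-analysis-icu/2.5.0
-----------------------------------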
Note that when the plugin is located in maven central or sonatype
|
Note that when the plugin is located in maven central or sonatype
|
||||||
|
|
|
@ -11,26 +11,11 @@ The scripting module uses by default http://groovy.codehaus.org/[groovy]
|
||||||
scripting language with some extensions. Groovy is used since it is extremely
|
scripting language with some extensions. Groovy is used since it is extremely
|
||||||
fast and very simple to use.
|
fast and very simple to use.
|
||||||
|
|
||||||
.Groovy dynamic scripting disabled by default from v1.4.3
|
.Groovy dynamic scripting off by default from v1.4.3
|
||||||
[IMPORTANT]
|
[IMPORTANT]
|
||||||
===================================================
|
===================================================
|
||||||
|
|
||||||
Elasticsearch versions 1.3.0-1.3.7 and 1.4.0-1.4.2 have a vulnerability in the
|
Groovy dynamic scripting is off by default, preventing dynamic Groovy scripts
|
||||||
Groovy scripting engine. The vulnerability allows an attacker to construct
|
|
||||||
Groovy scripts that escape the sandbox and execute shell commands as the user
|
|
||||||
running the Elasticsearch Java VM.
|
|
||||||
|
|
||||||
If you are running a vulnerable version of Elasticsearch, you should either
|
|
||||||
upgrade to at least v1.3.8 or v1.4.3, or disable dynamic Groovy scripts by
|
|
||||||
adding this setting to the `config/elasticsearch.yml` file in all nodes in the
|
|
||||||
cluster:
|
|
||||||
|
|
||||||
[source,yaml]
|
|
||||||
-----------------------------------
|
|
||||||
script.groovy.sandbox.enabled: false
|
|
||||||
-----------------------------------
|
|
||||||
|
|
||||||
This will turn off the Groovy sandbox, thus preventing dynamic Groovy scripts
|
|
||||||
from being accepted as part of a request or retrieved from the special
|
from being accepted as part of a request or retrieved from the special
|
||||||
`.scripts` index. You will still be able to use Groovy scripts stored in files
|
`.scripts` index. You will still be able to use Groovy scripts stored in files
|
||||||
in the `config/scripts/` directory on every node.
|
in the `config/scripts/` directory on every node.
|
||||||
|
@ -69,7 +54,7 @@ GET /_search
|
||||||
{
|
{
|
||||||
"script_fields": {
|
"script_fields": {
|
||||||
"my_field": {
|
"my_field": {
|
||||||
"script_file": "my_test",
|
"script_file": "my_script",
|
||||||
"params": {
|
"params": {
|
||||||
"my_var": 2
|
"my_var": 2
|
||||||
}
|
}
|
||||||
|
@ -351,39 +336,6 @@ The default scripting language (assuming no `lang` parameter is provided) is
|
||||||
`groovy`. In order to change it, set the `script.default_lang` to the
|
`groovy`. In order to change it, set the `script.default_lang` to the
|
||||||
appropriate language.
|
appropriate language.
|
||||||
|
|
||||||
[float]
|
|
||||||
=== Groovy Sandboxing
|
|
||||||
|
|
||||||
Elasticsearch sandboxes Groovy scripts that are compiled and executed in order
|
|
||||||
to ensure they don't perform unwanted actions. There are a number of options
|
|
||||||
that can be used for configuring this sandbox:
|
|
||||||
|
|
||||||
`script.groovy.sandbox.receiver_whitelist`::
|
|
||||||
|
|
||||||
Comma-separated list of string classes for objects that may have methods
|
|
||||||
invoked.
|
|
||||||
|
|
||||||
`script.groovy.sandbox.package_whitelist`::
|
|
||||||
|
|
||||||
Comma-separated list of packages under which new objects may be constructed.
|
|
||||||
|
|
||||||
`script.groovy.sandbox.class_whitelist`::
|
|
||||||
|
|
||||||
Comma-separated list of classes that are allowed to be constructed.
|
|
||||||
|
|
||||||
`script.groovy.sandbox.method_blacklist`::
|
|
||||||
|
|
||||||
Comma-separated list of methods that are never allowed to be invoked,
|
|
||||||
regardless of target object.
|
|
||||||
|
|
||||||
`script.groovy.sandbox.enabled`::
|
|
||||||
|
|
||||||
Flag to enable the sandbox (defaults to `false` meaning the sandbox is
|
|
||||||
disabled).
|
|
||||||
|
|
||||||
When specifying whitelist or blacklist settings for the groovy sandbox, all
|
|
||||||
options replace the current whitelist, they are not additive.
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
=== Automatic Script Reloading
|
=== Automatic Script Reloading
|
||||||
|
|
||||||
|
@ -424,10 +376,7 @@ automatically loaded.
|
||||||
[float]
|
[float]
|
||||||
=== Lucene Expressions Scripts
|
=== Lucene Expressions Scripts
|
||||||
|
|
||||||
[WARNING]
|
experimental[The Lucene expressions module is undergoing significant development and the exposed functionality is likely to change in the future]
|
||||||
========================
|
|
||||||
This feature is *experimental* and subject to change in future versions.
|
|
||||||
========================
|
|
||||||
|
|
||||||
Lucene's expressions module provides a mechanism to compile a
|
Lucene's expressions module provides a mechanism to compile a
|
||||||
`javascript` expression to bytecode. This allows very fast execution,
|
`javascript` expression to bytecode. This allows very fast execution,
|
||||||
|
@ -440,9 +389,23 @@ for details on what operators and functions are available.
|
||||||
Variables in `expression` scripts are available to access:
|
Variables in `expression` scripts are available to access:
|
||||||
|
|
||||||
* Single valued document fields, e.g. `doc['myfield'].value`
|
* Single valued document fields, e.g. `doc['myfield'].value`
|
||||||
|
* Single valued document fields can also be accessed without `.value` e.g. `doc['myfield']`
|
||||||
* Parameters passed into the script, e.g. `mymodifier`
|
* Parameters passed into the script, e.g. `mymodifier`
|
||||||
* The current document's score, `_score` (only available when used in a `script_score`)
|
* The current document's score, `_score` (only available when used in a `script_score`)
|
||||||
|
|
||||||
|
Variables in `expression` scripts that are of type `date` may use the following member methods:
|
||||||
|
|
||||||
|
* getYear()
|
||||||
|
* getMonth()
|
||||||
|
* getDayOfMonth()
|
||||||
|
* getHourOfDay()
|
||||||
|
* getMinutes()
|
||||||
|
* getSeconds()
|
||||||
|
|
||||||
|
The following example shows the difference in years between the `date` fields date0 and date1:
|
||||||
|
|
||||||
|
`doc['date1'].getYear() - doc['date0'].getYear()`
|
||||||
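A sketch of putting that expression to work in a search request (the field names `date0`/`date1` come from the example above; the field alias and overall request shape are illustrative):

[source,js]
--------------------------------------------------
GET /_search
{
  "script_fields": {
    "years_between": {
      "lang": "expression",
      "script": "doc['date1'].getYear() - doc['date0'].getYear()"
    }
  }
}
--------------------------------------------------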
|
|
||||||
There are a few limitations relative to other script languages:
|
There are a few limitations relative to other script languages:
|
||||||
|
|
||||||
* Only numeric fields may be accessed
|
* Only numeric fields may be accessed
|
||||||
|
|
|
@ -10,85 +10,14 @@ As a general rule, filters should be used instead of queries:
|
||||||
[[caching]]
|
[[caching]]
|
||||||
=== Filters and Caching
|
=== Filters and Caching
|
||||||
|
|
||||||
Filters can be a great candidate for caching. Caching the result of a
|
Filters can be a great candidate for caching. Caching the document set that
|
||||||
filter does not require a lot of memory, and will cause other queries
|
a filter matches does not require much memory and can help improve
|
||||||
executing against the same filter (same parameters) to be blazingly
|
execution speed of queries.
|
||||||
fast.
|
|
||||||
|
|
||||||
However the cost of caching is not the same for all filters. For
|
Elasticsearch decides to cache filters based on how often they are used. For
|
||||||
instance some filters are already fast out of the box while caching could
|
this reason you might occasionally see better performance by splitting
|
||||||
add significant overhead, and some filters produce results that are already
|
complex filters into a static part that Elasticsearch will cache and a dynamic
|
||||||
cacheable so caching them is just a matter of putting the result in the
|
part, which is less costly to evaluate than the original filter.
|
||||||
cache.
|
|
||||||
|
|
||||||
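As an illustrative sketch of that split (field names are invented), the rarely-changing clauses can be grouped into their own `bool` filter, separate from a highly dynamic clause such as a `now`-based range:

[source,js]
--------------------------------------------------
{
  "filtered": {
    "filter": {
      "bool": {
        "must": [
          {
            "bool": {
              "must": [
                { "term":  { "status": "published" } },
                { "terms": { "category": ["news", "blog"] } }
              ]
            }
          },
          { "range": { "timestamp": { "gte": "now-1h" } } }
        ]
      }
    }
  }
}
--------------------------------------------------

The inner `bool` holding the unchanging clauses is a good caching candidate because it is reused verbatim across requests, while the `now`-based range is re-evaluated every time.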
The default caching policy, `_cache: auto`, tracks the 1000 most recently
|
|
||||||
used filters on a per-index basis and makes decisions based on their
|
|
||||||
frequency.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Filters that read directly the index structure
|
|
||||||
|
|
||||||
Some filters can directly read the index structure and potentially jump
|
|
||||||
over large sequences of documents that are not worth evaluating (for
|
|
||||||
instance when these documents do not match the query). Caching these
|
|
||||||
filters introduces overhead given that all documents that the filter
|
|
||||||
matches need to be consumed in order to be loaded into the cache.
|
|
||||||
|
|
||||||
These filters, which include the <<query-dsl-term-filter,term>> and
|
|
||||||
<<query-dsl-term-query,query>> filters, are only cached after they
|
|
||||||
appear 5 times or more in the history of the 1000 most recently used
|
|
||||||
filters.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Filters that produce results that are already cacheable
|
|
||||||
|
|
||||||
Some filters produce results that are already cacheable, and the difference
|
|
||||||
between caching and not caching them is the act of placing the result in
|
|
||||||
the cache or not. These filters, which include the
|
|
||||||
<<query-dsl-terms-filter,terms>>,
|
|
||||||
<<query-dsl-prefix-filter,prefix>>, and
|
|
||||||
<<query-dsl-range-filter,range>> filters, are by default cached after they
|
|
||||||
appear twice or more in the history of the most 1000 recently used filters.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Computational filters
|
|
||||||
|
|
||||||
Some filters need to run some computation in order to figure out whether
|
|
||||||
a given document matches a filter. These filters, which include the geo and
|
|
||||||
<<query-dsl-script-filter,script>> filters, but also the
|
|
||||||
<<query-dsl-terms-filter,terms>> and <<query-dsl-range-filter,range>>
|
|
||||||
filters when using the `fielddata` execution mode are never cached by default,
|
|
||||||
as it would require to evaluate the filter on all documents in your indices
|
|
||||||
while they can otherwise be only evaluated on documents that match the query.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Compound filters
|
|
||||||
|
|
||||||
The last type of filters are those working with other filters, and includes
|
|
||||||
the <<query-dsl-bool-filter,bool>>,
|
|
||||||
<<query-dsl-and-filter,and>>,
|
|
||||||
<<query-dsl-not-filter,not>> and
|
|
||||||
<<query-dsl-or-filter,or>> filters.
|
|
||||||
|
|
||||||
There is no general rule about these filters. Depending on the filters that
|
|
||||||
they wrap, they will sometimes return a filter that dynamically evaluates the
|
|
||||||
sub filters and sometimes evaluate the sub filters eagerly in order to return
|
|
||||||
a result that is already cacheable, so depending on the case, these filters
|
|
||||||
will be cached after they appear 2+ or 5+ times in the history of the most
|
|
||||||
1000 recently used filters.
|
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Overriding the default behaviour
|
|
||||||
|
|
||||||
All filters allow to set `_cache` element on them to explicitly control
|
|
||||||
caching. It accepts 3 values: `true` in order to cache the filter, `false`
|
|
||||||
to make sure that the filter will not be cached, and `auto`, which is the
|
|
||||||
default and will decide on whether to cache the filter based on the cost
|
|
||||||
to cache it and how often it has been used as explained above.
|
|
||||||
|
|
||||||
Filters also allow to set `_cache_key` which will be used as the
|
|
||||||
caching key for that filter. This can be handy when using very large
|
|
||||||
filters (like a terms filter with many elements in it).
|
|
||||||
|
|
||||||
include::filters/and-filter.asciidoc[]
|
include::filters/and-filter.asciidoc[]
|
||||||
|
|
||||||
|
|
|
@ -32,40 +32,3 @@ filters. Can be placed within queries that accept a filter.
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is only cached by default if there is evidence of
|
|
||||||
reuse. It is possible to opt-in explicitely for caching by setting `_cache`
|
|
||||||
to `true`. Since the `_cache` element requires to be set on the `and` filter
|
|
||||||
itself, the structure then changes a bit to have the filters provided within a
|
|
||||||
`filters` element:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"filtered" : {
|
|
||||||
"query" : {
|
|
||||||
"term" : { "name.first" : "shay" }
|
|
||||||
},
|
|
||||||
"filter" : {
|
|
||||||
"and" : {
|
|
||||||
"filters": [
|
|
||||||
{
|
|
||||||
"range" : {
|
|
||||||
"postDate" : {
|
|
||||||
"from" : "2010-03-01",
|
|
||||||
"to" : "2010-04-01"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"prefix" : { "name.second" : "ba" }
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"_cache" : true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -24,7 +24,7 @@ accept a filter.
|
||||||
},
|
},
|
||||||
"must_not" : {
|
"must_not" : {
|
||||||
"range" : {
|
"range" : {
|
||||||
"age" : { "from" : 10, "to" : 20 }
|
"age" : { "gte" : 10, "lt" : 20 }
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"should" : [
|
"should" : [
|
||||||
|
|
|
@ -230,11 +230,3 @@ are not supported. Here is an example:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is not cached by default. The `_cache` can be
|
|
||||||
set to `true` to cache the *result* of the filter. This is handy when
|
|
||||||
the same bounding box parameters are used on several (many) other
|
|
||||||
queries. Note, the process of caching the first execution is higher when
|
|
||||||
caching (since it needs to satisfy different queries).
|
|
||||||
|
|
|
@ -172,11 +172,3 @@ The `geo_distance` filter can work with multiple locations / points per
|
||||||
document. Once a single location / point matches the filter, the
|
document. Once a single location / point matches the filter, the
|
||||||
document will be included in the filter.
|
document will be included in the filter.
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is not cached by default. The `_cache` can be
|
|
||||||
set to `true` to cache the *result* of the filter. This is handy when
|
|
||||||
the same point and distance parameters are used on several (many) other
|
|
||||||
queries. Note, the process of caching the first execution is higher when
|
|
||||||
caching (since it needs to satisfy different queries).
|
|
||||||
|
|
|
@ -116,11 +116,3 @@ The filter *requires* the
|
||||||
<<mapping-geo-point-type,geo_point>> type to be
|
<<mapping-geo-point-type,geo_point>> type to be
|
||||||
set on the relevant field.
|
set on the relevant field.
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is not cached by default. The `_cache` can be
|
|
||||||
set to `true` to cache the *result* of the filter. This is handy when
|
|
||||||
the same points parameters are used on several (many) other queries.
|
|
||||||
Note, the process of caching the first execution is higher when caching
|
|
||||||
(since it needs to satisfy different queries).
|
|
||||||
|
|
|
@ -110,12 +110,3 @@ shape:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the Filter is not cached by default. Setting `_cache` to
|
|
||||||
`true` will mean the results of the Filter will be cached. Since shapes
|
|
||||||
can contain 10s-100s of coordinates and any one differing means a new
|
|
||||||
shape, it may make sense to only using caching when you are sure that
|
|
||||||
the shapes will remain reasonably static.
|
|
||||||
|
|
||||||
|
|
|
@ -61,10 +61,3 @@ next to the given cell.
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is not cached by default. The
|
|
||||||
`_cache` parameter can be set to `true` to turn caching on.
|
|
||||||
By default the filter uses the resulting geohash cells as a cache key.
|
|
||||||
This can be changed by using the `_cache_key` option.
|
|
||||||
|
|
|
@ -88,9 +88,3 @@ APIS, eg:
|
||||||
curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
|
curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The `has_child` filter cannot be cached in the filter cache. The `_cache`
|
|
||||||
and `_cache_key` options are a no-op in this filter. Also any filter that
|
|
||||||
wraps the `has_child` filter either directly or indirectly will not be cached.
|
|
||||||
|
|
|
@ -63,9 +63,3 @@ APIS, eg:
|
||||||
curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
|
curl -XGET "http://localhost:9200/_stats/id_cache?pretty&human"
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The `has_parent` filter cannot be cached in the filter cache. The `_cache`
|
|
||||||
and `_cache_key` options are a no-op in this filter. Also any filter that
|
|
||||||
wraps the `has_parent` filter either directly or indirectly will not be cached.
|
|
||||||
|
|
|
@ -2,10 +2,7 @@
|
||||||
=== Nested Filter
|
=== Nested Filter
|
||||||
|
|
||||||
A `nested` filter works in a similar fashion to the
|
A `nested` filter works in a similar fashion to the
|
||||||
<<query-dsl-nested-query,nested>> query, except it's
|
<<query-dsl-nested-query,nested>> query. For example:
|
||||||
used as a filter. It follows exactly the same structure, but also allows
|
|
||||||
to cache the results (set `_cache` to `true`), and have it named (set
|
|
||||||
the `_name` value). For example:
|
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -26,8 +23,7 @@ the `_name` value). For example:
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
"_cache" : true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,33 +50,3 @@ Or, in a longer form with a `filter` element:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is only cached if there is evidence of reuse.
|
|
||||||
The `_cache` can be set to `true` in order to cache it (though usually
|
|
||||||
not needed). Here is an example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"filtered" : {
|
|
||||||
"query" : {
|
|
||||||
"term" : { "name.first" : "shay" }
|
|
||||||
},
|
|
||||||
"filter" : {
|
|
||||||
"not" : {
|
|
||||||
"filter" : {
|
|
||||||
"range" : {
|
|
||||||
"postDate" : {
|
|
||||||
"from" : "2010-03-01",
|
|
||||||
"to" : "2010-04-01"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"_cache" : true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -27,36 +27,3 @@ filters. Can be placed within queries that accept a filter.
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is only cached by default if there is evidence
|
|
||||||
of reuse. The `_cache` can be
|
|
||||||
set to `true` in order to cache it (though usually not needed). Since
|
|
||||||
the `_cache` element requires to be set on the `or` filter itself, the
|
|
||||||
structure then changes a bit to have the filters provided within a
|
|
||||||
`filters` element:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"filtered" : {
|
|
||||||
"query" : {
|
|
||||||
"term" : { "name.first" : "shay" }
|
|
||||||
},
|
|
||||||
"filter" : {
|
|
||||||
"or" : {
|
|
||||||
"filters" : [
|
|
||||||
{
|
|
||||||
"term" : { "name.second" : "banon" }
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"term" : { "name.nick" : "kimchy" }
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"_cache" : true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -16,22 +16,3 @@ a filter. Can be placed within queries that accept a filter.
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is cached by default if there is evidence of reuse.
|
|
||||||
The `_cache` can be set to `true` in order to cache it. Here is an example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"constant_score" : {
|
|
||||||
"filter" : {
|
|
||||||
"prefix" : {
|
|
||||||
"user" : "ki",
|
|
||||||
"_cache" : true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -19,34 +19,3 @@ that accept a filter.
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is only cached by default if there is evidence of reuse.
|
|
||||||
|
|
||||||
The `_cache` can be
|
|
||||||
set to `true` to cache the *result* of the filter. This is handy when
|
|
||||||
the same query is used on several (many) other queries. Note, the
|
|
||||||
process of caching the first execution is higher when not caching (since
|
|
||||||
it needs to satisfy different queries).
|
|
||||||
|
|
||||||
Setting the `_cache` element requires a different format for the
|
|
||||||
`query`:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"constantScore" : {
|
|
||||||
"filter" : {
|
|
||||||
"fquery" : {
|
|
||||||
"query" : {
|
|
||||||
"query_string" : {
|
|
||||||
"query" : "this AND that OR thus"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"_cache" : true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -95,11 +95,3 @@ requires more memory, so make sure you have sufficient memory on your nodes in
|
||||||
order to use this execution mode. It usually makes sense to use it on fields
|
order to use this execution mode. It usually makes sense to use it on fields
|
||||||
you're already aggregating or sorting by.
|
you're already aggregating or sorting by.
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is only cached by default if there is evidence of reuse. The
|
|
||||||
`_cache` can be set to `false` to turn it off.
|
|
||||||
|
|
||||||
Having the `now` expression used without rounding will make the filter unlikely to be
|
|
||||||
cached since reuse is very unlikely.
|
|
||||||
|
|
|
@ -51,9 +51,7 @@ You have to enable caching explicitly in order to have the
|
||||||
"flags" : "INTERSECTION|COMPLEMENT|EMPTY",
|
"flags" : "INTERSECTION|COMPLEMENT|EMPTY",
|
||||||
"max_determinized_states": 20000
|
"max_determinized_states": 20000
|
||||||
},
|
},
|
||||||
"_name":"test",
|
"_name":"test"
|
||||||
"_cache" : true,
|
|
||||||
"_cache_key" : "key"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,11 +43,3 @@ to use the ability to pass parameters to the script itself, for example:
|
||||||
}
|
}
|
||||||
----------------------------------------------
|
----------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is not cached by default. The `_cache` can be
|
|
||||||
set to `true` to cache the *result* of the filter. This is handy when
|
|
||||||
the same script and parameters are used on several (many) other queries.
|
|
||||||
Note, the process of caching the first execution is higher when caching
|
|
||||||
(since it needs to satisfy different queries).
|
|
||||||
|
|
|
@ -17,22 +17,3 @@ accept a filter, for example:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is only cached by default if there is evidence of reuse.
|
|
||||||
The `_cache` can be set to `false` to turn it off. Here is an example:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"constant_score" : {
|
|
||||||
"filter" : {
|
|
||||||
"term" : {
|
|
||||||
"user" : "kimchy",
|
|
||||||
"_cache" : false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
|
@ -18,13 +18,6 @@ Filters documents that have fields that match any of the provided terms
|
||||||
The `terms` filter is also aliased with `in` as the filter name for
|
The `terms` filter is also aliased with `in` as the filter name for
|
||||||
simpler usage.
|
simpler usage.
|
||||||
|
|
||||||
[float]
|
|
||||||
==== Caching
|
|
||||||
|
|
||||||
The result of the filter is cached if there is evidence of reuse. It is
|
|
||||||
possible to enable caching explicitely by setting `_cache` to `true` and
|
|
||||||
to disable caching by setting `_cache` to `false`.
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
==== Terms lookup mechanism
|
==== Terms lookup mechanism
|
||||||
|
|
||||||
|
@ -93,8 +86,7 @@ curl -XGET localhost:9200/tweets/_search -d '{
|
||||||
"type" : "user",
|
"type" : "user",
|
||||||
"id" : "2",
|
"id" : "2",
|
||||||
"path" : "followers"
|
"path" : "followers"
|
||||||
},
|
}
|
||||||
"_cache_key" : "user_2_friends"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -102,10 +94,6 @@ curl -XGET localhost:9200/tweets/_search -d '{
|
||||||
}'
|
}'
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
If there are lots of matching values, then `_cache_key` is recommended to be set,
|
|
||||||
so that the filter cache will not store a reference to the potentially heavy
|
|
||||||
terms filter.
|
|
||||||
|
|
||||||
The structure of the external terms document can also include an array of
|
The structure of the external terms document can also include an array of
|
||||||
inner objects, for example:
|
inner objects, for example:
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,8 @@ include::queries/range-query.asciidoc[]
|
||||||
|
|
||||||
include::queries/regexp-query.asciidoc[]
|
include::queries/regexp-query.asciidoc[]
|
||||||
|
|
||||||
|
include::queries/span-containing-query.asciidoc[]
|
||||||
|
|
||||||
include::queries/span-first-query.asciidoc[]
|
include::queries/span-first-query.asciidoc[]
|
||||||
|
|
||||||
include::queries/span-multi-term-query.asciidoc[]
|
include::queries/span-multi-term-query.asciidoc[]
|
||||||
|
@ -64,6 +66,8 @@ include::queries/span-or-query.asciidoc[]
|
||||||
|
|
||||||
include::queries/span-term-query.asciidoc[]
|
include::queries/span-term-query.asciidoc[]
|
||||||
|
|
||||||
|
include::queries/span-within-query.asciidoc[]
|
||||||
|
|
||||||
include::queries/term-query.asciidoc[]
|
include::queries/term-query.asciidoc[]
|
||||||
|
|
||||||
include::queries/terms-query.asciidoc[]
|
include::queries/terms-query.asciidoc[]
|
||||||
|
|
|
@ -175,7 +175,8 @@ doing so would look like:
|
||||||
"field_value_factor": {
|
"field_value_factor": {
|
||||||
"field": "popularity",
|
"field": "popularity",
|
||||||
"factor": 1.2,
|
"factor": 1.2,
|
||||||
"modifier": "sqrt"
|
"modifier": "sqrt",
|
||||||
|
"missing": 1
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
@ -193,6 +194,8 @@ There are a number of options for the `field_value_factor` function:
|
||||||
|`modifier` |Modifier to apply to the field value, can be one of: `none`, `log`,
|
|`modifier` |Modifier to apply to the field value, can be one of: `none`, `log`,
|
||||||
`log1p`, `log2p`, `ln`, `ln1p`, `ln2p`, `square`, `sqrt`, or `reciprocal`.
|
`log1p`, `log2p`, `ln`, `ln1p`, `ln2p`, `square`, `sqrt`, or `reciprocal`.
|
||||||
Defaults to `none`.
|
Defaults to `none`.
|
||||||
|
|`missing` |Value used if the document doesn't have that field. The modifier
|
||||||
|
and factor are still applied to it as though it were read from the document.
|
||||||
|=======================================================================
|
|=======================================================================
|
||||||
|
|
||||||
Keep in mind that taking the log() of 0, or the square root of a negative number
|
Keep in mind that taking the log() of 0, or the square root of a negative number
|
||||||
|
|
|
@ -87,10 +87,10 @@ if one of the low frequency (below the cutoff) terms in the case of an
|
||||||
operator match.
|
operator match.
|
||||||
|
|
||||||
This query allows handling `stopwords` dynamically at runtime, is domain
|
This query allows handling `stopwords` dynamically at runtime, is domain
|
||||||
independent and doesn't require on a stopword file. It prevent scoring /
|
independent and doesn't require a stopword file. It prevents scoring /
|
||||||
iterating high frequency terms and only takes the terms into account if a
|
iterating high frequency terms and only takes the terms into account if a
|
||||||
more significant / lower frequency terms match a document. Yet, if all of
|
more significant / lower frequency term matches a document. Yet, if all
|
||||||
the query terms are above the given `cutoff_frequency` the query is
|
of the query terms are above the given `cutoff_frequency` the query is
|
||||||
automatically transformed into a pure conjunction (`and`) query to
|
automatically transformed into a pure conjunction (`and`) query to
|
||||||
ensure fast execution.
|
ensure fast execution.
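As a quick illustration (the `body` field and the query text below are placeholders, not taken from this change), a `common` terms query with a `cutoff_frequency` might look like:

[source,js]
--------------------------------------------------
{
    "common" : {
        "body" : {
            "query" : "this is bonsai cool",
            "cutoff_frequency" : 0.001
        }
    }
}
--------------------------------------------------

With these illustrative values, terms that appear in more than 0.1% of all documents are treated as high frequency terms and are only scored for documents that already match the lower frequency terms.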
|
||||||
|
|
||||||
|
@ -98,7 +98,7 @@ The `cutoff_frequency` can either be relative to the total number of
|
||||||
documents if in the range `[0..1)` or absolute if greater or equal to
|
documents if in the range `[0..1)` or absolute if greater or equal to
|
||||||
`1.0`.
|
`1.0`.
|
||||||
|
|
||||||
Here is an example showing a query composed of stopwords exclusivly:
|
Here is an example showing a query composed of stopwords exclusively:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
[[query-dsl-span-containing-query]]
|
||||||
|
=== Span Containing Query
|
||||||
|
|
||||||
|
Returns matches which enclose another span query. The span containing
|
||||||
|
query maps to Lucene `SpanContainingQuery`. Here is an example:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
"span_containing" : {
|
||||||
|
"little" : {
|
||||||
|
"span_term" : { "field1" : "foo" }
|
||||||
|
},
|
||||||
|
"big" : {
|
||||||
|
"span_near" : {
|
||||||
|
"clauses" : [
|
||||||
|
{ "span_term" : { "field1" : "bar" } },
|
||||||
|
{ "span_term" : { "field1" : "baz" } }
|
||||||
|
],
|
||||||
|
"slop" : 5,
|
||||||
|
"in_order" : true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
The `big` and `little` clauses can be any span type query. Matching
|
||||||
|
spans from `big` that contain matches from `little` are returned.
|
|
@ -0,0 +1,29 @@
|
||||||
|
[[query-dsl-span-within-query]]
|
||||||
|
=== Span Within Query
|
||||||
|
|
||||||
|
Returns matches which are enclosed inside another span query. The span within
|
||||||
|
query maps to Lucene `SpanWithinQuery`. Here is an example:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
"span_within" : {
|
||||||
|
"little" : {
|
||||||
|
"span_term" : { "field1" : "foo" }
|
||||||
|
},
|
||||||
|
"big" : {
|
||||||
|
"span_near" : {
|
||||||
|
"clauses" : [
|
||||||
|
{ "span_term" : { "field1" : "bar" } },
|
||||||
|
{ "span_term" : { "field1" : "baz" } }
|
||||||
|
],
|
||||||
|
"slop" : 5,
|
||||||
|
"in_order" : true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
The `big` and `little` clauses can be any span type query. Matching
|
||||||
|
spans from `little` that are enclosed within `big` are returned.
|
|
@ -12,7 +12,7 @@ GET /_search
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"template": {
|
"template": {
|
||||||
"query": { "match": { "text": "{query_string}" }}},
|
"query": { "match": { "text": "{{query_string}}" }},
|
||||||
"params" : {
|
"params" : {
|
||||||
"query_string" : "all about search"
|
"query_string" : "all about search"
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ GET /_search
|
||||||
{
|
{
|
||||||
"query": {
|
"query": {
|
||||||
"template": {
|
"template": {
|
||||||
"query": "{ \"match\": { \"text\": \"{query_string}\" }}}", <1>
|
"query": "{ \"match\": { \"text\": \"{{query_string}}\" }}", <1>
|
||||||
"params" : {
|
"params" : {
|
||||||
"query_string" : "all about search"
|
"query_string" : "all about search"
|
||||||
}
|
}
|
||||||
|
@ -85,7 +85,7 @@ Alternatively, you can register a query template in the special `.scripts` index
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
PUT /_search/template/my_template
|
PUT /_search/template/my_template
|
||||||
{
|
{
|
||||||
"template": { "match": { "text": "{query_string}" }}},
|
"template": { "match": { "text": "{{query_string}}" }},
|
||||||
}
|
}
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -105,3 +105,5 @@ include::search/percolate.asciidoc[]
|
||||||
|
|
||||||
include::search/more-like-this.asciidoc[]
|
include::search/more-like-this.asciidoc[]
|
||||||
|
|
||||||
|
include::search/field-stats.asciidoc[]
|
||||||
|
|
||||||
|
|
|
@ -68,6 +68,8 @@ Some aggregations work on values extracted from the aggregated documents. Typica
|
||||||
a specific document field which is set using the `field` key for the aggregations. It is also possible to define a
|
a specific document field which is set using the `field` key for the aggregations. It is also possible to define a
|
||||||
<<modules-scripting,`script`>> which will generate the values (per document).
|
<<modules-scripting,`script`>> which will generate the values (per document).
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
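For example, here is a sketch of an aggregation whose values are generated by an inline script rather than read from a field (the `avg_grade` name and the `grade` field are illustrative assumptions, not part of this change):

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "avg_grade" : {
            "avg" : {
                "script" : "doc['grade'].value"
            }
        }
    }
}
--------------------------------------------------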
|
||||||
|
|
||||||
When both `field` and `script` settings are configured for the aggregation, the script will be treated as a
|
When both `field` and `script` settings are configured for the aggregation, the script will be treated as a
|
||||||
`value script`. While normal scripts are evaluated on a document level (i.e. the script has access to all the data
|
`value script`. While normal scripts are evaluated on a document level (i.e. the script has access to all the data
|
||||||
associated with the document), value scripts are evaluated on the *value* level. In this mode, the values are extracted
|
associated with the document), value scripts are evaluated on the *value* level. In this mode, the values are extracted
|
||||||
|
@ -116,6 +118,38 @@ aggregated for the buckets created by their "parent" bucket aggregation.
|
||||||
There are different bucket aggregators, each with a different "bucketing" strategy. Some define a single bucket, some
|
There are different bucket aggregators, each with a different "bucketing" strategy. Some define a single bucket, some
|
||||||
define fixed number of multiple buckets, and others dynamically create the buckets during the aggregation process.
|
define fixed number of multiple buckets, and others dynamically create the buckets during the aggregation process.
|
||||||
|
|
||||||
|
[float]
|
||||||
|
=== Reducer Aggregations
|
||||||
|
|
||||||
|
coming[2.0.0]
|
||||||
|
|
||||||
|
experimental[]
|
||||||
|
|
||||||
|
Reducer aggregations work on the outputs produced from other aggregations rather than from document sets, adding
|
||||||
|
information to the output tree. There are many different types of reducer, each computing different information from
|
||||||
|
other aggregations, but these types can be broken down into two families:
|
||||||
|
|
||||||
|
_Parent_::
|
||||||
|
A family of reducer aggregations that is provided with the output of its parent aggregation and is able
|
||||||
|
to compute new buckets or new aggregations to add to existing buckets.
|
||||||
|
|
||||||
|
_Sibling_::
|
||||||
|
Reducer aggregations that are provided with the output of a sibling aggregation and are able to compute a
|
||||||
|
new aggregation which will be at the same level as the sibling aggregation.
|
||||||
|
|
||||||
|
Reducer aggregations can reference the aggregations they need to perform their computation by using the `buckets_paths`
|
||||||
|
parameter to indicate the paths to the required metrics. The syntax for defining these paths can be found in the
|
||||||
|
<<search-aggregations-bucket-terms-aggregation-order, terms aggregation order>> section.
|
||||||
|
|
||||||
|
?????? SHOULD THE SECTION ABOUT DEFINING AGGREGATION PATHS
|
||||||
|
BE IN THIS PAGE AND REFERENCED FROM THE TERMS AGGREGATION DOCUMENTATION ???????
|
||||||
|
|
||||||
|
Reducer aggregations cannot have sub-aggregations, but depending on the type they can reference another reducer in the `buckets_path`,
|
||||||
|
allowing reducers to be chained.
|
||||||
|
|
||||||
|
NOTE: Because reducer aggregations only add to the output, when chaining reducer aggregations the output of each reducer will be
|
||||||
|
included in the final output.
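To make the parent/sibling distinction more concrete, here is a hedged sketch of what a sibling reducer request might look like. The `max_bucket` reducer, the `sales_per_month`/`sales` aggregation names and the `>` path separator are assumptions based on the description above and the reducer pages included below; the exact syntax may differ as this feature evolves.

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "sales_per_month" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            },
            "aggs" : {
                "sales" : { "sum" : { "field" : "price" } }
            }
        },
        "max_monthly_sales" : {
            "max_bucket" : {
                "buckets_paths" : "sales_per_month>sales"
            }
        }
    }
}
--------------------------------------------------

In this sketch `max_monthly_sales` is a sibling reducer: it reads the `sales` metric of every `sales_per_month` bucket and adds a new aggregation at the same level as `sales_per_month`.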
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
=== Caching heavy aggregations
|
=== Caching heavy aggregations
|
||||||
|
|
||||||
|
@ -195,3 +229,6 @@ Then that piece of metadata will be returned in place for our `titles` terms agg
|
||||||
include::aggregations/metrics.asciidoc[]
|
include::aggregations/metrics.asciidoc[]
|
||||||
|
|
||||||
include::aggregations/bucket.asciidoc[]
|
include::aggregations/bucket.asciidoc[]
|
||||||
|
|
||||||
|
include::aggregations/reducer.asciidoc[]
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ Response:
|
||||||
|
|
||||||
Like with the normal <<search-aggregations-bucket-histogram-aggregation,histogram>>, both document level scripts and
|
Like with the normal <<search-aggregations-bucket-histogram-aggregation,histogram>>, both document level scripts and
|
||||||
value level scripts are supported. It is also possible to control the order of the returned buckets using the `order`
|
value level scripts are supported. It is also possible to control the order of the returned buckets using the `order`
|
||||||
settings and filter the returned buckets based on a `min_doc_count` setting (by default all buckets with
|
settings and filter the returned buckets based on a `min_doc_count` setting (by default all buckets between the first
|
||||||
`min_doc_count > 0` will be returned). This histogram also supports the `extended_bounds` setting, which enables extending
|
bucket that matches documents and the last one are returned). This histogram also supports the `extended_bounds`
|
||||||
the bounds of the histogram beyond the data itself (to read more on why you'd want to do that please refer to the
|
setting, which enables extending the bounds of the histogram beyond the data itself (to read more on why you'd want to
|
||||||
explanation <<search-aggregations-bucket-histogram-aggregation-extended-bounds,here>>).
|
do that please refer to the explanation <<search-aggregations-bucket-histogram-aggregation-extended-bounds,here>>).
|
||||||
|
|
|
@ -50,6 +50,10 @@ And the following may be the response:
|
||||||
"key": 50,
|
"key": 50,
|
||||||
"doc_count": 4
|
"doc_count": 4
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"key": 100,
|
||||||
|
"doc_count": 0
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"key": 150,
|
"key": 150,
|
||||||
"doc_count": 3
|
"doc_count": 3
|
||||||
|
@ -60,10 +64,11 @@ And the following may be the response:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
The response above shows that none of the aggregated products has a price that falls within the range of `[100 - 150)`.
|
==== Minimum document count
|
||||||
By default, the response will only contain those buckets with a `doc_count` greater than 0. It is possible change that
|
|
||||||
and request buckets with either a higher minimum count or even 0 (in which case elasticsearch will "fill in the gaps"
|
The response above shows that no documents have a price that falls within the range of `[100 - 150)`. By default the
|
||||||
and create buckets with zero documents). This can be configured using the `min_doc_count` setting:
|
response will fill gaps in the histogram with empty buckets. It is possible to change that and request buckets with
|
||||||
|
a higher minimum count thanks to the `min_doc_count` setting:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -73,7 +78,7 @@ and create buckets with zero documents). This can be configured using the `min_d
|
||||||
"histogram" : {
|
"histogram" : {
|
||||||
"field" : "price",
|
"field" : "price",
|
||||||
"interval" : 50,
|
"interval" : 50,
|
||||||
"min_doc_count" : 0
|
"min_doc_count" : 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -96,10 +101,6 @@ Response:
|
||||||
"key": 50,
|
"key": 50,
|
||||||
"doc_count": 4
|
"doc_count": 4
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"key" : 100,
|
|
||||||
"doc_count" : 0 <1>
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"key": 150,
|
"key": 150,
|
||||||
"doc_count": 3
|
"doc_count": 3
|
||||||
|
@ -110,13 +111,11 @@ Response:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
<1> No documents were found that belong in this bucket, yet it is still returned with zero `doc_count`.
|
|
||||||
|
|
||||||
[[search-aggregations-bucket-histogram-aggregation-extended-bounds]]
|
[[search-aggregations-bucket-histogram-aggregation-extended-bounds]]
|
||||||
By default the date_histogram / histogram returns all the buckets within the range of the data itself, that is, the documents with
|
By default the date_histogram / histogram returns all the buckets within the range of the data itself, that is, the documents with
|
||||||
the smallest values (for the field on which the histogram is computed) will determine the min bucket (the bucket with the smallest key) and the
|
the smallest values (for the field on which the histogram is computed) will determine the min bucket (the bucket with the smallest key) and the
|
||||||
documents with the highest values will determine the max bucket (the bucket with the highest key). Often, when
|
documents with the highest values will determine the max bucket (the bucket with the highest key). Often, when
|
||||||
requesting empty buckets (`"min_doc_count" : 0`), this causes a confusion, specifically, when the data is also filtered.
|
requesting empty buckets, this causes confusion, specifically when the data is also filtered.
|
||||||
|
|
||||||
To understand why, let's look at an example:
|
To understand why, let's look at an example:
|
||||||
|
|
||||||
|
@ -149,7 +148,6 @@ Example:
|
||||||
"histogram" : {
|
"histogram" : {
|
||||||
"field" : "price",
|
"field" : "price",
|
||||||
"interval" : 50,
|
"interval" : 50,
|
||||||
"min_doc_count" : 0,
|
|
||||||
"extended_bounds" : {
|
"extended_bounds" : {
|
||||||
"min" : 0,
|
"min" : 0,
|
||||||
"max" : 500
|
"max" : 500
|
||||||
|
@ -265,67 +263,6 @@ PATH := <AGG_NAME>[<AGG_SEPARATOR><AGG_NAME>]*[<METRIC_SEPARATOR
|
||||||
The above will sort the buckets based on the avg rating among the promoted products
|
The above will sort the buckets based on the avg rating among the promoted products
|
||||||
|
|
||||||
|
|
||||||
==== Minimum document count
|
|
||||||
|
|
||||||
It is possible to only return buckets that have a document count that is greater than or equal to a configured
|
|
||||||
limit through the `min_doc_count` option.
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"aggs" : {
|
|
||||||
"prices" : {
|
|
||||||
"histogram" : {
|
|
||||||
"field" : "price",
|
|
||||||
"interval" : 50,
|
|
||||||
"min_doc_count": 10
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
The above aggregation would only return buckets that contain 10 documents or more. Default value is `1`.
|
|
||||||
|
|
||||||
NOTE: The special value `0` can be used to add empty buckets to the response between the minimum and the maximum buckets.
|
|
||||||
Here is an example of what the response could look like:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"aggregations": {
|
|
||||||
"prices": {
|
|
||||||
"buckets": {
|
|
||||||
"0": {
|
|
||||||
"key": 0,
|
|
||||||
"doc_count": 2
|
|
||||||
},
|
|
||||||
"50": {
|
|
||||||
"key": 50,
|
|
||||||
"doc_count": 0
|
|
||||||
},
|
|
||||||
"150": {
|
|
||||||
"key": 150,
|
|
||||||
"doc_count": 3
|
|
||||||
},
|
|
||||||
"200": {
|
|
||||||
"key": 150,
|
|
||||||
"doc_count": 0
|
|
||||||
},
|
|
||||||
"250": {
|
|
||||||
"key": 150,
|
|
||||||
"doc_count": 0
|
|
||||||
},
|
|
||||||
"300": {
|
|
||||||
"key": 150,
|
|
||||||
"doc_count": 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
==== Offset
|
==== Offset
|
||||||
|
|
||||||
By default the bucket keys start with 0 and then continue in evenly spaced steps of `interval`, e.g. if the interval is 10 the first buckets
|
By default the bucket keys start with 0 and then continue in evenly spaced steps of `interval`, e.g. if the interval is 10 the first buckets
|
||||||
|
|
|
@ -128,6 +128,8 @@ It is also possible to customize the key for each range:
|
||||||
|
|
||||||
==== Script
|
==== Script
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
{
|
{
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
An aggregation that returns interesting or unusual occurrences of terms in a set.
|
An aggregation that returns interesting or unusual occurrences of terms in a set.
|
||||||
|
|
||||||
experimental[]
|
experimental[The `significant_terms` aggregation can be very heavy when run on large indices. Work is in progress to provide more lightweight sampling techniques. As a result, the API for this feature may change in non-backwards compatible ways]
|
||||||
|
|
||||||
.Example use cases:
|
.Example use cases:
|
||||||
* Suggesting "H5N1" when users search for "bird flu" in text
|
* Suggesting "H5N1" when users search for "bird flu" in text
|
||||||
|
|
|
@ -441,6 +441,9 @@ Generating the terms using a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
|
|
||||||
==== Value Script
|
==== Value Script
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
|
@ -610,7 +613,7 @@ this would typically be too costly in terms of RAM.
|
||||||
[[search-aggregations-bucket-terms-aggregation-execution-hint]]
|
[[search-aggregations-bucket-terms-aggregation-execution-hint]]
|
||||||
==== Execution hint
|
==== Execution hint
|
||||||
|
|
||||||
experimental[]
|
experimental[The automated execution optimization is experimental, so this parameter is provided temporarily as a way to override the default behaviour]
|
||||||
|
|
||||||
There are different mechanisms by which terms aggregations can be executed:
|
There are different mechanisms by which terms aggregations can be executed:
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,8 @@ Computing the average grade based on a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
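For example, assuming a script named `my_avg_script` has already been stored as an indexed script (the script name is purely illustrative, and `avg_grade` mirrors the example above), the aggregation could reference it instead of an inline script:

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "avg_grade" : {
            "avg" : {
                "script_id" : "my_avg_script"
            }
        }
    }
}
--------------------------------------------------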
|
||||||
|
|
||||||
===== Value Script
|
===== Value Script
|
||||||
|
|
||||||
It turned out that the exam was way above the level of the students and a grade correction needs to be applied. We can use a value script to get the new average:
|
It turned out that the exam was way above the level of the students and a grade correction needs to be applied. We can use a value script to get the new average:
|
||||||
|
|
|
@ -23,10 +23,10 @@ match a query:
|
||||||
|
|
||||||
==== Precision control
|
==== Precision control
|
||||||
|
|
||||||
experimental[]
|
|
||||||
|
|
||||||
This aggregation also supports the `precision_threshold` and `rehash` options:
|
This aggregation also supports the `precision_threshold` and `rehash` options:
|
||||||
|
|
||||||
|
experimental[The `precision_threshold` and `rehash` options are specific to the current internal implementation of the `cardinality` agg, which may change in the future]
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
{
|
{
|
||||||
|
@ -42,14 +42,14 @@ This aggregation also supports the `precision_threshold` and `rehash` options:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
<1> experimental[] The `precision_threshold` options allows to trade memory for accuracy, and
|
<1> The `precision_threshold` option allows you to trade memory for accuracy, and
|
||||||
defines a unique count below which counts are expected to be close to
|
defines a unique count below which counts are expected to be close to
|
||||||
accurate. Above this value, counts might become a bit more fuzzy. The maximum
|
accurate. Above this value, counts might become a bit more fuzzy. The maximum
|
||||||
supported value is 40000, thresholds above this number will have the same
|
supported value is 40000, thresholds above this number will have the same
|
||||||
effect as a threshold of 40000.
|
effect as a threshold of 40000.
|
||||||
Default value depends on the number of parent aggregations that create multiple
|
Default value depends on the number of parent aggregations that create multiple
|
||||||
buckets (such as terms or histograms).
|
buckets (such as terms or histograms).
|
||||||
<2> experimental[] If you computed a hash on client-side, stored it into your documents and want
|
<2> If you computed a hash on client-side, stored it into your documents and want
|
||||||
Elasticsearch to use it to compute counts using this hash function without
|
Elasticsearch to use it to compute counts using this hash function without
|
||||||
rehashing values, it is possible to specify `rehash: false`. Default value is
|
rehashing values, it is possible to specify `rehash: false`. Default value is
|
||||||
`true`. Please note that the hash must be indexed as a long when `rehash` is
|
`true`. Please note that the hash must be indexed as a long when `rehash` is
|
||||||
|
@ -152,3 +152,6 @@ however since hashes need to be computed on the fly.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
|
|
|
@ -91,6 +91,8 @@ Computing the grades stats based on a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
===== Value Script
|
===== Value Script
|
||||||
|
|
||||||
It turned out that the exam was way above the level of the students and a grade correction needs to be applied. We can use a value script to get the new stats:
|
It turned out that the exam was way above the level of the students and a grade correction needs to be applied. We can use a value script to get the new stats:
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
[[search-aggregations-metrics-geobounds-aggregation]]
|
[[search-aggregations-metrics-geobounds-aggregation]]
|
||||||
=== Geo Bounds Aggregation
|
=== Geo Bounds Aggregation
|
||||||
|
|
||||||
experimental[]
|
|
||||||
|
|
||||||
A metric aggregation that computes the bounding box containing all geo_point values for a field.
|
A metric aggregation that computes the bounding box containing all geo_point values for a field.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ Computing the max price value across all document, this time using a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
==== Value Script
|
==== Value Script
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ Computing the min price value across all document, this time using a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
==== Value Script
|
==== Value Script
|
||||||
|
|
||||||
|
|
|
@ -113,6 +113,8 @@ a script to convert them on-the-fly:
|
||||||
script to generate values which percentiles are calculated on
|
script to generate values which percentiles are calculated on
|
||||||
<2> Scripting supports parameterized input just like any other script
|
<2> Scripting supports parameterized input just like any other script
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
[[search-aggregations-metrics-percentile-aggregation-approximation]]
|
[[search-aggregations-metrics-percentile-aggregation-approximation]]
|
||||||
==== Percentiles are (usually) approximate
|
==== Percentiles are (usually) approximate
|
||||||
|
|
||||||
|
@ -153,7 +155,7 @@ it. It would not be the case on more skewed distributions.
|
||||||
[[search-aggregations-metrics-percentile-aggregation-compression]]
|
[[search-aggregations-metrics-percentile-aggregation-compression]]
|
||||||
==== Compression
|
==== Compression
|
||||||
|
|
||||||
experimental[]
|
experimental[The `compression` parameter is specific to the current internal implementation of percentiles, and may change in the future]
|
||||||
|
|
||||||
Approximate algorithms must balance memory utilization with estimation accuracy.
|
Approximate algorithms must balance memory utilization with estimation accuracy.
|
||||||
This balance can be controlled using a `compression` parameter:
|
This balance can be controlled using a `compression` parameter:
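Purely as an illustration of where the parameter goes (the aggregation name, field name and value here are assumptions, not taken from this change), a percentiles request with a larger `compression` might look like:

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "compression" : 200
            }
        }
    }
}
--------------------------------------------------

Larger `compression` values make the estimate more accurate at the cost of more memory and slower execution.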
|
||||||
|
|
|
@ -84,3 +84,5 @@ a script to convert them on-the-fly:
|
||||||
<1> The `field` parameter is replaced with a `script` parameter, which uses the
|
<1> The `field` parameter is replaced with a `script` parameter, which uses the
|
||||||
script to generate values which percentile ranks are calculated on
|
script to generate values which percentile ranks are calculated on
|
||||||
<2> Scripting supports parameterized input just like any other script
|
<2> Scripting supports parameterized input just like any other script
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
|
@ -84,24 +84,28 @@ $ curl -XPUT 'http://localhost:9200/transactions/stock/1' -d '
|
||||||
"type": "sale",
|
"type": "sale",
|
||||||
"amount": 80
|
"amount": 80
|
||||||
}
|
}
|
||||||
|
'
|
||||||
|
|
||||||
$ curl -XPUT 'http://localhost:9200/transactions/stock/2' -d '
|
$ curl -XPUT 'http://localhost:9200/transactions/stock/2' -d '
|
||||||
{
|
{
|
||||||
"type": "cost",
|
"type": "cost",
|
||||||
"amount": 10
|
"amount": 10
|
||||||
}
|
}
|
||||||
|
'
|
||||||
|
|
||||||
$ curl -XPUT 'http://localhost:9200/transactions/stock/3' -d '
|
$ curl -XPUT 'http://localhost:9200/transactions/stock/3' -d '
|
||||||
{
|
{
|
||||||
"type": "cost",
|
"type": "cost",
|
||||||
"amount": 30
|
"amount": 30
|
||||||
}
|
}
|
||||||
|
'
|
||||||
|
|
||||||
$ curl -XPUT 'http://localhost:9200/transactions/stock/4' -d '
|
$ curl -XPUT 'http://localhost:9200/transactions/stock/4' -d '
|
||||||
{
|
{
|
||||||
"type": "sale",
|
"type": "sale",
|
||||||
"amount": 130
|
"amount": 130
|
||||||
}
|
}
|
||||||
|
'
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Let's say that documents 1 and 3 end up on shard A and documents 2 and 4 end up on shard B. The following is a breakdown of what the aggregation result is
|
Let's say that documents 1 and 3 end up on shard A and documents 2 and 4 end up on shard B. The following is a breakdown of what the aggregation result is
|
||||||
|
|
|
@ -53,6 +53,8 @@ Computing the grades stats based on a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
===== Value Script
|
===== Value Script
|
||||||
|
|
||||||
It turned out that the exam was way above the level of the students and a grade correction needs to be applied. We can use a value script to get the new stats:
|
It turned out that the exam was way above the level of the students and a grade correction needs to be applied. We can use a value script to get the new stats:
|
||||||
|
|
|
@ -55,6 +55,8 @@ Computing the intraday return based on a script:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
||||||
===== Value Script
|
===== Value Script
|
||||||
|
|
||||||
Computing the sum of squares over all stock tick changes:
|
Computing the sum of squares over all stock tick changes:
|
||||||
|
|
|
@ -34,6 +34,7 @@ The name of the aggregation (`grades_count` above) also serves as the key by whi
|
||||||
retrieved from the returned response.
|
retrieved from the returned response.
|
||||||
|
|
||||||
==== Script
|
==== Script
|
||||||
|
|
||||||
Counting the values generated by a script:
|
Counting the values generated by a script:
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
|
@ -46,3 +47,5 @@ Counting the values generated by a script:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
TIP: The `script` parameter expects an inline script. Use `script_id` for indexed scripts and `script_file` for scripts in the `config/scripts/` directory.
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
[[search-aggregations-reducer]]
|
||||||
|
|
||||||
|
include::reducer/derivative-aggregation.asciidoc[]
|
||||||
|
include::reducer/max-bucket-aggregation.asciidoc[]
|
||||||
|
include::reducer/min-bucket-aggregation.asciidoc[]
|
||||||
|
include::reducer/movavg-aggregation.asciidoc[]
|