From f837213d6f1ff4262c1a43d6ea7f15d0d91de271 Mon Sep 17 00:00:00 2001 From: Karel Minarik Date: Thu, 22 Sep 2011 15:44:12 +0200 Subject: [PATCH] Reformatted and amended the example configuration file Edited elasticsearch.yml: * Separated different sections (using headers) * Added more information about nodes configuration * Added more information about various index configurations and their effects * Added information about setting network and HTTP configuration * Reworded information on gateway, recovery, discovery The example configuration file should allow operations staff to quickly get a sense of ElasticSearch features relevant for systems support, and to understand how to configure node, cluster, network and discovery settings. The aim here is to vaguely respect the most often changed configuration settings, while having some top-to-bottom conceptual integrity. Table of Contents: * Cluster * Node * Index * Paths * Memory * Network And HTTP * Gateway * Recovery Throttling * Discovery --- config/elasticsearch.yml | 330 ++++++++++++++++++++++++++++++++++----- 1 file changed, 295 insertions(+), 35 deletions(-) diff --git a/config/elasticsearch.yml b/config/elasticsearch.yml index febbb8a86c7..b106343ca55 100644 --- a/config/elasticsearch.yml +++ b/config/elasticsearch.yml @@ -1,42 +1,302 @@ -# The cluster name -#cluster.name: elasticsearch +##################### ElasticSearch Configuration Example ##################### -# Path Settings -#path.conf: /path/to/conf -#path.data: /path/to/data -#path.work: /path/to/work -#path.logs: /path/to/logs -#path.plugins: /path/to/plugins +# This file contains an overview of various configuration settings, +# targeted at operations staff. Application developers should +# consult the guide at [http://elasticsearch.org/guide]. +# +# The installation procedure is covered at +# [http://elasticsearch.org/guide/reference/setup/installation.html]. +# +# ElasticSearch comes with reasonable defaults for most settings, +# so you can try it out without bothering with configuration. 
+# +# Most of the time, these defaults are just fine for running a production +# cluster. If you're fine-tuning your cluster, or wondering about the +# effect of a certain configuration option, please _do ask_ on the +# mailing list or IRC channel [http://elasticsearch.org/community]. -# Force all memory to be locked, forcing the JVM to never swap -# When setting it, make sure ES_MIN_MEM and ES_MAX_MEM are set to the same value -# and that the machine has enough memory to allocate. -#bootstrap.mlockall: true +# See [http://elasticsearch.org/guide/reference/setup/configuration.html] +# for information on supported formats and syntax for the configuration file. -# Gateway Settings -# Controls when to start the initial recovery process when starting a new cluster -# allowing for better reused of existing data during recovery. -#gateway.recover_after_nodes: 1 -#gateway.recover_after_time: 5m -#gateway.expected_nodes: 2 -# Recovery Throttling -# The number of concurrent recoveries happening on a node -#cluster.routing.allocation.node_initial_primaries_recoveries: 4 -#cluster.routing.allocation.node_concurrent_recoveries: 2 -# Peer shard recovery size based throttling (set to 100mb for example to enable) -#indices.recovery.max_size_per_sec: 0 -# Number open concurrent recovery streams allows -#indices.recovery.concurrent_streams: 5 +################################### Cluster ################################### -# Controls the minimum number of master eligible nodes this node should "see" -# in order to operate within the cluster. -# Set this to a higher value (2-4) when running more than 2 nodes in the cluster -#discovery.zen.minimum_master_nodes: 1 +# Cluster name identifies your cluster for auto-discovery. If you're running +# multiple clusters on the same network, make sure you're using unique names. 
+# +# cluster.name: elasticsearch -# The time to wait for ping responses from other nodes when doing node discovery -#discovery.zen.ping.timeout: 3s -# Unicast Discovery (disable multicast) -#discovery.zen.ping.multicast.enabled: false -#discovery.zen.ping.unicast.hosts: ["host1", "host2"] +#################################### Node ##################################### + +# Node names are generated dynamically on startup, so you're relieved +# from configuring them manually. You can tie this node to a specific name: +# +# node.name: "Franz Kafka" + +# Every node can be configured to allow or deny being eligible as the master, +# and to allow or deny storing data. +# +# Allow this node to be eligible as a master node (enabled by default): +# +# node.master: true +# +# Allow this node to store data (enabled by default): +# +# node.data: true + +# You can exploit these settings to design advanced cluster topologies. +# +# 1. You want this node to never become a master node, only to hold data. +# This will be the "workhorse" of your cluster. +# +# node.master: false +# node.data: true +# +# 2. You want this node to only serve as a master: to not store any data and +# to have free resources. This will be the "coordinator" of your cluster. +# +# node.master: true +# node.data: false +# +# 3. You want this node to be neither master nor data node, but +# to act as a "search load balancer" (fetching data from nodes, +# aggregating results, etc.) +# +# node.master: false +# node.data: false + +# Use the Cluster Health API [http://localhost:9200/_cluster/health], the +# Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools +# such as [http://github.com/lukas-vlcek/bigdesk] and +# [http://mobz.github.com/elasticsearch-head] to inspect the cluster state. + + +#################################### Index #################################### + +# You can set a number of options (such as shard/replica options, mapping +# or analyzer definitions, translog settings, ...) for indices globally, +# in this file. 
+# +# Note that it makes more sense to configure index settings specifically for +# a certain index, either when creating it or by using the index templates API. +# +# See [http://elasticsearch.org/guide/reference/index-modules/] and +# [http://elasticsearch.org/guide/reference/api/admin-indices-update-settings.html] +# for more information. + +# Set the number of shards (splits) of an index (5 by default): +# +# index.number_of_shards: 5 + +# Set the number of replicas (additional copies) of an index (1 by default): +# +# index.number_of_replicas: 1 + +# Note that for development on a local machine, with small indices, it usually +# makes sense to "disable" the distributed features: +# +# index.number_of_shards: 1 +# index.number_of_replicas: 0 + +# These settings directly affect the performance of index and search operations +# in your cluster. Assuming you have enough machines to hold shards and +# replicas, the rule of thumb is: +# +# 1. Having more *shards* enhances the _indexing_ performance and allows you to +# _distribute_ a big index across machines. +# 2. Having more *replicas* enhances the _search_ performance and improves the +# cluster _availability_. +# +# The "number_of_shards" is a one-time setting for an index. +# +# The "number_of_replicas" can be increased or decreased anytime, +# by using the Index Update Settings API. +# +# ElasticSearch takes care of load balancing, relocating, gathering the +# results from nodes, etc. Experiment with different settings to fine-tune +# your setup. + +# Use the Index Status API (http://localhost:9200/A/_status) to inspect +# the index status. 
+ + +#################################### Paths #################################### + +# Path to directory containing configuration (this file and logging.yml): +# +# path.conf: /path/to/conf + +# Path to directory where to store index data allocated for this node: +# +# path.data: /path/to/data + +# Path to temporary files: +# +# path.work: /path/to/work + +# Path to log files: +# +# path.logs: /path/to/logs + +# Path to where plugins are installed: +# +# path.plugins: /path/to/plugins + + +################################### Memory #################################### + +# ElasticSearch performs poorly when the JVM starts swapping: you should ensure that +# it _never_ swaps. +# +# Set this property to true to lock the memory: +# +# bootstrap.mlockall: true + +# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set +# to the same value, and that the machine has enough memory to allocate +# for ElasticSearch, leaving enough memory for the operating system itself. +# +# You should also make sure that the ElasticSearch process is allowed to lock +# the memory, e.g. by using `ulimit -l unlimited`. + + +############################## Network And HTTP ############################### + +# ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens +# on port 9200 for HTTP traffic and on port 9300 for node-to-node communication. + +# Set the bind address specifically (IPv4 or IPv6): +# +# network.bind_host: 192.168.0.1 + +# Set the address other nodes will use to communicate with this node. If not +# set, it is automatically derived. It must point to an actual IP address. 
+# +# network.publish_host: 192.168.0.1 + +# Set both 'bind_host' and 'publish_host': +# +# network.host: 192.168.0.1 + +# Set a custom port for the node-to-node communication (9300 by default): +# +# transport.tcp.port: 9300 + +# Enable compression for all communication between nodes (disabled by default): +# +# transport.tcp.compress: true + +# Set a custom port to listen for HTTP traffic: +# +# http.port: 9200 + +# Set a custom allowed content length: +# +# http.max_content_length: 100mb + +# Disable HTTP completely: +# +# http.enabled: false + + +################################### Gateway ################################### + +# The gateway allows for persisting the cluster state between full cluster +# restarts. Every change to the state (such as adding an index) will be stored +# in the gateway, and when the cluster starts up for the first time, +# it will read its state from the gateway. + +# There are several types of gateway implementations. For more information, +# see [http://elasticsearch.org/guide/reference/modules/gateway]. + +# The default gateway type is the "local" gateway (recommended): +# +# gateway.type: local + +# Settings below control how and when to start the initial recovery process on +# a full cluster restart (to reuse as much local data as possible). + +# Allow recovery process after N nodes in a cluster are up: +# +# gateway.recover_after_nodes: 1 + +# Set the timeout to initiate the recovery process, once the N nodes +# from previous setting are up (accepts time value): +# +# gateway.recover_after_time: 5m + +# Set how many nodes are expected in this cluster. Once these N nodes +# are up, begin recovery process immediately: +# +# gateway.expected_nodes: 2 + + +############################# Recovery Throttling ############################# + +# These settings allow you to control the process of shards allocation between +# nodes during initial recovery, replica allocation, rebalancing, +# or when adding and removing nodes. + +# Set the number of concurrent recoveries happening on a node: +# +# 1. 
During the initial recovery +# +# cluster.routing.allocation.node_initial_primaries_recoveries: 4 +# +# 2. During adding/removing nodes, rebalancing, etc. +# +# cluster.routing.allocation.node_concurrent_recoveries: 2 + +# Set to throttle throughput when recovering (e.g. 100mb, by default unlimited): +# +# indices.recovery.max_size_per_sec: 0 + +# Set to limit the number of open concurrent streams when +# recovering a shard from a peer: +# +# indices.recovery.concurrent_streams: 5 + + +################################## Discovery ################################## + +# Discovery infrastructure ensures nodes can be found within a cluster +# and a master node is elected. Multicast discovery is the default. + +# Set to ensure a node sees N other master eligible nodes to be considered +# operational within the cluster. Set this option to a higher value (2-4) +# for large clusters: +# +# discovery.zen.minimum_master_nodes: 1 + +# Set the time to wait for ping responses from other nodes when discovering. +# Set this option to a higher value on a slow or congested network +# to minimize discovery failures: +# +# discovery.zen.ping.timeout: 3s + +# See [http://elasticsearch.org/guide/reference/modules/discovery/zen.html] +# for more information. + +# Unicast discovery allows you to explicitly control which nodes will be used +# to discover the cluster. It can be used when multicast is not present, +# or to restrict the cluster communication-wise. +# +# 1. Disable multicast discovery (enabled by default): +# +# discovery.zen.ping.multicast.enabled: false +# +# 2. Configure an initial list of master nodes in the cluster +# to perform discovery when new nodes (master or data) are started: +# +# discovery.zen.ping.unicast.hosts: ["host1", "host2:port", "host3[portX-portY]"] + +# EC2 discovery allows you to use the AWS EC2 API in order to perform discovery. +# +# You have to install the cloud-aws plugin for enabling the EC2 discovery. +# +# See [http://elasticsearch.org/guide/reference/modules/discovery/ec2.html] +# for more information. +# +# See [http://elasticsearch.org/tutorials/2011/08/22/elasticsearch-on-ec2.html] +# for a step-by-step tutorial.