Merge remote-tracking branch 'es/master' into feature/ingest

Martijn van Groningen 2015-11-30 10:24:27 +01:00
commit 467a47670c
20 changed files with 340 additions and 196 deletions

View File

@ -45,7 +45,7 @@ subprojects {
}
}
}
}
}
extraArchive {
javadoc = true
tests = false
@ -86,8 +86,8 @@ subprojects {
tasks.withType(Jar) {
into('META-INF') {
from project.rootProject.rootDir
include 'LICENSE.txt'
include 'NOTICE.txt'
include 'LICENSE.txt'
include 'NOTICE.txt'
}
}
// ignore missing javadocs
@ -101,12 +101,18 @@ subprojects {
}
}
/* Sets up the dependencies that we build as part of this project but
register as though they were external to resolve internally. We register
them as external dependencies so the build plugin that we use can be used
to build elasticsearch plugins outside of the elasticsearch source tree. */
ext.projectSubstitutions = [
"org.elasticsearch:rest-api-spec:${version}": ':rest-api-spec',
"org.elasticsearch:elasticsearch:${version}": ':core',
"org.elasticsearch:test-framework:${version}": ':test-framework',
"org.elasticsearch.distribution.zip:elasticsearch:${version}": ':distribution:zip',
"org.elasticsearch.distribution.tar:elasticsearch:${version}": ':distribution:tar'
"org.elasticsearch.distribution.tar:elasticsearch:${version}": ':distribution:tar',
"org.elasticsearch.distribution.rpm:elasticsearch:${version}": ':distribution:rpm',
"org.elasticsearch.distribution.deb:elasticsearch:${version}": ':distribution:deb',
]
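For context, a hedged sketch of the consumer side: a plugin built outside the elasticsearch source tree (the dependency block below is assumed for illustration, it is not part of this commit) declares the same coordinates as ordinary external artifacts, while an in-tree build has them substituted with the local projects listed above.

dependencies {
    // Assumed example: fetched from a repository when the plugin is built externally,
    // rewritten to the ':core' and ':test-framework' projects when built in-tree.
    compile "org.elasticsearch:elasticsearch:${version}"
    testCompile "org.elasticsearch:test-framework:${version}"
}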
configurations.all {
resolutionStrategy.dependencySubstitution { DependencySubstitutions subs ->
@ -226,7 +232,7 @@ class Run extends DefaultTask {
)
public void setDebug(boolean enabled) {
project.project(':distribution').run.clusterConfig.debug = enabled
}
}
}
task run(type: Run) {
dependsOn ':distribution:run'
@ -234,4 +240,3 @@ task run(type: Run) {
group = 'Verification'
impliesSubProjects = true
}

View File

@ -27,9 +27,7 @@ import org.gradle.api.*
import org.gradle.api.artifacts.Configuration
import org.gradle.api.file.FileCollection
import org.gradle.api.logging.Logger
import org.gradle.api.tasks.Copy
import org.gradle.api.tasks.Delete
import org.gradle.api.tasks.Exec
import org.gradle.api.tasks.*
import java.nio.file.Paths
@ -132,6 +130,12 @@ class ClusterFormationTasks {
/** Adds a task to extract the elasticsearch distribution */
static Task configureExtractTask(String name, Project project, Task setup, NodeInfo node) {
List extractDependsOn = [project.configurations.elasticsearchDistro, setup]
/* project.configurations.elasticsearchDistro.singleFile will be an
external artifact if this is being run by a plugin not living in the
elasticsearch source tree. If this is a plugin built in the
elasticsearch source tree or this is a distro in the elasticsearch
source tree then this should be the version of elasticsearch built
by the source tree. If it isn't then Bad Things(TM) will happen. */
Task extract
switch (node.config.distribution) {
case 'zip':
@ -148,6 +152,33 @@ class ClusterFormationTasks {
into node.baseDir
}
break;
case 'rpm':
File rpmDatabase = new File(node.baseDir, 'rpm-database')
File rpmExtracted = new File(node.baseDir, 'rpm-extracted')
/* Delay reading the location of the rpm file until task execution */
Object rpm = "${ -> project.configurations.elasticsearchDistro.singleFile}"
extract = project.tasks.create(name: name, type: LoggedExec, dependsOn: extractDependsOn) {
commandLine 'rpm', '--badreloc', '--nodeps', '--noscripts', '--notriggers',
'--dbpath', rpmDatabase,
'--relocate', "/=${rpmExtracted}",
'-i', rpm
doFirst {
rpmDatabase.deleteDir()
rpmExtracted.deleteDir()
}
}
break;
case 'deb':
/* Delay reading the location of the deb file until task execution */
File debExtracted = new File(node.baseDir, 'deb-extracted')
Object deb = "${ -> project.configurations.elasticsearchDistro.singleFile}"
extract = project.tasks.create(name: name, type: LoggedExec, dependsOn: extractDependsOn) {
commandLine 'dpkg-deb', '-x', deb, debExtracted
doFirst {
debExtracted.deleteDir()
}
}
break;
default:
throw new InvalidUserDataException("Unknown distribution: ${node.config.distribution}")
}
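As an aside, the rpm and deb cases above lean on Groovy's lazy GString form ("${ -> expr}") so that elasticsearchDistro.singleFile is only read when the exec task runs. A minimal standalone sketch of the difference, with made-up values:

// Eager interpolation captures the value when the string literal is evaluated;
// the closure form defers evaluation until the GString is rendered to a String.
def distroFile = 'not-resolved-yet'
def eager = "distro: ${distroFile}"
def lazy  = "distro: ${ -> distroFile}"
distroFile = 'elasticsearch-3.0.0.rpm'   // e.g. resolved during task execution
assert eager.toString() == 'distro: not-resolved-yet'
assert lazy.toString()  == 'distro: elasticsearch-3.0.0.rpm'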
@ -172,7 +203,7 @@ class ClusterFormationTasks {
Task writeConfig = project.tasks.create(name: name, type: DefaultTask, dependsOn: setup)
writeConfig.doFirst {
File configFile = new File(node.homeDir, 'config/elasticsearch.yml')
File configFile = new File(node.confDir, 'elasticsearch.yml')
logger.info("Configuring ${configFile}")
configFile.setText(esConfig.collect { key, value -> "${key}: ${value}" }.join('\n'), 'UTF-8')
}
@ -185,7 +216,8 @@ class ClusterFormationTasks {
Copy copyConfig = project.tasks.create(name: name, type: Copy, dependsOn: setup)
copyConfig.into(new File(node.homeDir, 'config')) // copy must always have a general dest dir, even though we don't use it
for (Map.Entry<String,Object> extraConfigFile : node.config.extraConfigFiles.entrySet()) {
Closure delayedSrc = {
copyConfig.doFirst {
// make sure the copy won't be a no-op or act on a directory
File srcConfigFile = project.file(extraConfigFile.getValue())
if (srcConfigFile.isDirectory()) {
throw new GradleException("Source for extraConfigFile must be a file: ${srcConfigFile}")
@ -193,11 +225,10 @@ class ClusterFormationTasks {
if (srcConfigFile.exists() == false) {
throw new GradleException("Source file for extraConfigFile does not exist: ${srcConfigFile}")
}
return srcConfigFile
}
File destConfigFile = new File(node.homeDir, 'config/' + extraConfigFile.getKey())
copyConfig.from(delayedSrc)
.into(destConfigFile.canonicalFile.parentFile)
copyConfig.into(destConfigFile.canonicalFile.parentFile)
.from({ extraConfigFile.getValue() }) // wrap in closure to delay resolution to execution time
.rename { destConfigFile.name }
}
return copyConfig
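For reference, a hypothetical way a build script could feed this task; the destination and source paths below are made up, but clusterConfig.extraConfigFiles is the map iterated above and clusterConfig is used the same way in the deb/rpm integTest blocks later in this commit. The key is the destination relative to the node's config dir; the value is only resolved via project.file() when the copy actually executes.

integTest {
    // hypothetical entry: ship an extra dictionary file into config/hunspell/en_US/
    clusterConfig.extraConfigFiles['hunspell/en_US/en_US.dic'] = '../src/test/resources/hunspell/en_US.dic'
}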
@ -284,18 +315,27 @@ class ClusterFormationTasks {
/** Adds a task to start an elasticsearch node with the given configuration */
static Task configureStartTask(String name, Project project, Task setup, NodeInfo node) {
String executable
List<String> esArgs = []
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
executable = 'cmd'
esArgs.add('/C')
esArgs.add('call')
} else {
executable = 'sh'
}
// this closure is converted into ant nodes by groovy's AntBuilder
Closure antRunner = { AntBuilder ant ->
ant.exec(executable: node.executable, spawn: node.config.daemonize, dir: node.cwd, taskname: 'elasticsearch') {
node.env.each { key, value -> env(key: key, value: value) }
node.args.each { arg(value: it) }
}
}
// this closure is the actual code to run elasticsearch
Closure elasticsearchRunner = {
// Due to how ant exec works with the spawn option, we lose all stdout/stderr from the
// process executed. To work around this, when spawning, we wrap the elasticsearch start
// command inside another shell script, which simply internally redirects the output
// of the real elasticsearch script. This allows ant to keep the streams open with the
// dummy process, but us to have the output available if there is an error in the
// elasticsearch start script
if (node.config.daemonize) {
node.writeWrapperScript()
}
// we must add debug options inside the closure so the config is read at execution time, as
// gradle task options are not processed until the end of the configuration phase
if (node.config.debug) {
@ -303,37 +343,6 @@ class ClusterFormationTasks {
node.env['JAVA_OPTS'] = '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=8000'
}
// Due to how ant exec works with the spawn option, we lose all stdout/stderr from the
// process executed. To work around this, when spawning, we wrap the elasticsearch start
// command inside another shell script, which simply internally redirects the output
// of the real elasticsearch script. This allows ant to keep the streams open with the
// dummy process, but us to have the output available if there is an error in the
// elasticsearch start script
String script = node.esScript
if (node.config.daemonize) {
String scriptName = 'run'
String argsPasser = '"$@"'
String exitMarker = "; if [ \$? != 0 ]; then touch run.failed; fi"
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
scriptName += '.bat'
argsPasser = '%*'
exitMarker = "\r\n if \"%errorlevel%\" neq \"0\" ( type nul >> run.failed )"
}
File wrapperScript = new File(node.cwd, scriptName)
wrapperScript.setText("\"${script}\" ${argsPasser} > run.log 2>&1 ${exitMarker}", 'UTF-8')
script = wrapperScript.toString()
}
ant.exec(executable: executable, spawn: node.config.daemonize, dir: node.cwd, taskname: 'elasticsearch') {
node.env.each { key, value -> env(key: key, value: value) }
arg(value: script)
node.args.each { arg(value: it) }
}
}
// this closure is the actual code to run elasticsearch
Closure elasticsearchRunner = {
node.getCommandString().eachLine { line -> logger.info(line) }
if (logger.isInfoEnabled() || node.config.daemonize == false) {
@ -405,14 +414,19 @@ class ClusterFormationTasks {
// We already log the command at info level. No need to do it twice.
node.getCommandString().eachLine { line -> logger.error(line) }
}
// the waitfor failed, so dump any output we got (may be empty if info logging, but that is ok)
logger.error("Node ${node.nodeNum} ant output:")
node.buffer.toString('UTF-8').eachLine { line -> logger.error(line) }
logger.error("Node ${node.nodeNum} output:")
logger.error("|-----------------------------------------")
logger.error("| failure marker exists: ${node.failedMarker.exists()}")
logger.error("| pid file exists: ${node.pidFile.exists()}")
// the waitfor failed, so dump any output we got (if info logging this goes directly to stdout)
logger.error("|\n| [ant output]")
node.buffer.toString('UTF-8').eachLine { line -> logger.error("| ${line}") }
// also dump the log file for the startup script (which will include ES logging output to stdout)
if (node.startLog.exists()) {
logger.error("Node ${node.nodeNum} log:")
node.startLog.eachLine { line -> logger.error(line) }
logger.error("|\n| [log]")
node.startLog.eachLine { line -> logger.error("| ${line}") }
}
logger.error("|-----------------------------------------")
}
throw new GradleException(msg)
}

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.gradle.test
import org.apache.tools.ant.taskdefs.condition.Os
import org.elasticsearch.gradle.VersionProperties
import org.gradle.api.InvalidUserDataException
import org.gradle.api.Project
@ -45,6 +46,12 @@ class NodeInfo {
/** elasticsearch home dir */
File homeDir
/** config directory */
File confDir
/** THE config file */
File configFile
/** working directory for the node process */
File cwd
@ -63,8 +70,14 @@ class NodeInfo {
/** arguments to start the node with */
List<String> args
/** Executable to run the bin/elasticsearch with, either cmd or sh */
String executable
/** Path to the elasticsearch start script */
String esScript
File esScript
/** script to run when running in the background */
File wrapperScript
/** buffer for ant output when starting this node */
ByteArrayOutputStream buffer = new ByteArrayOutputStream()
@ -77,34 +90,75 @@ class NodeInfo {
baseDir = new File(project.buildDir, "cluster/${task.name} node${nodeNum}")
pidFile = new File(baseDir, 'es.pid')
homeDir = homeDir(baseDir, config.distribution)
confDir = confDir(baseDir, config.distribution)
configFile = new File(confDir, 'elasticsearch.yml')
cwd = new File(baseDir, "cwd")
failedMarker = new File(cwd, 'run.failed')
startLog = new File(cwd, 'run.log')
pluginsTmpDir = new File(baseDir, "plugins tmp")
args = []
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
executable = 'cmd'
args.add('/C')
args.add('"') // quote the entire command
wrapperScript = new File(cwd, "run.bat")
esScript = new File(homeDir, 'bin/elasticsearch.bat')
} else {
executable = 'sh'
wrapperScript = new File(cwd, "run")
esScript = new File(homeDir, 'bin/elasticsearch')
}
if (config.daemonize) {
args.add("${wrapperScript}")
} else {
args.add("${esScript}")
}
env = [
'JAVA_HOME' : project.javaHome,
'ES_GC_OPTS': config.jvmArgs // we pass these with the undocumented gc opts so the argline can set gc, etc
]
args = config.systemProperties.collect { key, value -> "-D${key}=${value}" }
args.addAll(config.systemProperties.collect { key, value -> "-D${key}=${value}" })
for (Map.Entry<String, String> property : System.properties.entrySet()) {
if (property.getKey().startsWith('es.')) {
args.add("-D${property.getKey()}=${property.getValue()}")
}
}
// running with cmd on windows will look for this with the .bat extension
esScript = new File(homeDir, 'bin/elasticsearch').toString()
args.add("-Des.path.conf=${confDir}")
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
args.add('"') // end the entire command, quoted
}
}
/** Returns debug string for the command that started this node. */
String getCommandString() {
String esCommandString = "Elasticsearch node ${nodeNum} command: ${esScript} "
esCommandString += args.join(' ')
esCommandString += '\nenvironment:'
env.each { k, v -> esCommandString += "\n ${k}: ${v}" }
String esCommandString = "\nNode ${nodeNum} configuration:\n"
esCommandString += "|-----------------------------------------\n"
esCommandString += "| cwd: ${cwd}\n"
esCommandString += "| command: ${executable} ${args.join(' ')}\n"
esCommandString += '| environment:\n'
env.each { k, v -> esCommandString += "| ${k}: ${v}\n" }
if (config.daemonize) {
esCommandString += "|\n| [${wrapperScript.name}]\n"
wrapperScript.eachLine('UTF-8', { line -> esCommandString += " ${line}\n"})
}
esCommandString += '|\n| [elasticsearch.yml]\n'
configFile.eachLine('UTF-8', { line -> esCommandString += "| ${line}\n" })
esCommandString += "|-----------------------------------------"
return esCommandString
}
void writeWrapperScript() {
String argsPasser = '"$@"'
String exitMarker = "; if [ \$? != 0 ]; then touch run.failed; fi"
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
argsPasser = '%*'
exitMarker = "\r\n if \"%errorlevel%\" neq \"0\" ( type nul >> run.failed )"
}
wrapperScript.setText("\"${esScript}\" ${argsPasser} > run.log 2>&1 ${exitMarker}", 'UTF-8')
}
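To make the redirection concrete, a small sketch of what this emits on a non-Windows host, with an assumed esScript location; all output of the real start script lands in run.log, and run.failed is created on a non-zero exit so the start task can detect the failure:

// Illustrative only; the path is assumed, the quoting/escaping mirrors the code above.
def esScript = '/tmp/es/bin/elasticsearch'
def wrapperText = "\"${esScript}\" \"\$@\" > run.log 2>&1 ; if [ \$? != 0 ]; then touch run.failed; fi"
println wrapperText
// prints: "/tmp/es/bin/elasticsearch" "$@" > run.log 2>&1 ; if [ $? != 0 ]; then touch run.failed; fi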
/** Returns the http port for this node */
int httpPort() {
return config.baseHttpPort + nodeNum
@ -122,10 +176,28 @@ class NodeInfo {
case 'zip':
case 'tar':
path = "elasticsearch-${VersionProperties.elasticsearch}"
break;
break
case 'rpm':
case 'deb':
path = "${distro}-extracted/usr/share/elasticsearch"
break
default:
throw new InvalidUserDataException("Unknown distribution: ${distro}")
}
return new File(baseDir, path)
}
static File confDir(File baseDir, String distro) {
switch (distro) {
case 'zip':
case 'tar':
return new File(homeDir(baseDir, distro), 'config')
case 'rpm':
case 'deb':
return new File(baseDir, "${distro}-extracted/etc/elasticsearch")
default:
throw new InvalidUserDataException("Unkown distribution: ${distro}")
}
}
}

View File

@ -20,6 +20,8 @@
package org.elasticsearch.search.suggest.completion;
import com.carrotsearch.hppc.ObjectLongHashMap;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -32,28 +34,35 @@ import java.io.IOException;
public class CompletionFieldStats {
public static CompletionStats completionStats(IndexReader indexReader, String ... fields) {
/**
* Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>O(numIndexedFields)</code>.
*
* @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out its in-heap bytes
* separately in the returned {@link CompletionStats}
*/
public static CompletionStats completionStats(IndexReader indexReader, String ... fieldNamePatterns) {
long sizeInBytes = 0;
ObjectLongHashMap<String> completionFields = null;
if (fields != null && fields.length > 0) {
completionFields = new ObjectLongHashMap<>(fields.length);
if (fieldNamePatterns != null && fieldNamePatterns.length > 0) {
completionFields = new ObjectLongHashMap<>(fieldNamePatterns.length);
}
for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
LeafReader atomicReader = atomicReaderContext.reader();
try {
for (String fieldName : atomicReader.fields()) {
Terms terms = atomicReader.fields().terms(fieldName);
Fields fields = atomicReader.fields();
for (String fieldName : fields) {
Terms terms = fields.terms(fieldName);
if (terms instanceof CompletionTerms) {
// TODO: currently we load up the suggester for reporting its size
long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
if (fields != null && fields.length > 0 && Regex.simpleMatch(fields, fieldName)) {
if (fieldNamePatterns != null && fieldNamePatterns.length > 0 && Regex.simpleMatch(fieldNamePatterns, fieldName)) {
completionFields.addTo(fieldName, fstSize);
}
sizeInBytes += fstSize;
}
}
} catch (IOException ignored) {
throw new ElasticsearchException(ignored);
} catch (IOException ioe) {
throw new ElasticsearchException(ioe);
}
}
return new CompletionStats(sizeInBytes, completionFields);
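A hypothetical caller, not part of this commit, that asks for a per-field breakdown of completion fields whose names match a simple wildcard pattern (the helper name and the 'suggest*' pattern are made up; Regex.simpleMatch semantics apply as in the loop above):

import org.apache.lucene.index.IndexReader
import org.elasticsearch.search.suggest.completion.CompletionFieldStats
import org.elasticsearch.search.suggest.completion.CompletionStats

// The total size is always reported; only fields matching the pattern get an
// individual entry in the returned stats.
CompletionStats suggestFieldStats(IndexReader reader) {
    return CompletionFieldStats.completionStats(reader, 'suggest*')
}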

View File

@ -80,6 +80,10 @@ grant {
// TODO: look into this and decide if users should simply set the actual sysprop?!
permission java.util.PropertyPermission "org.jboss.netty.epollBugWorkaround", "write";
// Netty SelectorUtil wants to change this, because of https://bugs.openjdk.java.net/browse/JDK-6427854
// the bug says it only happened rarely, and that it's fixed, but apparently it still happens rarely!
permission java.util.PropertyPermission "sun.nio.ch.bugLevel", "write";
// needed by lucene SPI currently
permission java.lang.RuntimePermission "getClassLoader";

View File

@ -143,9 +143,7 @@ configure(subprojects.findAll { it.name == 'zip' || it.name == 'tar' }) {
* MavenFilteringHack or any other copy-style action.
*/
configure(subprojects.findAll { it.name == 'deb' || it.name == 'rpm' }) {
// Currently disabled these because they are broken.
// integTest.enabled = Os.isFamily(Os.FAMILY_WINDOWS) == false
integTest.enabled = false
integTest.enabled = Os.isFamily(Os.FAMILY_WINDOWS) == false
File packagingFiles = new File(buildDir, 'packaging')
project.ext.packagingFiles = packagingFiles
task processPackagingFiles(type: Copy) {

View File

@ -19,6 +19,7 @@
task buildDeb(type: Deb) {
dependsOn dependencyFiles, preparePackagingFiles
baseName 'elasticsearch' // this is what pom generation uses for artifactId
// Follow elasticsearch's deb file naming convention
archiveName "${packageName}-${project.version}.deb"
packageGroup 'web'
@ -34,7 +35,15 @@ task buildDeb(type: Deb) {
}
artifacts {
'default' buildDeb
archives buildDeb
}
integTest.dependsOn buildDeb
integTest {
/* We use real deb tools to extract the deb file for testing so we have to
skip the test if they aren't around. */
enabled = new File('/usr/bin/dpkg-deb').exists() || // Standard location
new File('/usr/local/bin/dpkg-deb').exists() // Homebrew location
dependsOn buildDeb
clusterConfig.distribution = 'deb'
}

View File

@ -19,8 +19,9 @@
task buildRpm(type: Rpm) {
dependsOn dependencyFiles, preparePackagingFiles
baseName 'elasticsearch' // this is what pom generation uses for artifactId
// Follow elasticsearch's rpm file naming convention
archiveName = "${packageName}-${project.version}.rpm"
archiveName "${packageName}-${project.version}.rpm"
packageGroup 'Application/Internet'
prefix '/usr'
packager 'Elasticsearch'
@ -31,7 +32,16 @@ task buildRpm(type: Rpm) {
}
artifacts {
'default' buildRpm
archives buildRpm
}
integTest.dependsOn buildRpm
integTest {
/* We use real rpm tools to extract the rpm file for testing so we have to
skip the test if they aren't around. */
enabled = new File('/bin/rpm').exists() || // Standard location
new File('/usr/bin/rpm').exists() || // Debian location
new File('/usr/local/bin/rpm').exists() // Homebrew location
dependsOn buildRpm
clusterConfig.distribution = 'rpm'
}

View File

@ -56,5 +56,6 @@ These community plugins appear to have been abandoned:
* https://github.com/endgameinc/elasticsearch-term-plugin[Terms Component Plugin] (by Endgame Inc.)
* https://github.com/etsy/es-restlog[REST Request Logging Plugin] (by Etsy/Shikhar Bhushan)
include::delete-by-query.asciidoc[]

View File

@ -1,10 +1,17 @@
[[mapping-index-field]]
=== `_index` field
When performing queries across multiple indexes, it is sometimes desirable
to add query clauses that are associated with documents of only certain
indexes. The `_index` field allows matching on the index a document was
indexed into. Its value is accessible in queries, aggregations, scripts, and when sorting:
When performing queries across multiple indexes, it is sometimes desirable to
add query clauses that are associated with documents of only certain indexes.
The `_index` field allows matching on the index a document was indexed into.
Its value is accessible in `term`, or `terms` queries, aggregations,
scripts, and when sorting:
NOTE: The `_index` is exposed as a virtual field -- it is not added to the
Lucene index as a real field. This means that you can use the `_index` field
in a `term` or `terms` query (or any query that is rewritten to a `term`
query, such as the `match`, `query_string` or `simple_query_string` query),
but it does not support `prefix`, `wildcard`, `regexp`, or `fuzzy` queries.
[source,js]
--------------------------

View File

@ -73,7 +73,7 @@ In this example, words that have a document frequency greater than 0.1%
{
"common": {
"body": {
"query": "this is bonsai cool",
"query": "this is bonsai cool",
"cutoff_frequency": 0.001
}
}
@ -93,7 +93,7 @@ all terms required:
{
"common": {
"body": {
"query": "nelly the elephant as a cartoon",
"query": "nelly the elephant as a cartoon",
"cutoff_frequency": 0.001,
"low_freq_operator": "and"
}
@ -113,8 +113,8 @@ which is roughly equivalent to:
{ "term": { "body": "cartoon"}}
],
"should": [
{ "term": { "body": "the"}},
{ "term": { "body": "as"}},
{ "term": { "body": "the"}}
{ "term": { "body": "as"}}
{ "term": { "body": "a"}}
]
}
@ -131,8 +131,8 @@ must be present, for instance:
{
"common": {
"body": {
"query": "nelly the elephant as a cartoon",
"cutoff_frequency": 0.001,
"query": "nelly the elephant as a cartoon",
"cutoff_frequency": 0.001,
"minimum_should_match": 2
}
}
@ -156,8 +156,8 @@ which is roughly equivalent to:
}
},
"should": [
{ "term": { "body": "the"}},
{ "term": { "body": "as"}},
{ "term": { "body": "the"}}
{ "term": { "body": "as"}}
{ "term": { "body": "a"}}
]
}
@ -169,7 +169,7 @@ minimum_should_match
A different
<<query-dsl-minimum-should-match,`minimum_should_match`>>
can be applied for low and high frequency terms with the additional
`low_freq` and `high_freq` parameters Here is an example when providing
`low_freq` and `high_freq` parameters. Here is an example when providing
additional parameters (note the change in structure):
[source,js]
@ -177,8 +177,8 @@ additional parameters (note the change in structure):
{
"common": {
"body": {
"query": "nelly the elephant not as a cartoon",
"cutoff_frequency": 0.001,
"query": "nelly the elephant not as a cartoon",
"cutoff_frequency": 0.001,
"minimum_should_match": {
"low_freq" : 2,
"high_freq" : 3
@ -230,8 +230,8 @@ for high frequency terms is when there are only high frequency terms:
{
"common": {
"body": {
"query": "how not to be",
"cutoff_frequency": 0.001,
"query": "how not to be",
"cutoff_frequency": 0.001,
"minimum_should_match": {
"low_freq" : 2,
"high_freq" : 3

View File

@ -78,7 +78,7 @@ curl -XGET localhost:9200/shirts/_search -d '
},
"aggs": {
"colors": {
"terms": { "field": "color" }, <2>
"terms": { "field": "color" } <2>
},
"color_red": {
"filter": {

View File

@ -84,13 +84,12 @@ field support has the following parameters on top of the already
existing sort options:
`nested_path`::
Defines the on what nested object to sort. The actual
sort field must be a direct field inside this nested object. The default
is to use the most immediate inherited nested object from the sort
field.
Defines on which nested object to sort. The actual
sort field must be a direct field inside this nested object.
When sorting by a nested field, this field is mandatory.
`nested_filter`::
A filter the inner objects inside the nested path
A filter that the inner objects inside the nested path
should match with in order for its field values to be taken into account
by sorting. A common case is to repeat the query / filter inside the
nested filter or query. By default no `nested_filter` is active.
@ -98,7 +97,7 @@ existing sort options:
===== Nested sorting example
In the below example `offer` is a field of type `nested`.
The `nested_path` needs to be specified other elasticsearch doesn't on what nested level sort values need to be captured.
The `nested_path` needs to be specified; otherwise, elasticsearch doesn't know on what nested level sort values need to be captured.
[source,js]
--------------------------------------------------

View File

@ -56,7 +56,7 @@ If you encounter an issue, https://github.com/elasticsearch/elasticsearch/issues
We are committed to tracking down and fixing all the issues that are posted.
[float]
=== Use two phase commit for Cluster State publishing (STATUS: ONGOING)
=== Use two phase commit for Cluster State publishing (STATUS: ONGOING, v3.0.0)
A master node in Elasticsearch continuously https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html#fault-detection[monitors the cluster nodes]
and removes any node from the cluster that doesn't respond to its pings in a timely
@ -103,38 +103,6 @@ Further issues remain with the retry mechanism:
See {GIT}9967[#9967]. (STATUS: ONGOING)
[float]
=== Wait on incoming joins before electing local node as master (STATUS: ONGOING)
During master election each node pings in order to discover other nodes and validate the liveness of existing
nodes. Based on this information the node either discovers an existing master or, if enough nodes are found
(see https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html#master-election[`discovery.zen.minimum_master_nodes`]) a new master will be elected. Currently, the node that is
elected as master will update the cluster state to indicate the result of the election. Other nodes will submit
a join request to the newly elected master node. Instead of immediately processing the election result, the elected master
node should wait for the incoming joins from other nodes, thus validating that the result of the election is properly applied. As soon as enough
nodes have sent their joins request (based on the `minimum_master_nodes` settings) the cluster state is updated.
{GIT}12161[#12161]
[float]
=== Write index metadata on data nodes where shards allocated (STATUS: ONGOING)
Today, index metadata is written only on nodes that are master-eligible, not on
data-only nodes. This is not a problem when running with multiple master nodes,
as recommended, as the loss of all but one master node is still recoverable.
However, users running with a single master node are at risk of losing
their index metadata if the master fails. Instead, this metadata should
also be written on any node where a shard is allocated. {GIT}8823[#8823]
[float]
=== Better file distribution with multiple data paths (STATUS: ONGOING)
Today, a node configured with multiple data paths distributes writes across
all paths by writing one file to each path in turn. This can mean that the
failure of a single disk corrupts many shards at once. Instead, by allocating
an entire shard to a single data path, the extent of the damage can be limited
to just the shards on that disk. {GIT}9498[#9498]
[float]
=== OOM resiliency (STATUS: ONGOING)
@ -142,21 +110,10 @@ The family of circuit breakers has greatly reduced the occurrence of OOM
exceptions, but it is still possible to cause a node to run out of heap
space. The following issues have been identified:
* Set a hard limit on `from`/`size` parameters {GIT}9311[#9311]. (STATUS: ONGOING)
* Set a hard limit on `from`/`size` parameters {GIT}9311[#9311]. (STATUS: DONE, v2.1.0)
* Prevent combinatorial explosion in aggregations from causing OOM {GIT}8081[#8081]. (STATUS: ONGOING)
* Add the byte size of each hit to the request circuit breaker {GIT}9310[#9310]. (STATUS: ONGOING)
[float]
=== Mapping changes should be applied synchronously (STATUS: ONGOING)
When introducing new fields using dynamic mapping, it is possible that the same
field can be added to different shards with different data types. Each shard
will operate with its local data type but, if the shard is relocated, the
data type from the cluster state will be applied to the new shard, which
can result in a corrupt shard. To prevent this, new fields should not
be added to a shard's mapping until confirmed by the master.
{GIT}8688[#8688] (STATUS: DONE)
[float]
=== Loss of documents during network partition (STATUS: ONGOING)
@ -166,26 +123,6 @@ If the node hosts a primary shard at the moment of partition, and ends up being
A test to replicate this condition was added in {GIT}7493[#7493].
[float]
=== Lucene checksums phase 3 (STATUS:ONGOING)
Almost all files in Elasticsearch now have checksums which are validated before use. A few changes remain:
* {GIT}7586[#7586] adds checksums for cluster and index state files. (STATUS: DONE, Fixed in v1.5.0)
* {GIT}9183[#9183] supports validating the checksums on all files when starting a node. (STATUS: DONE, Fixed in v2.0.0)
* {JIRA}5894[LUCENE-5894] lays the groundwork for extending more efficient checksum validation to all files during optimized bulk merges. (STATUS: DONE, Fixed in v2.0.0)
* {GIT}8403[#8403] to add validation of checksums on Lucene `segments_N` files. (STATUS: NOT STARTED)
[float]
=== Add per-segment and per-commit ID to help replication (STATUS: ONGOING)
{JIRA}5895[LUCENE-5895] adds a unique ID for each segment and each commit point. File-based replication (as performed by snapshot/restore) can use this ID to know whether the segment/commit on the source and destination machines are the same. Fixed in Lucene 5.0.
[float]
=== Report shard-level statuses on write operations (STATUS: ONGOING)
Make write calls return the number of total/successful/missing shards in the same way that we do in search, which ensures transparency in the consistency of write operations. {GIT}7994[#7994]. (STATUS: DONE, v2.0.0)
[float]
=== Jepsen Test Failures (STATUS: ONGOING)
@ -196,24 +133,96 @@ We have increased our test coverage to include scenarios tested by Jepsen. We ma
This status page is a start, but we can do a better job of explicitly documenting the processes at work in Elasticsearch, and what happens in the case of each type of failure. The plan is to have a test case that validates each behavior under simulated conditions. Every test will document the expected results, the associated test code and an explicit PASS or FAIL status for each simulated case.
[float]
=== Take filter cache key size into account (STATUS: ONGOING)
Commonly used filters are cached in Elasticsearch. That cache is limited in size (10% of node's memory by default) and is being evicted based on a least recently used policy. The amount of memory used by the cache depends on two primary components - the values it stores and the keys associated with them. Calculating the memory footprint of the values is easy enough but the keys accounting is trickier to achieve as they are, by default, raw Lucene objects. This is largely not a problem as the keys are dominated by the values. However, recent optimizations in Lucene have changed the balance causing the filter cache to grow beyond it's size.
While we are working on a longer term solution ({GIT}9176[#9176]), we introduced a minimum weight of 1k for each cache entry. This puts an effective limit on the number of entries in the cache. See {GIT}8304[#8304] (STATUS: DONE, fixed in v1.4.0)
[float]
=== Do not allow stale shards to automatically be promoted to primary (STATUS: ONGOING)
In some scenarios, after loss of all valid copies, a stale replica shard can be assigned as a primary. This can lead to
In some scenarios, after the loss of all valid copies, a stale replica shard can be assigned as a primary. This can lead to
a loss of acknowledged writes if the valid copies are not lost but are rather temporarily isolated. Work is underway
({GIT}14671[#14671]) to prevent the automatic promotion of a stale primary and only allow such promotion to occur when
a system operator manually intervenes.
== Completed
[float]
=== Wait on incoming joins before electing local node as master (STATUS: DONE, v2.0.0)
During master election each node pings in order to discover other nodes and validate the liveness of existing
nodes. Based on this information the node either discovers an existing master or, if enough nodes are found
(see https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-zen.html#master-election[`discovery.zen.minimum_master_nodes`]) a new master will be elected. Currently, the node that is
elected as master will update the cluster state to indicate the result of the election. Other nodes will submit
a join request to the newly elected master node. Instead of immediately processing the election result, the elected master
node should wait for the incoming joins from other nodes, thus validating that the result of the election is properly applied. As soon as enough
nodes have sent their join requests (based on the `minimum_master_nodes` setting) the cluster state is updated.
{GIT}12161[#12161]
[float]
=== Mapping changes should be applied synchronously (STATUS: DONE, v2.0.0)
When introducing new fields using dynamic mapping, it is possible that the same
field can be added to different shards with different data types. Each shard
will operate with its local data type but, if the shard is relocated, the
data type from the cluster state will be applied to the new shard, which
can result in a corrupt shard. To prevent this, new fields should not
be added to a shard's mapping until confirmed by the master.
{GIT}8688[#8688] (STATUS: DONE)
[float]
=== Add per-segment and per-commit ID to help replication (STATUS: DONE, v2.0.0)
{JIRA}5895[LUCENE-5895] adds a unique ID for each segment and each commit point. File-based replication (as performed by snapshot/restore) can use this ID to know whether the segment/commit on the source and destination machines are the same. Fixed in Lucene 5.0.
[float]
=== Write index metadata on data nodes where shards allocated (STATUS: DONE, v2.0.0)
Today, index metadata is written only on nodes that are master-eligible, not on
data-only nodes. This is not a problem when running with multiple master nodes,
as recommended, as the loss of all but one master node is still recoverable.
However, users running with a single master node are at risk of losing
their index metadata if the master fails. Instead, this metadata should
also be written on any node where a shard is allocated. {GIT}8823[#8823], {GIT}9952[#9952]
[float]
=== Better file distribution with multiple data paths (STATUS: DONE, v2.0.0)
Today, a node configured with multiple data paths distributes writes across
all paths by writing one file to each path in turn. This can mean that the
failure of a single disk corrupts many shards at once. Instead, by allocating
an entire shard to a single data path, the extent of the damage can be limited
to just the shards on that disk. {GIT}9498[#9498]
[float]
=== Lucene checksums phase 3 (STATUS: DONE, v2.0.0)
Almost all files in Elasticsearch now have checksums which are validated before use. A few changes remain:
* {GIT}7586[#7586] adds checksums for cluster and index state files. (STATUS: DONE, Fixed in v1.5.0)
* {GIT}9183[#9183] supports validating the checksums on all files when starting a node. (STATUS: DONE, Fixed in v2.0.0)
* {JIRA}5894[LUCENE-5894] lays the groundwork for extending more efficient checksum validation to all files during optimized bulk merges. (STATUS: DONE, Fixed in v2.0.0)
* {GIT}8403[#8403] to add validation of checksums on Lucene `segments_N` files. (STATUS: DONE, v2.0.0)
[float]
=== Report shard-level statuses on write operations (STATUS: DONE, v2.0.0)
Make write calls return the number of total/successful/missing shards in the same way that we do in search, which ensures transparency in the consistency of write operations. {GIT}7994[#7994]. (STATUS: DONE, v2.0.0)
[float]
=== Take filter cache key size into account (STATUS: DONE, v2.0.0)
Commonly used filters are cached in Elasticsearch. That cache is limited in size
(10% of node's memory by default) and is being evicted based on a least recently
used policy. The amount of memory used by the cache depends on two primary
components - the values it stores and the keys associated with them. Calculating
the memory footprint of the values is easy enough but the keys accounting is
trickier to achieve as they are, by default, raw Lucene objects. This is largely
not a problem as the keys are dominated by the values. However, recent
optimizations in Lucene have changed the balance causing the filter cache to
grow beyond its size.
As a temporary solution, we introduced a minimum weight of 1k for each cache entry.
This puts an effective limit on the number of entries in the cache. See {GIT}8304[#8304] (STATUS: DONE, fixed in v1.4.0)
The issue has been completely solved by the move to Lucene's query cache. See {GIT}10897[#10897]
[float]
=== Ensure shard state ID is incremental (STATUS: DONE, v1.5.1)
@ -491,4 +500,3 @@ At Elasticsearch, we live the philosophy that we can miss a bug once, but never
=== Lucene Loses Data On File Descriptors Failure (STATUS: DONE, v0.90.0)
When a process runs out of file descriptors, Lucene can cause an index to be completely deleted. This issue was fixed in Lucene ({JIRA}4870[version 4.2.1]) and fixed in an early version of Elasticsearch. See issue {GIT}2812[#2812].

View File

@ -60,8 +60,8 @@ public class EncryptedDocMapperTests extends AttachmentUnitTestCase {
assertThat(doc.get(docMapper.mappers().getMapper("file1.title").fieldType().names().indexName()), equalTo("Hello"));
assertThat(doc.get(docMapper.mappers().getMapper("file1.author").fieldType().names().indexName()), equalTo("kimchy"));
assertThat(doc.get(docMapper.mappers().getMapper("file1.keywords").fieldType().names().indexName()), equalTo("elasticsearch,cool,bonsai"));
assertThat(doc.get(docMapper.mappers().getMapper("file1.content_type").fieldType().names().indexName()), equalTo("text/html; charset=ISO-8859-1"));
assertThat(doc.getField(docMapper.mappers().getMapper("file1.content_length").fieldType().names().indexName()).numericValue().longValue(), is(344L));
assertThat(doc.get(docMapper.mappers().getMapper("file1.content_type").fieldType().names().indexName()), startsWith("text/html;"));
assertThat(doc.getField(docMapper.mappers().getMapper("file1.content_length").fieldType().names().indexName()).numericValue().longValue(), greaterThan(0L));
assertThat(doc.get(docMapper.mappers().getMapper("file2").fieldType().names().indexName()), nullValue());
assertThat(doc.get(docMapper.mappers().getMapper("file2.title").fieldType().names().indexName()), nullValue());
@ -96,8 +96,8 @@ public class EncryptedDocMapperTests extends AttachmentUnitTestCase {
assertThat(doc.get(docMapper.mappers().getMapper("file2.title").fieldType().names().indexName()), equalTo("Hello"));
assertThat(doc.get(docMapper.mappers().getMapper("file2.author").fieldType().names().indexName()), equalTo("kimchy"));
assertThat(doc.get(docMapper.mappers().getMapper("file2.keywords").fieldType().names().indexName()), equalTo("elasticsearch,cool,bonsai"));
assertThat(doc.get(docMapper.mappers().getMapper("file2.content_type").fieldType().names().indexName()), equalTo("text/html; charset=ISO-8859-1"));
assertThat(doc.getField(docMapper.mappers().getMapper("file2.content_length").fieldType().names().indexName()).numericValue().longValue(), is(344L));
assertThat(doc.get(docMapper.mappers().getMapper("file2.content_type").fieldType().names().indexName()), startsWith("text/html;"));
assertThat(doc.getField(docMapper.mappers().getMapper("file2.content_length").fieldType().names().indexName()).numericValue().longValue(), greaterThan(0L));
}
public void testMultipleDocsEncryptedNotIgnoringErrors() throws IOException {

View File

@ -69,8 +69,12 @@ public class MetadataMapperTests extends AttachmentUnitTestCase {
assertThat(doc.get(docMapper.mappers().getMapper("file.title").fieldType().names().indexName()), equalTo("Hello"));
assertThat(doc.get(docMapper.mappers().getMapper("file.author").fieldType().names().indexName()), equalTo("kimchy"));
assertThat(doc.get(docMapper.mappers().getMapper("file.keywords").fieldType().names().indexName()), equalTo("elasticsearch,cool,bonsai"));
assertThat(doc.get(docMapper.mappers().getMapper("file.content_type").fieldType().names().indexName()), equalTo("text/html; charset=ISO-8859-1"));
assertThat(doc.getField(docMapper.mappers().getMapper("file.content_length").fieldType().names().indexName()).numericValue().longValue(), is(expectedLength));
assertThat(doc.get(docMapper.mappers().getMapper("file.content_type").fieldType().names().indexName()), startsWith("text/html;"));
if (expectedLength == null) {
assertNull(doc.getField(docMapper.mappers().getMapper("file.content_length").fieldType().names().indexName()));
} else {
assertThat(doc.getField(docMapper.mappers().getMapper("file.content_length").fieldType().names().indexName()).numericValue().longValue(), greaterThan(0L));
}
}
public void testIgnoreWithoutDate() throws Exception {

View File

@ -86,7 +86,6 @@ public class MultifieldAttachmentMapperTests extends AttachmentUnitTestCase {
public void testExternalValues() throws Exception {
String originalText = "This is an elasticsearch mapper attachment test.";
String contentType = "text/plain; charset=ISO-8859-1";
String forcedName = "dummyname.txt";
String bytes = Base64.encodeBytes(originalText.getBytes(StandardCharsets.ISO_8859_1));
@ -108,9 +107,9 @@ public class MultifieldAttachmentMapperTests extends AttachmentUnitTestCase {
assertThat(doc.rootDoc().getField("file.content").stringValue(), is(originalText + "\n"));
assertThat(doc.rootDoc().getField("file.content_type"), notNullValue());
assertThat(doc.rootDoc().getField("file.content_type").stringValue(), is(contentType));
assertThat(doc.rootDoc().getField("file.content_type").stringValue(), startsWith("text/plain;"));
assertThat(doc.rootDoc().getField("file.content_type.suggest"), notNullValue());
assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), is(contentType));
assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), startsWith("text/plain;"));
assertThat(doc.rootDoc().getField("file.content_length"), notNullValue());
assertThat(doc.rootDoc().getField("file.content_length").numericValue().intValue(), is(originalText.length()));
@ -131,9 +130,9 @@ public class MultifieldAttachmentMapperTests extends AttachmentUnitTestCase {
assertThat(doc.rootDoc().getField("file.content").stringValue(), is(originalText + "\n"));
assertThat(doc.rootDoc().getField("file.content_type"), notNullValue());
assertThat(doc.rootDoc().getField("file.content_type").stringValue(), is(contentType));
assertThat(doc.rootDoc().getField("file.content_type").stringValue(), startsWith("text/plain;"));
assertThat(doc.rootDoc().getField("file.content_type.suggest"), notNullValue());
assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), is(contentType));
assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), startsWith("text/plain;"));
assertThat(doc.rootDoc().getField("file.content_length"), notNullValue());
assertThat(doc.rootDoc().getField("file.content_length").numericValue().intValue(), is(originalText.length()));

View File

@ -33,6 +33,11 @@
},
"operation_threading": {
"description" : "TODO: ?"
},
"verbose": {
"type": "boolean",
"description": "Includes detailed memory usage by Lucene.",
"default": false
}
}
},

View File

@ -53,8 +53,8 @@
"type" : "list",
"description" : "A comma-separated list of document types for the `indexing` index metric"
}
},
"body": null
}
}
},
"body": null
}
}

View File

@ -1035,7 +1035,7 @@ public abstract class ESIntegTestCase extends ESTestCase {
/**
* Sets the cluster's minimum master node and make sure the response is acknowledge.
* Note: this doesn't guaranty the new settings is in effect, just that it has been received bu all nodes.
* Note: this doesn't guarantee that the new setting has taken effect, just that it has been received by all nodes.
*/
public void setMinimumMasterNodes(int n) {
assertTrue(client().admin().cluster().prepareUpdateSettings().setTransientSettings(