Merge branch 'master' into feature/query-refactoring

This commit is contained in:
Christoph Büscher 2015-09-21 15:38:43 +02:00
commit cc69de5c5f
22 changed files with 90 additions and 50 deletions

@@ -61,13 +61,7 @@ public class BootstrapForTesting {
         try {
             JarHell.checkJarHell();
         } catch (Exception e) {
-            if (Boolean.parseBoolean(System.getProperty("tests.maven"))) {
-                throw new RuntimeException("found jar hell in test classpath", e);
-            } else {
-                Loggers.getLogger(BootstrapForTesting.class)
-                    .warn("Your ide or custom test runner has jar hell issues, " +
-                          "you might want to look into that", e);
-            }
+            throw new RuntimeException("found jar hell in test classpath", e);
         }
         // make sure java.io.tmpdir exists always (in case code uses it in a static initializer)

@@ -60,17 +60,13 @@ SearchResponse scrollResp = client.prepareSearch(test)
         .setQuery(qb)
         .setSize(100).execute().actionGet(); //100 hits per shard will be returned for each scroll
 //Scroll until no hits are returned
-while (true) {
+do {
     for (SearchHit hit : scrollResp.getHits().getHits()) {
         //Handle the hit...
     }
     scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-    //Break condition: No hits are returned
-    if (scrollResp.getHits().getHits().length == 0) {
-        break;
-    }
-}
+} while (scrollResp.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.
 --------------------------------------------------

 [[java-search-msearch]]
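
For reference, a minimal REST sketch of the same scroll loop (the index name `test` and the `match_all` query stand in for the Java snippet's `test` index and `qb`; `<scroll id>` must be taken from each response's `_scroll_id`):

[source,sh]
--------------------------------------------------
# open a scroll, keeping the context alive for 10 minutes (the 600000 ms above)
curl -XGET 'localhost:9200/test/_search?scroll=10m&size=100' -d '{"query" : {"match_all" : {}}}'
# repeat until a response comes back with zero hits
curl -XGET 'localhost:9200/_search/scroll?scroll=10m&scroll_id=<scroll id>'
--------------------------------------------------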

@@ -1,34 +1,83 @@
 [[analysis-compound-word-tokenfilter]]
 === Compound Word Token Filter

-Token filters that allow to decompose compound words. There are two
-types available: `dictionary_decompounder` and
-`hyphenation_decompounder`.
-
-The following are settings that can be set for a compound word token
-filter type:
-
-[cols="<,<",options="header",]
-|=======================================================================
-|Setting |Description
-|`word_list` |A list of words to use.
-
-|`word_list_path` |A path (either relative to `config` location, or
-absolute) to a list of words.
-
-|`hyphenation_patterns_path` |A path (either relative to `config` location, or
-absolute) to a FOP XML hyphenation pattern file. (See http://offo.sourceforge.net/hyphenation/)
-Required for `hyphenation_decompounder`.
-
-|`min_word_size` |Minimum word size (Integer). Defaults to 5.
-
-|`min_subword_size` |Minimum subword size (Integer). Defaults to 2.
-
-|`max_subword_size` |Maximum subword size (Integer). Defaults to 15.
-
-|`only_longest_match` |Only matching the longest (Boolean). Defaults to
-`false`.
-|=======================================================================
+The `hyphenation_decompounder` and `dictionary_decompounder` token filters can
+decompose compound words found in many Germanic languages into word parts.
+
+Both token filters require a dictionary of word parts, which can be provided
+as:
+
+[horizontal]
+`word_list`::
+
+An array of words, specified inline in the token filter configuration, or
+
+`word_list_path`::
+
+The path (either absolute or relative to the `config` directory) to a UTF-8
+encoded file containing one word per line.
+
+[float]
+=== Hyphenation decompounder
+
+The `hyphenation_decompounder` uses hyphenation grammars to find potential
+subwords that are then checked against the word dictionary. The quality of the
+output tokens is directly connected to the quality of the grammar file you
+use. For languages like German they are quite good.
+
+XML based hyphenation grammar files can be found in the
+http://offo.sourceforge.net/hyphenation/#FOP+XML+Hyphenation+Patterns[Objects For Formatting Objects]
+(OFFO) Sourceforge project. You can download http://downloads.sourceforge.net/offo/offo-hyphenation.zip[offo-hyphenation.zip]
+directly and look in the `offo-hyphenation/hyph/` directory.
+
+Credits for the hyphenation code go to the Apache FOP project.
+
+[float]
+=== Dictionary decompounder
+
+The `dictionary_decompounder` uses a brute force approach in conjunction with
+only the word dictionary to find subwords in a compound word. It is much
+slower than the hyphenation decompounder, but can serve as a first check of
+the quality of your dictionary.
+
+[float]
+=== Compound token filter parameters
+
+The following parameters can be used to configure a compound word token
+filter:
+
+[horizontal]
+`type`::
+
+Either `dictionary_decompounder` or `hyphenation_decompounder`.
+
+`word_list`::
+
+An array containing a list of words to use for the word dictionary.
+
+`word_list_path`::
+
+The path (either absolute or relative to the `config` directory) to the word dictionary.
+
+`hyphenation_patterns_path`::
+
+The path (either absolute or relative to the `config` directory) to a FOP XML
+hyphenation pattern file. Required for the `hyphenation_decompounder`.
+
+`min_word_size`::
+
+Minimum word size. Defaults to 5.
+
+`min_subword_size`::
+
+Minimum subword size. Defaults to 2.
+
+`max_subword_size`::
+
+Maximum subword size. Defaults to 15.
+
+`only_longest_match`::
+
+Whether to include only the longest matching subword. Defaults to `false`.

 Here is an example:
@@ -48,5 +97,6 @@ index :
         myTokenFilter2 :
             type : hyphenation_decompounder
             word_list_path: path/to/words.txt
+            hyphenation_patterns_path: path/to/fop.xml
             max_subword_size : 22
 --------------------------------------------------
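
A complete, self-contained variant of the configuration described above might look as follows (the index name, analyzer and filter names, and the lowercased word list are illustrative, not part of the original docs):

[source,sh]
--------------------------------------------------
curl -XPUT 'localhost:9200/decompound-example' -d '{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "my_analyzer" : {
          "type" : "custom",
          "tokenizer" : "standard",
          "filter" : ["lowercase", "my_decompounder"]
        }
      },
      "filter" : {
        "my_decompounder" : {
          "type" : "dictionary_decompounder",
          "word_list" : ["donau", "dampf", "schiff", "fahrt"]
        }
      }
    }
  }
}'
--------------------------------------------------

With this setup, analyzing a compound such as "Donaudampfschiff" with `my_analyzer` keeps the original token and additionally emits the dictionary subwords it contains.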

@@ -56,7 +56,7 @@ newlines. Example:
 $ cat requests
 { "index" : { "_index" : "test", "_type" : "type1", "_id" : "1" } }
 { "field1" : "value1" }
-$ curl -s -XPOST localhost:9200/_bulk --data-binary @requests; echo
+$ curl -s -XPOST localhost:9200/_bulk --data-binary "@requests"; echo
 {"took":7,"items":[{"create":{"_index":"test","_type":"type1","_id":"1","_version":1}}]}
 --------------------------------------------------

@@ -544,7 +544,7 @@ You can download the sample dataset (accounts.json) from https://github.com/bly2
 [source,sh]
 --------------------------------------------------
-curl -XPOST 'localhost:9200/bank/account/_bulk?pretty' --data-binary @accounts.json
+curl -XPOST 'localhost:9200/bank/account/_bulk?pretty' --data-binary "@accounts.json"
 curl 'localhost:9200/_cat/indices?v'
 --------------------------------------------------
@@ -915,7 +915,7 @@ In SQL, the above aggregation is similar in concept to:
 [source,sh]
 --------------------------------------------------
-SELECT COUNT(*) from bank GROUP BY state ORDER BY COUNT(*) DESC
+SELECT state, COUNT(*) FROM bank GROUP BY state ORDER BY COUNT(*) DESC
 --------------------------------------------------

 And the response (partially shown):
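
The Elasticsearch aggregation being compared is not shown in this hunk; a sketch of what such a request could look like, assuming a `terms` aggregation on the `state` field (the `group_by_state` name is illustrative). A `terms` aggregation orders its buckets by document count descending by default, mirroring the `ORDER BY COUNT(*) DESC`:

[source,sh]
--------------------------------------------------
curl -XPOST 'localhost:9200/bank/_search?pretty' -d '{
  "size" : 0,
  "aggs" : {
    "group_by_state" : {
      "terms" : { "field" : "state" }
    }
  }
}'
--------------------------------------------------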

@@ -70,8 +70,9 @@ Checking shards may take a lot of time on large indices.
 [[index-codec]] `index.codec`::

-experimental[] The `default` value compresses stored data with LZ4
-compression, but this can be set to `best_compression` for a higher
+experimental[] The +default+ value compresses stored data with LZ4
+compression, but this can be set to +best_compression+
+which uses https://en.wikipedia.org/wiki/DEFLATE[DEFLATE] for a higher
 compression ratio, at the expense of slower stored fields performance.

 [float]
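
For illustration, a minimal sketch of applying this setting at index creation time (the index name is hypothetical; `index.codec` cannot be changed on a live index, only at creation or while the index is closed):

[source,sh]
--------------------------------------------------
curl -XPUT 'localhost:9200/archived-logs' -d '{
  "settings" : { "index.codec" : "best_compression" }
}'
--------------------------------------------------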

@@ -8,7 +8,7 @@ _filter context_:
 Query context::

-A query used in query context will caculated relevance scores and will not be
+A query used in query context will calculate relevance scores and will not be
 cacheable. Query context is used whenever filter context does not apply.

 Filter context::
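
A short sketch of the distinction, using the `bool` query's `filter` clause introduced by this query refactoring (the field names are illustrative): the `match` clause runs in query context and contributes to `_score`, while the `range` clause runs in filter context and only includes or excludes documents:

[source,sh]
--------------------------------------------------
curl -XGET 'localhost:9200/_search?pretty' -d '{
  "query" : {
    "bool" : {
      "must" : { "match" : { "title" : "search" } },
      "filter" : { "range" : { "publish_date" : { "gte" : "2015-01-01" } } }
    }
  }
}'
--------------------------------------------------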

@@ -35,7 +35,7 @@ $ cat requests
 {"search_type" : "dfs_query_then_fetch"}
 {"query" : {"match_all" : {}}}

-$ curl -XGET localhost:9200/_msearch --data-binary @requests; echo
+$ curl -XGET localhost:9200/_msearch --data-binary "@requests"; echo
 --------------------------------------------------

 Note, the above includes an example of an empty header (can also be just

@@ -366,7 +366,7 @@ Request:
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/twitter/tweet/_mpercolate' --data-binary @requests.txt; echo
+curl -XGET 'localhost:9200/twitter/tweet/_mpercolate' --data-binary "@requests.txt"; echo
 --------------------------------------------------

 The index `twitter` is the default index, and the type `tweet` is the default type and will be used in the case a header

@@ -1 +0,0 @@
-7ff51040bbcc9085dcb9a24a2c2a3cc7ac995988

@@ -0,0 +1 @@
+b53f650323b7242dcced25b679f3e9aa4b494da5

@@ -1 +0,0 @@
-b0712cc659e72b9da0f5b03872d2476ab4a695f7

@@ -0,0 +1 @@
+50ba7eb31719be1260bdae51cf69340df2d91ec4

@@ -16,7 +16,6 @@
     <properties>
         <elasticsearch.plugin.classname>org.elasticsearch.plugin.discovery.ec2.Ec2DiscoveryPlugin</elasticsearch.plugin.classname>
-        <amazonaws.version>1.10.12</amazonaws.version>
         <tests.jvms>1</tests.jvms>
         <tests.rest.suite>discovery_ec2</tests.rest.suite>
         <tests.rest.load_packaged>false</tests.rest.load_packaged>

@@ -26,6 +26,7 @@
         <elasticsearch.plugin.jvm>true</elasticsearch.plugin.jvm>
         <elasticsearch.plugin.isolated>true</elasticsearch.plugin.isolated>
         <elasticsearch.plugin.site>false</elasticsearch.plugin.site>
+        <amazonaws.version>1.10.19</amazonaws.version>
     </properties>

     <dependencies>

@@ -1 +0,0 @@
-7ff51040bbcc9085dcb9a24a2c2a3cc7ac995988

@@ -0,0 +1 @@
+b53f650323b7242dcced25b679f3e9aa4b494da5

@@ -1 +0,0 @@
-31afbe46b65e9933316c7e8dfb8b88dc4b37b6ba

@@ -0,0 +1 @@
+c8764f3e61a3c420db429870ec22b31fe755d81d

@@ -1 +0,0 @@
-c9e2593fdf398c5f8906a704db037d17b2de4b2a

@@ -0,0 +1 @@
+a23dc60d56d54126250c23cab1d01328b1e83678

@@ -16,7 +16,6 @@
     <properties>
         <elasticsearch.plugin.classname>org.elasticsearch.plugin.repository.s3.S3RepositoryPlugin</elasticsearch.plugin.classname>
-        <amazonaws.version>1.10.12</amazonaws.version>
         <tests.jvms>1</tests.jvms>
         <tests.rest.suite>repository_s3</tests.rest.suite>
         <tests.rest.load_packaged>false</tests.rest.load_packaged>