Merge branch 'master' into feature/query-refactoring
commit cc69de5c5f
@@ -61,13 +61,7 @@ public class BootstrapForTesting {
         try {
             JarHell.checkJarHell();
         } catch (Exception e) {
-            if (Boolean.parseBoolean(System.getProperty("tests.maven"))) {
-                throw new RuntimeException("found jar hell in test classpath", e);
-            } else {
-                Loggers.getLogger(BootstrapForTesting.class)
-                    .warn("Your ide or custom test runner has jar hell issues, " +
-                          "you might want to look into that", e);
-            }
+            throw new RuntimeException("found jar hell in test classpath", e);
         }
 
         // make sure java.io.tmpdir exists always (in case code uses it in a static initializer)
@@ -60,17 +60,13 @@ SearchResponse scrollResp = client.prepareSearch(test)
         .setQuery(qb)
         .setSize(100).execute().actionGet(); //100 hits per shard will be returned for each scroll
 //Scroll until no hits are returned
-while (true) {
+do {
     for (SearchHit hit : scrollResp.getHits().getHits()) {
         //Handle the hit...
     }
 
     scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-    //Break condition: No hits are returned
-    if (scrollResp.getHits().getHits().length == 0) {
-        break;
-    }
-}
+} while(scrollResp.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.
 --------------------------------------------------
 
 [[java-search-msearch]]
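The do/while form above always runs at least one pass and stops once a scroll page comes back empty. The example never releases the scroll context explicitly; a minimal sketch of doing so, assuming the same `client` and `scrollResp` variables from the snippet:

[source,java]
--------------------------------------------------
// Sketch only (not part of this commit): once the do/while loop has drained
// all hits, release the scroll context explicitly instead of waiting for the
// TimeValue-based scroll timeout to expire on the server.
client.prepareClearScroll()
        .addScrollId(scrollResp.getScrollId())
        .execute().actionGet();
--------------------------------------------------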
@@ -1,34 +1,83 @@
 [[analysis-compound-word-tokenfilter]]
 === Compound Word Token Filter
 
-Token filters that allow to decompose compound words. There are two
-types available: `dictionary_decompounder` and
-`hyphenation_decompounder`.
+The `hyphenation_decompounder` and `dictionary_decompounder` token filters can
+decompose compound words found in many German languages into word parts.
 
-The following are settings that can be set for a compound word token
-filter type:
+Both token filters require a dictionary of word parts, which can be provided
+as:
 
-[cols="<,<",options="header",]
-|=======================================================================
-|Setting |Description
-|`word_list` |A list of words to use.
-
-|`word_list_path` |A path (either relative to `config` location, or
-absolute) to a list of words.
-
-|`hyphenation_patterns_path` |A path (either relative to `config` location, or
-absolute) to a FOP XML hyphenation pattern file. (See http://offo.sourceforge.net/hyphenation/)
-Required for `hyphenation_decompounder`.
-
-|`min_word_size` |Minimum word size(Integer). Defaults to 5.
-
-|`min_subword_size` |Minimum subword size(Integer). Defaults to 2.
-
-|`max_subword_size` |Maximum subword size(Integer). Defaults to 15.
-
-|`only_longest_match` |Only matching the longest(Boolean). Defaults to
-`false`
-|=======================================================================
+[horizontal]
+`word_list`::
+
+An array of words, specified inline in the token filter configuration, or
+
+`word_list_path`::
+
+The path (either absolute or relative to the `config` directory) to a UTF-8
+encoded file containing one word per line.
+
+[float]
+=== Hyphenation decompounder
+
+The `hyphenation_decompounder` uses hyphenation grammars to find potential
+subwords that are then checked against the word dictionary. The quality of the
+output tokens is directly connected to the quality of the grammar file you
+use. For languages like German they are quite good.
+
+XML based hyphenation grammar files can be found in the
+http://offo.sourceforge.net/hyphenation/#FOP+XML+Hyphenation+Patterns[Objects For Formatting Objects]
+(OFFO) Sourceforge project. You can download http://downloads.sourceforge.net/offo/offo-hyphenation.zip[offo-hyphenation.zip]
+directly and look in the `offo-hyphenation/hyph/` directory.
+Credits for the hyphenation code go to the Apache FOP project.
+
+[float]
+=== Dictionary decompounder
+
+The `dictionary_decompounder` uses a brute force approach in conjunction with
+only the word dictionary to find subwords in a compound word. It is much
+slower than the hyphenation decompounder but can be used as a first start to
+check the quality of your dictionary.
+
+[float]
+=== Compound token filter parameters
+
+The following parameters can be used to configure a compound word token
+filter:
+
+[horizontal]
+`type`::
+
+Either `dictionary_decompounder` or `hyphenation_decompounder`.
+
+`word_list`::
+
+An array containing a list of words to use for the word dictionary.
+
+`word_list_path`::
+
+The path (either absolute or relative to the `config` directory) to the word dictionary.
+
+`hyphenation_patterns_path`::
+
+The path (either absolute or relative to the `config` directory) to a FOP XML hyphenation pattern file. (required for hyphenation)
+
+`min_word_size`::
+
+Minimum word size. Defaults to 5.
+
+`min_subword_size`::
+
+Minimum subword size. Defaults to 2.
+
+`max_subword_size`::
+
+Maximum subword size. Defaults to 15.
+
+`only_longest_match`::
+
+Whether to include only the longest matching subword or not. Defaults to `false`
+
 
 Here is an example:
 
@@ -44,9 +93,10 @@ index :
     filter :
         myTokenFilter1 :
             type : dictionary_decompounder
             word_list: [one, two, three]
         myTokenFilter2 :
             type : hyphenation_decompounder
             word_list_path: path/to/words.txt
+            hyphenation_patterns_path: path/to/fop.xml
             max_subword_size : 22
 --------------------------------------------------
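For comparison with the YAML example above, the same filter could plausibly be wired up from the Java client at index-creation time. A sketch only, assuming the 2.x-era `Settings.settingsBuilder()` API; the index name and the analyzer/filter names are invented for illustration:

[source,java]
--------------------------------------------------
// Hedged sketch, not part of this commit: defining a dictionary_decompounder
// and a custom analyzer that uses it, via an assumed existing `client`.
// "decompound-demo", "my_analyzer" and "my_decompounder" are hypothetical
// names; the word list is taken from the YAML example.
client.admin().indices().prepareCreate("decompound-demo")
        .setSettings(Settings.settingsBuilder()
                .put("analysis.filter.my_decompounder.type", "dictionary_decompounder")
                .putArray("analysis.filter.my_decompounder.word_list", "one", "two", "three")
                .put("analysis.analyzer.my_analyzer.type", "custom")
                .put("analysis.analyzer.my_analyzer.tokenizer", "standard")
                .putArray("analysis.analyzer.my_analyzer.filter", "lowercase", "my_decompounder"))
        .execute().actionGet();
--------------------------------------------------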
@@ -56,7 +56,7 @@ newlines. Example:
 $ cat requests
 { "index" : { "_index" : "test", "_type" : "type1", "_id" : "1" } }
 { "field1" : "value1" }
-$ curl -s -XPOST localhost:9200/_bulk --data-binary @requests; echo
+$ curl -s -XPOST localhost:9200/_bulk --data-binary "@requests"; echo
 {"took":7,"items":[{"create":{"_index":"test","_type":"type1","_id":"1","_version":1}}]}
 --------------------------------------------------
 
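The quoted form sends the same request body. For reference, a rough Java-client equivalent of this two-line bulk body, assuming an existing `client` (the REST example uses the `create` action; this sketch uses a plain index operation):

[source,java]
--------------------------------------------------
// Sketch only: the bulk body from the curl example, built with the Java
// client. Index, type, id and field come from the example above.
BulkResponse bulkResponse = client.prepareBulk()
        .add(client.prepareIndex("test", "type1", "1")
                .setSource("field1", "value1"))
        .execute().actionGet();
if (bulkResponse.hasFailures()) {
    // inspect bulkResponse.buildFailureMessage() for per-item errors
}
--------------------------------------------------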
@@ -544,7 +544,7 @@ You can download the sample dataset (accounts.json) from https://github.com/bly2
 
 [source,sh]
 --------------------------------------------------
-curl -XPOST 'localhost:9200/bank/account/_bulk?pretty' --data-binary @accounts.json
+curl -XPOST 'localhost:9200/bank/account/_bulk?pretty' --data-binary "@accounts.json"
 curl 'localhost:9200/_cat/indices?v'
 --------------------------------------------------
 
@@ -915,7 +915,7 @@ In SQL, the above aggregation is similar in concept to:
 
 [source,sh]
 --------------------------------------------------
-SELECT COUNT(*) from bank GROUP BY state ORDER BY COUNT(*) DESC
+SELECT state, COUNT(*) FROM bank GROUP BY state ORDER BY COUNT(*) DESC
 --------------------------------------------------
 
 And the response (partially shown):
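The corrected statement selects the grouping key alongside the count. A rough Java-client counterpart of the underlying terms aggregation, assuming an existing `client` (the aggregation name `group_by_state` is arbitrary):

[source,java]
--------------------------------------------------
// Sketch: the terms aggregation behind the SQL analogy. Terms buckets are
// returned ordered by document count descending by default, matching the
// ORDER BY COUNT(*) DESC in the SQL.
SearchResponse sr = client.prepareSearch("bank")
        .setSize(0) // buckets only, no hits
        .addAggregation(AggregationBuilders.terms("group_by_state").field("state"))
        .execute().actionGet();
Terms byState = sr.getAggregations().get("group_by_state");
for (Terms.Bucket bucket : byState.getBuckets()) {
    // bucket.getKey() is the state, bucket.getDocCount() is the COUNT(*)
}
--------------------------------------------------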
@@ -70,8 +70,9 @@ Checking shards may take a lot of time on large indices.
 
 [[index-codec]] `index.codec`::
 
-experimental[] The `default` value compresses stored data with LZ4
-compression, but this can be set to `best_compression` for a higher
+experimental[] The +default+ value compresses stored data with LZ4
+compression, but this can be set to +best_compression+
+which uses https://en.wikipedia.org/wiki/DEFLATE[DEFLATE] for a higher
 compression ratio, at the expense of slower stored fields performance.
 
 [float]
@@ -8,7 +8,7 @@ _filter context_:
 
 Query context::
 
-A query used in query context will caculated relevance scores and will not be
+A query used in query context will calculate relevance scores and will not be
 cacheable. Query context is used whenever filter context does not apply.
 
 Filter context::
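In the Java API the two contexts map onto different clauses of a `bool` query. A minimal sketch, with invented field names:

[source,java]
--------------------------------------------------
// Sketch: query context vs. filter context in a bool query. The must clause
// is scored (query context); the filter clause contributes no score and is
// eligible for caching (filter context). Field names are hypothetical.
QueryBuilder q = QueryBuilders.boolQuery()
        .must(QueryBuilders.matchQuery("title", "search"))        // query context
        .filter(QueryBuilders.termQuery("status", "published"));  // filter context
--------------------------------------------------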
@@ -35,7 +35,7 @@ $ cat requests
 {"search_type" : "dfs_query_then_fetch"}
 {"query" : {"match_all" : {}}}
 
-$ curl -XGET localhost:9200/_msearch --data-binary @requests; echo
+$ curl -XGET localhost:9200/_msearch --data-binary "@requests"; echo
 --------------------------------------------------
 
 Note, the above includes an example of an empty header (can also be just
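The same multi search can be issued through the Java client. A sketch, assuming an existing `client`; the second search mirrors the `dfs_query_then_fetch` header from the request body:

[source,java]
--------------------------------------------------
// Sketch: Java-client counterpart of the _msearch request above.
MultiSearchResponse msr = client.prepareMultiSearch()
        .add(client.prepareSearch().setQuery(QueryBuilders.matchAllQuery()))
        .add(client.prepareSearch()
                .setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
                .setQuery(QueryBuilders.matchAllQuery()))
        .execute().actionGet();
for (MultiSearchResponse.Item item : msr.getResponses()) {
    SearchResponse response = item.getResponse(); // null if item.isFailure()
}
--------------------------------------------------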
@@ -366,7 +366,7 @@ Request:
 
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/twitter/tweet/_mpercolate' --data-binary @requests.txt; echo
+curl -XGET 'localhost:9200/twitter/tweet/_mpercolate' --data-binary "@requests.txt"; echo
 --------------------------------------------------
 
 The index `twitter` is the default index, and the type `tweet` is the default type and will be used in the case a header
@@ -1 +0,0 @@
-7ff51040bbcc9085dcb9a24a2c2a3cc7ac995988
@@ -0,0 +1 @@
+b53f650323b7242dcced25b679f3e9aa4b494da5
@@ -1 +0,0 @@
-b0712cc659e72b9da0f5b03872d2476ab4a695f7
@@ -0,0 +1 @@
+50ba7eb31719be1260bdae51cf69340df2d91ec4
@@ -16,7 +16,6 @@
 
     <properties>
         <elasticsearch.plugin.classname>org.elasticsearch.plugin.discovery.ec2.Ec2DiscoveryPlugin</elasticsearch.plugin.classname>
-        <amazonaws.version>1.10.12</amazonaws.version>
         <tests.jvms>1</tests.jvms>
         <tests.rest.suite>discovery_ec2</tests.rest.suite>
         <tests.rest.load_packaged>false</tests.rest.load_packaged>
@@ -26,6 +26,7 @@
         <elasticsearch.plugin.jvm>true</elasticsearch.plugin.jvm>
         <elasticsearch.plugin.isolated>true</elasticsearch.plugin.isolated>
         <elasticsearch.plugin.site>false</elasticsearch.plugin.site>
+        <amazonaws.version>1.10.19</amazonaws.version>
     </properties>
 
     <dependencies>
@@ -1 +0,0 @@
-7ff51040bbcc9085dcb9a24a2c2a3cc7ac995988
@@ -0,0 +1 @@
+b53f650323b7242dcced25b679f3e9aa4b494da5
@@ -1 +0,0 @@
-31afbe46b65e9933316c7e8dfb8b88dc4b37b6ba
@@ -0,0 +1 @@
+c8764f3e61a3c420db429870ec22b31fe755d81d
@@ -1 +0,0 @@
-c9e2593fdf398c5f8906a704db037d17b2de4b2a
@@ -0,0 +1 @@
+a23dc60d56d54126250c23cab1d01328b1e83678
@@ -16,7 +16,6 @@
 
     <properties>
         <elasticsearch.plugin.classname>org.elasticsearch.plugin.repository.s3.S3RepositoryPlugin</elasticsearch.plugin.classname>
-        <amazonaws.version>1.10.12</amazonaws.version>
         <tests.jvms>1</tests.jvms>
         <tests.rest.suite>repository_s3</tests.rest.suite>
         <tests.rest.load_packaged>false</tests.rest.load_packaged>