Merge branch 'master' into fix-datasource-doc

2014-11-11 11:55:47 -08:00 · 2014-11-11 11:55:47 -08:00 · 34a6f7f7ca
parent e07b02f54b e4649a24a1
commit 34a6f7f7ca
10 changed files with 68 additions and 26 deletions
--- a/docs/content/Having.md
+++ b/docs/content/Having.md
@ -13,6 +13,15 @@ Druid supports the following types of having clauses.
 The simplest having clause is a numeric filter.
 Numeric filters can be used as the base filters for more complex boolean expressions of filters.

+Here's an example of a having-clause numeric filter:
+
+```json
+{
+    "type": "greaterThan",
+    "aggregation": "myAggMetric",
+    "value": 100
+}
+```
+
 #### Equal To

 The equalTo filter will match rows with a specific aggregate value.
@ -21,7 +30,7 @@ The grammar for an `equalTo` filter is as follows:
 ```json
 {
    "type": "equalTo",
-    "aggregation": <aggregate_metric>,
+    "aggregation": "<aggregate_metric>",
    "value": <numeric_value>
 }
 ```
@ -36,7 +45,7 @@ The grammar for a `greaterThan` filter is as follows:
 ```json
 {
    "type": "greaterThan",
-    "aggregation": <aggregate_metric>,
+    "aggregation": "<aggregate_metric>",
    "value": <numeric_value>
 }
 ```
@ -51,7 +60,7 @@ The grammar for a `greaterThan` filter is as follows:
 ```json
 {
    "type": "lessThan",
-    "aggregation": <aggregate_metric>,
+    "aggregation": "<aggregate_metric>",
    "value": <numeric_value>
 }
 ```
--- a/docs/content/SearchQuery.md
+++ b/docs/content/SearchQuery.md
@ -30,14 +30,14 @@ There are several main parts to a search query:

 |property|description|required?|
 |--------|-----------|---------|
-|queryType|This String should always be "search"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
-|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
-|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
-|filter|See [Filters](Filters.html)|no|
+|queryType|This String should always be "search"; this is the first thing Druid looks at to figure out how to interpret the query.|yes|
+|dataSource|A String defining the data source to query, very similar to a table in a relational database.|yes|
+|granularity|Defines the granularity of the query. See [Granularities](Granularities.html).|yes|
+|filter|See [Filters](Filters.html).|no|
 |intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
 |searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no|
 |query|See [SearchQuerySpec](SearchQuerySpec.html).|yes|
-|sort|How the results of the search should be sorted. Two possible types here are "lexicographic" and "strlen".|yes|
+|sort|An object specifying how the results of the search should be sorted. Two possible types here are "lexicographic" (the default sort) and "strlen".|no|
 |context|An additional JSON Object which can be used to specify certain flags.|no|

 The format of the result is:
--- a/docs/content/SelectQuery.md
+++ b/docs/content/SelectQuery.md
@ -26,9 +26,9 @@ There are several main parts to a select query:
 |dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
 |intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
 |filter|See [Filters](Filters.html)|no|
-|dimensions|The list of dimensions to select. If left empty, all dimensions are returned.|no|
-|metrics|The list of metrics to select. If left empty, all metrics are returned.|no|
-|pagingSpec|A JSON object indicating offsets into different scanned segments. Select query results will return a pagingSpec that can be reused for pagination.|yes|
+|dimensions|A String array of dimensions to select. If left empty, all dimensions are returned.|no|
+|metrics|A String array of metrics to select. If left empty, all metrics are returned.|no|
+|pagingSpec|A JSON object indicating offsets into different scanned segments. Query results will return a `pagingIdentifiers` value that can be reused in the next query for pagination.|yes|
 |context|An additional JSON Object which can be used to specify certain flags.|no|

 The format of the result is:
@ -140,4 +140,30 @@ The format of the result is:
 } ]
 ```

-The result returns a global pagingSpec that can be reused for the next select query. The offset will need to be increased by 1 on the client side.
+The `threshold` determines how many hits are returned, with each hit indexed by an offset.
+
+The results above include:
+
+```json 
+    "pagingIdentifiers" : {
+      "wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9" : 4
+    },
+```
+
+This can be used with the next query's pagingSpec:
+
+```json
+ {
+   "queryType": "select",
+   "dataSource": "wikipedia",
+   "dimensions":[],
+   "metrics":[],
+   "granularity": "all",
+   "intervals": [
+     "2013-01-01/2013-01-02"
+   ],
+   "pagingSpec":{"pagingIdentifiers": {"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9" : 5}, "threshold":5}
+ }
+```
+
+Note that in the second query, an offset is specified and that it is 1 greater than the largest offset found in the initial results. To return the next "page", this offset must be incremented by 1 with each new query. When an empty result set is received, the very last page has been returned.
--- a/docs/content/TopNMetricSpec.md
+++ b/docs/content/TopNMetricSpec.md
@ -11,7 +11,7 @@ The topN metric spec specifies how topN values should be sorted.
 The simplest metric specification is a String value indicating the metric to sort topN results by. They are included in a topN query with:

 ```json
-"metric": <metric_value_string>
+"metric": "<metric_name>"
 ```

 The metric field can also be given as a JSON object. The grammar for dimension values sorted by numeric value is shown below:
@ -19,7 +19,7 @@ The metric field can also be given as a JSON object. The grammar for dimension v
 ```json
 "metric": {
    "type": "numeric",
-    "metric": "<metric_value>"
+    "metric": "<metric_name>"
 }
 ```

--- a/docs/content/TopNQuery.md
+++ b/docs/content/TopNQuery.md
@ -72,9 +72,9 @@ There are 10 parts to a topN query, but 7 of them are shared with [TimeseriesQue

 |property|description|required?|
 |--------|-----------|---------|
-|dimension|A JSON object defining the dimension that you want the top taken for. For more info, see [DimensionSpecs](DimensionSpecs.html)|yes|
+|dimension|A String or JSON object defining the dimension that you want the top taken for. For more info, see [DimensionSpecs](DimensionSpecs.html)|yes|
 |threshold|An integer defining the N in the topN (i.e. how many you want in the top list)|yes|
-|metric|A JSON object specifying the metric to sort by for the top list. For more info, see [TopNMetricSpec](TopNMetricSpec.html).|yes|
+|metric|A String or JSON object specifying the metric to sort by for the top list. For more info, see [TopNMetricSpec](TopNMetricSpec.html).|yes|

 Please note the context JSON object is also available for topN queries and should be used with the same caution as the timeseries case.
 The format of the results would look like so:
--- a/docs/content/Tutorial:-A-First-Look-at-Druid.md
+++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md
@ -75,9 +75,13 @@ Setting up Zookeeper
 Before we get started, we need to start Apache Zookeeper.

 ```bash
-curl http://apache.osuosl.org/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
-tar xzf zookeeper-3.4.5.tar.gz
-cd zookeeper-3.4.5
+# Download ZooKeeper from http://www.apache.org/dyn/closer.cgi/zookeeper/
+# and install it.
+
+# For example:
+curl http://www.gtlib.gatech.edu/pub/apache/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz -o zookeeper-3.4.6.tar.gz
+tar xzf zookeeper-3.4.6.tar.gz
+cd zookeeper-3.4.6
 cp conf/zoo_sample.cfg conf/zoo.cfg
 ./bin/zkServer.sh start
 cd ..
--- a/docs/content/Tutorial:-The-Druid-Cluster.md
+++ b/docs/content/Tutorial:-The-Druid-Cluster.md
@ -48,9 +48,13 @@ CREATE database druid;
 #### Setting up Zookeeper

 ```bash
-curl http://apache.osuosl.org/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
-tar xzf zookeeper-3.4.5.tar.gz
-cd zookeeper-3.4.5
+# Download ZooKeeper from http://www.apache.org/dyn/closer.cgi/zookeeper/
+# and install it.
+
+# For example:
+curl http://www.gtlib.gatech.edu/pub/apache/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz -o zookeeper-3.4.6.tar.gz
+tar xzf zookeeper-3.4.6.tar.gz
+cd zookeeper-3.4.6
 cp conf/zoo_sample.cfg conf/zoo.cfg
 ./bin/zkServer.sh start
 cd ..
--- a/server/src/main/java/io/druid/guice/http/DruidHttpClientConfig.java
+++ b/server/src/main/java/io/druid/guice/http/DruidHttpClientConfig.java
@ -32,7 +32,7 @@ public class DruidHttpClientConfig
 {
  @JsonProperty
  @Min(0)
-  private int numConnections = 5;
+  private int numConnections = 20;

  @JsonProperty
  private Period readTimeout = new Period("PT15M");
--- a/server/src/main/java/io/druid/server/coordination/ServerManager.java
+++ b/server/src/main/java/io/druid/server/coordination/ServerManager.java
@ -421,7 +421,7 @@ public class ServerManager implements QuerySegmentWalker
                        },
                        new ReferenceCountingSegmentQueryRunner<T>(factory, adapter),
                        "scan/time"
-                    ).withWaitMeasuredFromNow(),
+                    ),
                    cacheConfig
                )
            )
--- a/server/src/main/java/io/druid/server/initialization/ServerConfig.java
+++ b/server/src/main/java/io/druid/server/initialization/ServerConfig.java
@ -31,8 +31,7 @@ public class ServerConfig
 {
  @JsonProperty
  @Min(1)
-  // Jetty defaults are whack
-  private int numThreads = Math.max(10, (Runtime.getRuntime().availableProcessors() * 17) / 16 + 2);
+  private int numThreads = Math.max(10, (Runtime.getRuntime().availableProcessors() * 17) / 16 + 2) + 30;

  @JsonProperty
  @NotNull