Merge remote-tracking branch 'upstream/master' into rabbitmq-module

Conflicts: examples/config/realtime/runtime.properties
2013-11-27 20:58:22 +00:00 · 2013-11-27 20:58:22 +00:00 · 2b7fcfe738
parent f04940f8b5 de0d6e2264
commit 2b7fcfe738
41 changed files with 561 additions and 95 deletions
--- a/build.sh
+++ b/build.sh
@ -30,4 +30,4 @@ echo "For examples, see: "
 echo " "
 ls -1 examples/*/*sh
 echo " "
-echo "See also http://druid.io/docs/0.6.23"
+echo "See also http://druid.io/docs/0.6.24"
--- a/cassandra-storage/pom.xml
+++ b/cassandra-storage/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/common/pom.xml
+++ b/common/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/docs/content/Booting-a-production-cluster.md
+++ b/docs/content/Booting-a-production-cluster.md
@ -3,7 +3,7 @@ layout: doc_page
 ---
 # Booting a Single Node Cluster #

-[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.23-bin.tar.gz).
+[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.24-bin.tar.gz).

 The [ec2 run script](https://github.com/metamx/druid/blob/master/examples/bin/run_ec2.sh), run_ec2.sh, is located at 'examples/bin' if you have checked out the code, or at the root of the project if you've downloaded a tarball. The scripts rely on the [Amazon EC2 API Tools](http://aws.amazon.com/developertools/351), and you will need to set three environment variables:

--- a/docs/content/DimensionSpecs.md
+++ b/docs/content/DimensionSpecs.md
@ -0,0 +1,76 @@
+---
+layout: doc_page
+---
+
+## DimensionSpec
+
+`DimensionSpec`s define how dimension values get transformed prior to aggregation.
+
+### DefaultDimensionSpec
+
+Returns dimension values as is and optionally renames the dimension.
+
+```json
+{ "type" : "default", "dimension" : <dimension>, "outputName": <output_name> }
+```
+
+### ExtractionDimensionSpec
+
+Returns dimension values transformed using the given [DimExtractionFn](#toc_3)
+
+```json
+{
+  "type" : "extraction",
+  "dimension" : <dimension>,
+  "outputName" :  <output_name>,
+  "dimExtractionFn" : <dim_extraction_fn>
+}
+```
+
+## DimExtractionFn
+
+`DimExtractionFn`s define the transformation applied to each dimension value.
+
+### RegexDimExtractionFn
+
+Returns the first group matched by the given regular expression. If there is no match it returns the dimension value as is.
+
+```json
+{ "type" : "regex", "expr" : <regular_expression> }
+```
+
+### PartialDimExtractionFn
+
+Returns the dimension value as is if there is a match, otherwise returns null.
+
+```json
+{ "type" : "partial", "expr" : <regular_expression> }
+```
+
+### SearchQuerySpecDimExtractionFn
+
+Returns the dimension value as is if the given [SearchQuerySpec](SearchQuerySpec.html) matches, otherwise returns null.
+
+```json
+{ "type" : "searchQuery", "query" : <search_query_spec> }
+```
+
+### TimeDimExtractionFn
+
+Parses dimension values as timestamps using the given input format, and returns them formatted using the given output format. Time formats follow the [com.ibm.icu.text.SimpleDateFormat](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) format.
+
+```json
+{ "type" : "time", "timeFormat" : <input_format>, "resultFormat" : <output_format> }
+```
+
+### JavascriptDimExtractionFn
+
+Returns the dimension value as transformed by the given JavaScript function.
+
+Example
+
+```json
+{
+  "type" : "javascript",
+  "function" : "function(str) { return str.substr(0, 3); }"
+}
+```
--- a/docs/content/Examples.md
+++ b/docs/content/Examples.md
@ -19,13 +19,13 @@ Clone Druid and build it:
 git clone https://github.com/metamx/druid.git druid
 cd druid
 git fetch --tags
-git checkout druid-0.6.23
+git checkout druid-0.6.24
 ./build.sh
 ```

 ### Downloading the DSK (Druid Standalone Kit)

-[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz) a stand-alone tarball and run it:
+[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz) a stand-alone tarball and run it:

 ``` bash
 tar -xzf druid-services-0.X.X-bin.tar.gz
--- a/docs/content/Modules.md
+++ b/docs/content/Modules.md
@ -158,7 +158,7 @@ DruidBinders.queryRunnerFactoryBinder(binder)

 The first one binds the SegmentMetadataQueryQueryToolChest for usage when a SegmentMetadataQuery is used.  The second one does the same thing but for the QueryRunnerFactory instead.

-#### Adding new Jersey resources
+### Adding new Jersey resources

 Adding new Jersey resources to a module requires calling the following code to bind the resource in the module:

--- a/docs/content/Realtime.md
+++ b/docs/content/Realtime.md
@ -27,7 +27,7 @@ druid.host=localhost
 druid.service=realtime
 druid.port=8083

-druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.23"]
+druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.24"]


 druid.zk.service.host=localhost
@ -49,7 +49,7 @@ The realtime module uses several of the default modules in [Configuration](Confi
 |Property|Description|Default|
 |--------|-----------|-------|
 |`druid.realtime.specFile`|The file with realtime specifications in it.|none|
-|`druid.publish.type`|Choices:noop, db. After a real-time node completes building a segment after the window period, what does it do with it? For true handoff to occur, this should be set to "db".|noop|
+|`druid.publish.type`|Choices:noop, db. After a real-time node completes building a segment after the window period, what does it do with it? For true handoff to occur, this should be set to "db".|db|

 ### Realtime "specFile"

--- a/docs/content/SearchQuery.md
+++ b/docs/content/SearchQuery.md
@ -3,26 +3,27 @@ layout: doc_page
 ---
 A search query returns dimension values that match the search specification.

-    <code>{
-      "queryType": "search",
-      "dataSource": "sample_datasource",
-      "granularity": "day",
-      "searchDimensions": [
-        "dim1",
-        "dim2"
-      ],
-      "query": {
-        "type": "insensitive_contains",
-        "value": "Ke"
-      },
-      "sort" : {
-        "type": "lexicographic"
-      },
-      "intervals": [
-        "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000"
-      ]
-    }
-    </code>
+```json
+{
+  "queryType": "search",
+  "dataSource": "sample_datasource",
+  "granularity": "day",
+  "searchDimensions": [
+    "dim1",
+    "dim2"
+  ],
+  "query": {
+    "type": "insensitive_contains",
+    "value": "Ke"
+  },
+  "sort" : {
+    "type": "lexicographic"
+  },
+  "intervals": [
+    "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000"
+  ]
+}
+```

 There are several main parts to a search query:

@ -40,32 +41,33 @@ There are several main parts to a search query:

 The format of the result is:

-    <code>[
+```json
+[
+  {
+    "timestamp": "2012-01-01T00:00:00.000Z",
+    "result": [
      {
-        "timestamp": "2012-01-01T00:00:00.000Z",
-        "result": [
-          {
-            "dimension": "dim1",
-            "value": "Ke$ha"
-          },
-          {
-            "dimension": "dim2",
-            "value": "Ke$haForPresident"
-          }
-        ]
+        "dimension": "dim1",
+        "value": "Ke$ha"
      },
      {
-        "timestamp": "2012-01-02T00:00:00.000Z",
-        "result": [
-          {
-            "dimension": "dim1",
-            "value": "SomethingThatContainsKe"
-          },
-          {
-            "dimension": "dim2",
-            "value": "SomethingElseThatContainsKe"
-          }      
-        ]
+        "dimension": "dim2",
+        "value": "Ke$haForPresident"
      }
    ]
-    </code>
+  },
+  {
+    "timestamp": "2012-01-02T00:00:00.000Z",
+    "result": [
+      {
+        "dimension": "dim1",
+        "value": "SomethingThatContainsKe"
+      },
+      {
+        "dimension": "dim2",
+        "value": "SomethingElseThatContainsKe"
+      }
+    ]
+  }
+]
+```
--- a/docs/content/Tasks.md
+++ b/docs/content/Tasks.md
@ -73,11 +73,23 @@ The Hadoop Index Task is used to index larger data sets that require the paralle
 |--------|-----------|---------|
 |type|The task type, this should always be "index_hadoop".|yes|
 |config|A Hadoop Index Config. See [Batch Ingestion](Batch-ingestion.html)|yes|
-|hadoopCoordinates|The Maven <groupId>:<artifactId>:<version> of Hadoop to use. The default is "org.apache.hadoop:hadoop-core:1.0.3".|no|
+|hadoopCoordinates|The Maven \<groupId\>:\<artifactId\>:\<version\> of Hadoop to use. The default is "org.apache.hadoop:hadoop-core:1.0.3".|no|


 The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.

+##### Using your own Hadoop distribution
+
+Druid is compiled against Apache hadoop-core 1.0.3. However, if you happen to use a different flavor of hadoop that is API compatible with hadoop-core 1.0.3, you should only have to change the hadoopCoordinates property to point to the maven artifact used by your distribution.
+
+##### Resolving dependency conflicts running HadoopIndexTask
+
+Currently, the HadoopIndexTask creates a single classpath to run the HadoopDruidIndexerJob, which can lead to version conflicts between various dependencies of Druid, extension modules, and Hadoop's own dependencies.
+
+The Hadoop index task will put Druid's dependencies first on the classpath, followed by any extension dependencies, and any Hadoop dependencies last.
+
+If you are having trouble with any extensions in HadoopIndexTask, it may be the case that Druid, or one of its dependencies, depends on a different version of a library than what you are using as part of your extensions, but Druid's version overrides the one in your extension. In that case you probably want to build your own Druid version and override the offending library by adding an explicit dependency to the pom.xml of each druid sub-module that depends on it.
+
 #### Realtime Index Task

 The indexing service can also run real-time tasks. These tasks effectively transform a middle manager into a real-time node. We introduced real-time tasks as a way to programmatically add new real-time data sources without needing to manually add nodes. The grammar for the real-time task is as follows:
--- a/docs/content/Tutorial:-A-First-Look-at-Druid.md
+++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md
@ -47,7 +47,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

 ### Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz). Download this file to a directory of your choosing.
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz). Download this file to a directory of your choosing.

 You can extract the awesomeness within by issuing:

@ -58,7 +58,7 @@ tar -zxvf druid-services-*-bin.tar.gz
 Not too lost so far right? That's great! If you cd into the directory:

 ```
-cd druid-services-0.6.23
+cd druid-services-0.6.24
 ```

 You should see a bunch of files:
--- a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
+++ b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
@ -42,7 +42,7 @@ With real-world data, we recommend having a message bus such as [Apache Kafka](h

 #### Setting up Kafka

-[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.23/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
+[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.24/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.

 Instructions for booting a Zookeeper and then Kafka cluster are available [here](http://kafka.apache.org/07/quickstart.html).

--- a/docs/content/Tutorial:-The-Druid-Cluster.md
+++ b/docs/content/Tutorial:-The-Druid-Cluster.md
@ -11,7 +11,7 @@ In this tutorial, we will set up other types of Druid nodes as well as and exter

 If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.

-You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz)
+You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz)

 and untar the contents within by issuing:

@ -147,7 +147,7 @@ druid.port=8081

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.23"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"]

 # Dummy read only AWS account (used to download example data)
 druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
@ -237,11 +237,15 @@ druid.port=8083

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.23-SNAPSHOT"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"]

-druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
-druid.db.connector.user=druid
-druid.db.connector.password=diurd
+# Change this config to db to hand off to the rest of the Druid cluster
+druid.publish.type=noop
+
+# These configs are only required for real hand off
+# druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
+# druid.db.connector.user=druid
+# druid.db.connector.password=diurd

 druid.processing.buffer.sizeBytes=10000000
 ```
--- a/docs/content/Tutorial:-Webstream.md
+++ b/docs/content/Tutorial:-Webstream.md
@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

 h3. Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz)
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz)
 Download this file to a directory of your choosing.
 You can extract the awesomeness within by issuing:

@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
 Not too lost so far right? That's great! If you cd into the directory:

 ```
-cd druid-services-0.6.23
+cd druid-services-0.6.24
 ```

 You should see a bunch of files:
--- a/docs/content/Twitter-Tutorial.textile
+++ b/docs/content/Twitter-Tutorial.textile
@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.

 h3. Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz.
+We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz.
 Download this bad boy to a directory of your choosing.

 You can extract the awesomeness within by issuing:
--- a/docs/content/toc.textile
+++ b/docs/content/toc.textile
@ -8,7 +8,6 @@ h1. Contents
 * "Concepts and Terminology":./Concepts-and-Terminology.html

 h2. Getting Started
-* "Concepts and Terminology":./Concepts-and-Terminology.html
 * "Tutorial: A First Look at Druid":./Tutorial:-A-First-Look-at-Druid.html
 * "Tutorial: The Druid Cluster":./Tutorial:-The-Druid-Cluster.html
 * "Tutorial: Loading Your Data Part 1":./Tutorial:-Loading-Your-Data-Part-1.html
@ -37,11 +36,12 @@ h2. Querying
 ** "Aggregations":./Aggregations.html
 ** "Post Aggregations":./Post-aggregations.html
 ** "Granularities":./Granularities.html
+** "DimensionSpecs":./DimensionSpecs.html
 * Query Types
 ** "GroupByQuery":./GroupByQuery.html
 *** "OrderBy":./OrderBy.html
 *** "Having":./Having.html
-** "SearchQuery":./Having.html
+** "SearchQuery":./SearchQuery.html
 *** "SearchQuerySpec":./SearchQuerySpec.html
 ** "SegmentMetadataQuery":./SegmentMetadataQuery.html
 ** "TimeBoundaryQuery":./TimeBoundaryQuery.html
--- a/examples/config/historical/runtime.properties
+++ b/examples/config/historical/runtime.properties
@ -4,7 +4,7 @@ druid.port=8081

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.23"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"]

 # Dummy read only AWS account (used to download example data)
 druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
--- a/examples/config/realtime/runtime.properties
+++ b/examples/config/realtime/runtime.properties
@ -4,10 +4,14 @@ druid.port=8083

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.23","io.druid.extensions:druid-kafka-seven:0.6.23","io.druid.extensions:druid-rabbitmq:0.6.23"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24","io.druid.extensions:druid-rabbitmq:0.6.24"]

-druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
-druid.db.connector.user=druid
-druid.db.connector.password=diurd
+# Change this config to db to hand off to the rest of the Druid cluster
+druid.publish.type=noop
+
+# These configs are only required for real hand off
+# druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
+# druid.db.connector.user=druid
+# druid.db.connector.password=diurd

 druid.processing.buffer.sizeBytes=10000000
--- a/examples/pom.xml
+++ b/examples/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/hdfs-storage/pom.xml
+++ b/hdfs-storage/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/indexing-hadoop/pom.xml
+++ b/indexing-hadoop/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/indexing-service/pom.xml
+++ b/indexing-service/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/kafka-eight/pom.xml
+++ b/kafka-eight/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/kafka-seven/pom.xml
+++ b/kafka-seven/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/pom.xml
+++ b/pom.xml
@ -23,7 +23,7 @@
    <groupId>io.druid</groupId>
    <artifactId>druid</artifactId>
    <packaging>pom</packaging>
-    <version>0.6.24-SNAPSHOT</version>
+    <version>0.6.25-SNAPSHOT</version>
    <name>druid</name>
    <description>druid</description>
    <scm>
--- a/processing/pom.xml
+++ b/processing/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java
+++ b/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java
@ -57,7 +57,7 @@ public class JavaScriptPostAggregator implements PostAggregator

    final ScriptableObject scope = context.initStandardObjects();

-    final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "aggregate", 1, null);
+    final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null);
    Context.exit();


--- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java
+++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java
@ -29,7 +29,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
    @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
    @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
    @JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class),
-    @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class)
+    @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
+    @JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class)
 })
 public interface DimExtractionFn
 {
--- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java
+++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java
@ -0,0 +1,94 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013  Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Function;
+import org.mozilla.javascript.Context;
+import org.mozilla.javascript.ContextFactory;
+import org.mozilla.javascript.ScriptableObject;
+
+import java.nio.ByteBuffer;
+
+/**
+ * A {@link DimExtractionFn} that transforms dimension values by calling a user-supplied JavaScript
+ * function, which is compiled once with Rhino when the instance is constructed.
+ *
+ * <p>The source must be a single function taking one argument, for example
+ * {@code function(str) { return str.substr(0, 3); }}. Whatever the function returns is converted
+ * to a {@code String} with {@link Context#toString(Object)}.
+ */
+public class JavascriptDimExtractionFn implements DimExtractionFn
+{
+  /** Type tag written as the first byte of every cache key produced by this class. */
+  private static final byte CACHE_TYPE_ID = 0x4;
+
+  /** Fixed charset for cache keys, so that they do not depend on the JVM's platform default. */
+  private static final java.nio.charset.Charset CACHE_KEY_CHARSET = java.nio.charset.Charset.forName("UTF-8");
+
+  /**
+   * Compiles the given JavaScript source and wraps it in a Guava {@link Function}.
+   *
+   * @param function JavaScript source of a one-argument function
+   * @return a function that calls the compiled script with its input and returns the result as a string
+   */
+  private static Function<String, String> compile(String function)
+  {
+    final ContextFactory contextFactory = ContextFactory.getGlobal();
+    final ScriptableObject scope;
+    final org.mozilla.javascript.Function fn;
+
+    final Context context = contextFactory.enterContext();
+    try {
+      context.setOptimizationLevel(9);
+      scope = context.initStandardObjects();
+      fn = context.compileFunction(scope, function, "fn", 1, null);
+    }
+    finally {
+      // leave the context even when the source fails to compile, so the thread is not left entered
+      Context.exit();
+    }
+
+    return new Function<String, String>()
+    {
+      @Override
+      public String apply(String input)
+      {
+        // ideally we need a close() function to discard the context once it is not used anymore
+        Context cx = Context.getCurrentContext();
+        if (cx == null) {
+          cx = contextFactory.enterContext();
+        }
+
+        return Context.toString(fn.call(cx, scope, scope, new String[]{input}));
+      }
+    };
+  }
+
+  private final String function;
+  private final Function<String, String> fn;
+
+  /**
+   * @param function JavaScript source of the extraction function; must not be null
+   * @throws NullPointerException if {@code function} is null
+   */
+  @JsonCreator
+  public JavascriptDimExtractionFn(
+      @JsonProperty("function") String function
+  )
+  {
+    this.function = com.google.common.base.Preconditions.checkNotNull(function, "function must not be null");
+    this.fn = compile(function);
+  }
+
+  /**
+   * @return the JavaScript source this extraction function was created with
+   */
+  @JsonProperty
+  public String getFunction()
+  {
+    return function;
+  }
+
+  /**
+   * Builds the cache key as {@link #CACHE_TYPE_ID} followed by the UTF-8 bytes of the function source.
+   */
+  @Override
+  public byte[] getCacheKey()
+  {
+    final byte[] bytes = function.getBytes(CACHE_KEY_CHARSET);
+    return ByteBuffer.allocate(1 + bytes.length)
+                     .put(CACHE_TYPE_ID)
+                     .put(bytes)
+                     .array();
+  }
+
+  /**
+   * Applies the compiled JavaScript function to a single dimension value.
+   *
+   * @param dimValue the dimension value handed to the script as its only argument
+   * @return the script's result converted to a string
+   */
+  @Override
+  public String apply(String dimValue)
+  {
+    return fn.apply(dimValue);
+  }
+}
--- a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java
+++ b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java
@ -0,0 +1,273 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013  Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+package io.druid.query.extraction.extraction;
+
+import com.google.common.collect.Iterators;
+import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.JavascriptDimExtractionFn;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Iterator;
+
+/**
+ * Tests for {@link JavascriptDimExtractionFn}: each test compiles a JavaScript function and checks
+ * the value it extracts for a fixed list of inputs.
+ */
+public class JavascriptDimExtractionFnTest
+{
+  // Shared inputs for the substring and regex tests.
+  private static final String[] testStrings = {
+      "Quito",
+      "Calgary",
+      "Tokyo",
+      "Stockholm",
+      "Vancouver",
+      "Pretoria",
+      "Wellington",
+      "Ontario"
+  };
+
+  /**
+   * The JavaScript substring(0,3) must agree with Java's String.substring(0, 3).
+   */
+  @Test
+  public void testJavascriptSubstring()
+  {
+    String function = "function(str) { return str.substring(0,3); }";
+    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+
+    for (String str : testStrings) {
+      String res = dimExtractionFn.apply(str);
+      Assert.assertEquals(str.substring(0, 3), res);
+    }
+  }
+
+  /**
+   * A regex replace that strips lowercase vowels; expected values are listed in the same order as testStrings.
+   */
+  @Test
+  public void testJavascriptRegex()
+  {
+    String function = "function(str) { return str.replace(/[aeiou]/g, ''); }";
+    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+
+    Iterator<String> it = Iterators.forArray("Qt", "Clgry", "Tky", "Stckhlm", "Vncvr", "Prtr", "Wllngtn", "Ontr");
+    for (String str : testStrings) {
+      String res = dimExtractionFn.apply(str);
+      Assert.assertEquals(it.next(), res);
+    }
+  }
+
+  /**
+   * Runs a larger, multi-statement script (a Porter stemmer) to check that non-trivial functions work.
+   */
+  @Test
+  public void testJavaScriptPorterStemmer()
+  {
+    // JavaScript porter stemmer adapted from
+    // https://github.com/kristopolous/Porter-Stemmer/blob/e990a8d456510571d1ef9ef923d2a30a94679e13/PorterStemmer1980.js
+    String function = "function(w) {"
+                      + "var step2list = {\n"
+                      + "      \"ational\" : \"ate\",\n"
+                      + "      \"tional\" : \"tion\",\n"
+                      + "      \"enci\" : \"ence\",\n"
+                      + "      \"anci\" : \"ance\",\n"
+                      + "      \"izer\" : \"ize\",\n"
+                      + "      \"bli\" : \"ble\",\n"
+                      + "      \"alli\" : \"al\",\n"
+                      + "      \"entli\" : \"ent\",\n"
+                      + "      \"eli\" : \"e\",\n"
+                      + "      \"ousli\" : \"ous\",\n"
+                      + "      \"ization\" : \"ize\",\n"
+                      + "      \"ation\" : \"ate\",\n"
+                      + "      \"ator\" : \"ate\",\n"
+                      + "      \"alism\" : \"al\",\n"
+                      + "      \"iveness\" : \"ive\",\n"
+                      + "      \"fulness\" : \"ful\",\n"
+                      + "      \"ousness\" : \"ous\",\n"
+                      + "      \"aliti\" : \"al\",\n"
+                      + "      \"iviti\" : \"ive\",\n"
+                      + "      \"biliti\" : \"ble\",\n"
+                      + "      \"logi\" : \"log\"\n"
+                      + "    },\n"
+                      + "\n"
+                      + "    step3list = {\n"
+                      + "      \"icate\" : \"ic\",\n"
+                      + "      \"ative\" : \"\",\n"
+                      + "      \"alize\" : \"al\",\n"
+                      + "      \"iciti\" : \"ic\",\n"
+                      + "      \"ical\" : \"ic\",\n"
+                      + "      \"ful\" : \"\",\n"
+                      + "      \"ness\" : \"\"\n"
+                      + "    },\n"
+                      + "\n"
+                      + "    c = \"[^aeiou]\",          // consonant\n"
+                      + "    v = \"[aeiouy]\",          // vowel\n"
+                      + "    C = c + \"[^aeiouy]*\",    // consonant sequence\n"
+                      + "    V = v + \"[aeiou]*\",      // vowel sequence\n"
+                      + "\n"
+                      + "    mgr0 = \"^(\" + C + \")?\" + V + C,               // [C]VC... is m>0\n"
+                      + "    meq1 = \"^(\" + C + \")?\" + V + C + \"(\" + V + \")?$\",  // [C]VC[V] is m=1\n"
+                      + "    mgr1 = \"^(\" + C + \")?\" + V + C + V + C,       // [C]VCVC... is m>1\n"
+                      + "    s_v = \"^(\" + C + \")?\" + v;     "
+                      + ""
+                      + "var\n"
+                      + "      stem,\n"
+                      + "      suffix,\n"
+                      + "      firstch,\n"
+                      + "      re,\n"
+                      + "      re2,\n"
+                      + "      re3,\n"
+                      + "      re4,\n"
+                      + "      debugFunction,\n"
+                      + "      origword = w;\n"
+                      + "\n"
+                      + "\n"
+                      + "    if (w.length < 3) { return w; }\n"
+                      + "\n"
+                      + "    firstch = w.substr(0,1);\n"
+                      + "    if (firstch == \"y\") {\n"
+                      + "      w = firstch.toUpperCase() + w.substr(1);\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 1a\n"
+                      + "    re = /^(.+?)(ss|i)es$/;\n"
+                      + "    re2 = /^(.+?)([^s])s$/;\n"
+                      + "\n"
+                      + "    if (re.test(w)) { \n"
+                      + "      w = w.replace(re,\"$1$2\"); \n"
+                      + "\n"
+                      + "    } else if (re2.test(w)) {\n"
+                      + "      w = w.replace(re2,\"$1$2\"); \n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 1b\n"
+                      + "    re = /^(.+?)eed$/;\n"
+                      + "    re2 = /^(.+?)(ed|ing)$/;\n"
+                      + "    if (re.test(w)) {\n"
+                      + "      var fp = re.exec(w);\n"
+                      + "      re = new RegExp(mgr0);\n"
+                      + "      if (re.test(fp[1])) {\n"
+                      + "        re = /.$/;\n"
+                      + "        w = w.replace(re,\"\");\n"
+                      + "      }\n"
+                      + "    } else if (re2.test(w)) {\n"
+                      + "      var fp = re2.exec(w);\n"
+                      + "      stem = fp[1];\n"
+                      + "      re2 = new RegExp(s_v);\n"
+                      + "      if (re2.test(stem)) {\n"
+                      + "        w = stem;\n"
+                      + "\n"
+                      + "        re2 = /(at|bl|iz)$/;\n"
+                      + "        re3 = new RegExp(\"([^aeiouylsz])\\\\1$\");\n"
+                      + "        re4 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n"
+                      + "\n"
+                      + "        if (re2.test(w)) { \n"
+                      + "          w = w + \"e\"; \n"
+                      + "\n"
+                      + "        } else if (re3.test(w)) { \n"
+                      + "          re = /.$/; \n"
+                      + "          w = w.replace(re,\"\"); \n"
+                      + "\n"
+                      + "        } else if (re4.test(w)) { \n"
+                      + "          w = w + \"e\"; \n"
+                      + "        }\n"
+                      + "      }\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 1c\n"
+                      + "    re = new RegExp(\"^(.*\" + v + \".*)y$\");\n"
+                      + "    if (re.test(w)) {\n"
+                      + "      var fp = re.exec(w);\n"
+                      + "      stem = fp[1];\n"
+                      + "      w = stem + \"i\";\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 2\n"
+                      + "    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;\n"
+                      + "    if (re.test(w)) {\n"
+                      + "      var fp = re.exec(w);\n"
+                      + "      stem = fp[1];\n"
+                      + "      suffix = fp[2];\n"
+                      + "      re = new RegExp(mgr0);\n"
+                      + "      if (re.test(stem)) {\n"
+                      + "        w = stem + step2list[suffix];\n"
+                      + "      }\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 3\n"
+                      + "    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;\n"
+                      + "    if (re.test(w)) {\n"
+                      + "      var fp = re.exec(w);\n"
+                      + "      stem = fp[1];\n"
+                      + "      suffix = fp[2];\n"
+                      + "      re = new RegExp(mgr0);\n"
+                      + "      if (re.test(stem)) {\n"
+                      + "        w = stem + step3list[suffix];\n"
+                      + "      }\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 4\n"
+                      + "    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;\n"
+                      + "    re2 = /^(.+?)(s|t)(ion)$/;\n"
+                      + "    if (re.test(w)) {\n"
+                      + "      var fp = re.exec(w);\n"
+                      + "      stem = fp[1];\n"
+                      + "      re = new RegExp(mgr1);\n"
+                      + "      if (re.test(stem)) {\n"
+                      + "        w = stem;\n"
+                      + "      }\n"
+                      + "    } else if (re2.test(w)) {\n"
+                      + "      var fp = re2.exec(w);\n"
+                      + "      stem = fp[1] + fp[2];\n"
+                      + "      re2 = new RegExp(mgr1);\n"
+                      + "      if (re2.test(stem)) {\n"
+                      + "        w = stem;\n"
+                      + "      }\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // Step 5\n"
+                      + "    re = /^(.+?)e$/;\n"
+                      + "    if (re.test(w)) {\n"
+                      + "      var fp = re.exec(w);\n"
+                      + "      stem = fp[1];\n"
+                      + "      re = new RegExp(mgr1);\n"
+                      + "      re2 = new RegExp(meq1);\n"
+                      + "      re3 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n"
+                      + "      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {\n"
+                      + "        w = stem;\n"
+                      + "      }\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    re = /ll$/;\n"
+                      + "    re2 = new RegExp(mgr1);\n"
+                      + "    if (re.test(w) && re2.test(w)) {\n"
+                      + "      re = /.$/;\n"
+                      + "      w = w.replace(re,\"\");\n"
+                      + "    }\n"
+                      + "\n"
+                      + "    // and turn initial Y back to y\n"
+                      + "    if (firstch == \"y\") {\n"
+                      + "      w = firstch.toLowerCase() + w.substr(1);\n"
+                      + "    }\n"
+                      + "\n"
+                      + "\n"
+                      + "    return w;"
+                      + ""
+                      + "}";
+
+    DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+
+    // Inputs and expected stems are paired by position.
+    Iterator<String> inputs = Iterators.forArray("introducing", "exploratory", "analytics", "on", "large", "datasets");
+    Iterator<String> it = Iterators.forArray("introduc", "exploratori", "analyt", "on", "larg", "dataset");
+
+    while (inputs.hasNext()) {
+      String res = dimExtractionFn.apply(inputs.next());
+      Assert.assertEquals(it.next(), res);
+    }
+  }
+}
--- a/s3-extensions/pom.xml
+++ b/s3-extensions/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/server/pom.xml
+++ b/server/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/services/pom.xml
+++ b/services/pom.xml
@ -27,7 +27,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.24-SNAPSHOT</version>
+        <version>0.6.25-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/services/src/main/java/io/druid/cli/CliBroker.java
+++ b/services/src/main/java/io/druid/cli/CliBroker.java
@ -53,7 +53,7 @@ import java.util.List;
 */
@Command(
    name = "broker",
-    description = "Runs a broker node, see http://druid.io/docs/0.6.23/Broker.html for a description"
+    description = "Runs a broker node, see http://druid.io/docs/0.6.24/Broker.html for a description"
 )
 public class CliBroker extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliCoordinator.java
+++ b/services/src/main/java/io/druid/cli/CliCoordinator.java
@ -63,7 +63,7 @@ import java.util.List;
 */
@Command(
    name = "coordinator",
-    description = "Runs the Coordinator, see http://druid.io/docs/0.6.23/Coordinator.html for a description."
+    description = "Runs the Coordinator, see http://druid.io/docs/0.6.24/Coordinator.html for a description."
 )
 public class CliCoordinator extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
+++ b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
@ -41,7 +41,7 @@ import java.util.List;
 */
@Command(
    name = "hadoop",
-    description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.23/Batch-ingestion.html for a description."
+    description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.24/Batch-ingestion.html for a description."
 )
 public class CliHadoopIndexer implements Runnable
 {
--- a/services/src/main/java/io/druid/cli/CliHistorical.java
+++ b/services/src/main/java/io/druid/cli/CliHistorical.java
@ -42,7 +42,7 @@ import java.util.List;
 */
@Command(
    name = "historical",
-    description = "Runs a Historical node, see http://druid.io/docs/0.6.23/Historical.html for a description"
+    description = "Runs a Historical node, see http://druid.io/docs/0.6.24/Historical.html for a description"
 )
 public class CliHistorical extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliOverlord.java
+++ b/services/src/main/java/io/druid/cli/CliOverlord.java
@ -93,7 +93,7 @@ import java.util.List;
 */
@Command(
    name = "overlord",
-    description = "Runs an Overlord node, see http://druid.io/docs/0.6.23/Indexing-Service.html for a description"
+    description = "Runs an Overlord node, see http://druid.io/docs/0.6.24/Indexing-Service.html for a description"
 )
 public class CliOverlord extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliRealtime.java
+++ b/services/src/main/java/io/druid/cli/CliRealtime.java
@ -30,7 +30,7 @@ import java.util.List;
 */
@Command(
    name = "realtime",
-    description = "Runs a realtime node, see http://druid.io/docs/0.6.23/Realtime.html for a description"
+    description = "Runs a realtime node, see http://druid.io/docs/0.6.24/Realtime.html for a description"
 )
 public class CliRealtime extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliRealtimeExample.java
+++ b/services/src/main/java/io/druid/cli/CliRealtimeExample.java
@ -42,7 +42,7 @@ import java.util.concurrent.Executor;
 */
@Command(
    name = "realtime",
-    description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.23/Realtime.html for a description"
+    description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.24/Realtime.html for a description"
 )
 public class CliRealtimeExample extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/guice/RealtimeModule.java
+++ b/services/src/main/java/io/druid/guice/RealtimeModule.java
@ -48,13 +48,13 @@ public class RealtimeModule implements Module
        binder,
        "druid.publish.type",
        Key.get(SegmentPublisher.class),
-        Key.get(NoopSegmentPublisher.class)
+        Key.get(DbSegmentPublisher.class)
    );
    final MapBinder<String, SegmentPublisher> publisherBinder = PolyBind.optionBinder(
        binder,
        Key.get(SegmentPublisher.class)
    );
-    publisherBinder.addBinding("db").to(DbSegmentPublisher.class);
+    publisherBinder.addBinding("noop").to(NoopSegmentPublisher.class);
    binder.bind(DbSegmentPublisher.class).in(LazySingleton.class);

    JsonConfigProvider.bind(binder, "druid.realtime", RealtimeManagerConfig.class);