Merge branch 'master' into igalDruid
build.sh
|
@ -30,4 +30,4 @@ echo "For examples, see: "
|
||||||
echo " "
|
echo " "
|
||||||
ls -1 examples/*/*sh
|
ls -1 examples/*/*sh
|
||||||
echo " "
|
echo " "
|
||||||
echo "See also http://druid.io/docs/0.6.24"
|
echo "See also http://druid.io/docs/0.6.26"
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -202,7 +202,7 @@ The schema of the Hadoop Index Task contains a task "type" and a Hadoop Index Co
|
||||||
"type" : "static",
|
"type" : "static",
|
||||||
"paths" : "data.json"
|
"paths" : "data.json"
|
||||||
},
|
},
|
||||||
"targetPartitionSize" : 5000000,
|
"targetPartitionSi:qze" : 5000000,
|
||||||
"rollupSpec" : {
|
"rollupSpec" : {
|
||||||
"aggs": [{
|
"aggs": [{
|
||||||
"type" : "count",
|
"type" : "count",
|
||||||
|
|
|
@ -3,7 +3,7 @@ layout: doc_page
|
||||||
---
|
---
|
||||||
# Booting a Single Node Cluster #
|
# Booting a Single Node Cluster #
|
||||||
|
|
||||||
[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small Druid cluster on localhost. Here we will boot a small cluster on EC2. You can check out the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.24-bin.tar.gz).
|
[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small Druid cluster on localhost. Here we will boot a small cluster on EC2. You can check out the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.26-bin.tar.gz).
|
||||||
|
|
||||||
The [ec2 run script](https://github.com/metamx/druid/blob/master/examples/bin/run_ec2.sh), run_ec2.sh, is located at 'examples/bin' if you have checked out the code, or at the root of the project if you've downloaded a tarball. The scripts rely on the [Amazon EC2 API Tools](http://aws.amazon.com/developertools/351), and you will need to set three environment variables:
|
The [ec2 run script](https://github.com/metamx/druid/blob/master/examples/bin/run_ec2.sh), run_ec2.sh, is located at 'examples/bin' if you have checked out the code, or at the root of the project if you've downloaded a tarball. The scripts rely on the [Amazon EC2 API Tools](http://aws.amazon.com/developertools/351), and you will need to set three environment variables:
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
---
|
||||||
|
layout: doc_page
|
||||||
|
---
|
||||||
|
|
||||||
|
## DimensionSpec
|
||||||
|
|
||||||
|
`DimensionSpec`s define how dimension values get transformed prior to aggregation.
|
||||||
|
|
||||||
|
### DefaultDimensionSpec
|
||||||
|
|
||||||
|
Returns dimension values as is and optionally renames the dimension.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "default", "dimension" : <dimension>, "outputName": <output_name> }
|
||||||
|
```
|
||||||
|
|
||||||
|
### ExtractionDimensionSpec
|
||||||
|
|
||||||
|
Returns dimension values transformed using the given [DimExtractionFn](#toc_3).
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type" : "extraction",
|
||||||
|
"dimension" : <dimension>,
|
||||||
|
"outputName" : <output_name>,
|
||||||
|
"dimExtractionFn" : <dim_extraction_fn>
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## DimExtractionFn
|
||||||
|
|
||||||
|
`DimExtractionFn`s define the transformation applied to each dimension value.
|
||||||
|
|
||||||
|
### RegexDimExtractionFn
|
||||||
|
|
||||||
|
Returns the first group matched by the given regular expression. If there is no match, it returns the dimension value as is.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "regex", "expr", <regular_expression> }
|
||||||
|
```
|
||||||
|
|
||||||
|
### PartialDimExtractionFn
|
||||||
|
|
||||||
|
Returns the dimension value as is if there is a match, otherwise returns null.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "partial", "expr", <regular_expression> }
|
||||||
|
```
|
||||||
|
|
||||||
|
### SearchQuerySpecDimExtractionFn
|
||||||
|
|
||||||
|
Returns the dimension value as is if the given [SearchQuerySpec](SearchQuerySpec.html) matches, otherwise returns null.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "searchQuery", "query" : <search_query_spec> }
|
||||||
|
```
|
||||||
|
|
||||||
|
### TimeDimExtractionFn
|
||||||
|
|
||||||
|
Parses dimension values as timestamps using the given input format, and returns them formatted using the given output format. Time formats follow the [com.ibm.icu.text.SimpleDateFormat](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) format.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{ "type" : "time", "timeFormat" : <input_format>, "resultFormat" : <output_format> }
|
||||||
|
```
|
||||||
|
|
||||||
|
### JavascriptDimExtractionFn
|
||||||
|
|
||||||
|
Returns the dimension value as transformed by the given JavaScript function.
|
||||||
|
|
||||||
|
Example
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type" : "javascript",
|
||||||
|
"function" : "function(str) { return str.substr(0, 3); }"
|
||||||
|
}
|
|
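
To show how the pieces of this new page fit together, here is a sketch of an `ExtractionDimensionSpec` with a `JavascriptDimExtractionFn` plugged in, reusing the `<dimension>`/`<output_name>` placeholders and the substring function from the examples above. It is illustrative only and is not part of the diff.

```json
{
  "type" : "extraction",
  "dimension" : <dimension>,
  "outputName" : <output_name>,
  "dimExtractionFn" : {
    "type" : "javascript",
    "function" : "function(str) { return str.substr(0, 3); }"
  }
}
```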
@ -19,13 +19,13 @@ Clone Druid and build it:
|
||||||
git clone https://github.com/metamx/druid.git druid
|
git clone https://github.com/metamx/druid.git druid
|
||||||
cd druid
|
cd druid
|
||||||
git fetch --tags
|
git fetch --tags
|
||||||
git checkout druid-0.6.24
|
git checkout druid-0.6.26
|
||||||
./build.sh
|
./build.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
### Downloading the DSK (Druid Standalone Kit)
|
### Downloading the DSK (Druid Standalone Kit)
|
||||||
|
|
||||||
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz) a stand-alone tarball and run it:
|
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz) a stand-alone tarball and run it:
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
tar -xzf druid-services-0.X.X-bin.tar.gz
|
tar -xzf druid-services-0.X.X-bin.tar.gz
|
||||||
|
|
|
@ -32,7 +32,6 @@ druid.server.maxSize=100000000
|
||||||
|
|
||||||
druid.processing.buffer.sizeBytes=10000000
|
druid.processing.buffer.sizeBytes=10000000
|
||||||
|
|
||||||
druid.segmentCache.infoPath=/tmp/druid/segmentInfoCache
|
|
||||||
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
|
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ druid.host=localhost
|
||||||
druid.service=realtime
|
druid.service=realtime
|
||||||
druid.port=8083
|
druid.port=8083
|
||||||
|
|
||||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.24"]
|
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.26"]
|
||||||
|
|
||||||
|
|
||||||
druid.zk.service.host=localhost
|
druid.zk.service.host=localhost
|
||||||
|
|
|
@ -3,7 +3,8 @@ layout: doc_page
|
||||||
---
|
---
|
||||||
A search query returns dimension values that match the search specification.
|
A search query returns dimension values that match the search specification.
|
||||||
|
|
||||||
<code>{
|
```json
|
||||||
|
{
|
||||||
"queryType": "search",
|
"queryType": "search",
|
||||||
"dataSource": "sample_datasource",
|
"dataSource": "sample_datasource",
|
||||||
"granularity": "day",
|
"granularity": "day",
|
||||||
|
@ -21,8 +22,8 @@ A search query returns dimension values that match the search specification.
|
||||||
"intervals": [
|
"intervals": [
|
||||||
"2013-01-01T00:00:00.000/2013-01-03T00:00:00.000"
|
"2013-01-01T00:00:00.000/2013-01-03T00:00:00.000"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
</code>
|
```
|
||||||
|
|
||||||
There are several main parts to a search query:
|
There are several main parts to a search query:
|
||||||
|
|
||||||
|
@ -40,7 +41,8 @@ There are several main parts to a search query:
|
||||||
|
|
||||||
The format of the result is:
|
The format of the result is:
|
||||||
|
|
||||||
<code>[
|
```json
|
||||||
|
[
|
||||||
{
|
{
|
||||||
"timestamp": "2012-01-01T00:00:00.000Z",
|
"timestamp": "2012-01-01T00:00:00.000Z",
|
||||||
"result": [
|
"result": [
|
||||||
|
@ -67,5 +69,5 @@ The format of the result is:
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
</code>
|
```
|
||||||
|
|
|
@ -78,6 +78,18 @@ The Hadoop Index Task is used to index larger data sets that require the paralle
|
||||||
|
|
||||||
The Hadoop Index Config submitted as part of a Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer`, except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
|
The Hadoop Index Config submitted as part of a Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer`, except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
|
||||||
|
|
||||||
|
##### Using your own Hadoop distribution
|
||||||
|
|
||||||
|
Druid is compiled against Apache hadoop-core 1.0.3. However, if you happen to use a different Hadoop distribution that is API-compatible with hadoop-core 1.0.3, you should only have to change the `hadoopCoordinates` property to point to the Maven artifact used by your distribution.
|
||||||
|
|
||||||
|
##### Resolving dependency conflicts running HadoopIndexTask
|
||||||
|
|
||||||
|
Currently, the HadoopIndexTask creates a single classpath to run the HadoopDruidIndexerJob, which can lead to version conflicts between various dependencies of Druid, extension modules, and Hadoop's own dependencies.
|
||||||
|
|
||||||
|
The Hadoop index task will put Druid's dependencies first on the classpath, followed by any extension dependencies, with any Hadoop dependencies last.
|
||||||
|
|
||||||
|
If you are having trouble with an extension in a HadoopIndexTask, it may be that Druid, or one of its dependencies, requires a different version of a library than your extension does, and Druid's version overrides the one in your extension. In that case you will probably want to build your own version of Druid and override the offending library by adding an explicit dependency to the pom.xml of each Druid sub-module that depends on it.
|
||||||
|
|
||||||
#### Realtime Index Task
|
#### Realtime Index Task
|
||||||
|
|
||||||
The indexing service can also run real-time tasks. These tasks effectively transform a middle manager into a real-time node. We introduced real-time tasks as a way to programmatically add new real-time data sources without needing to manually add nodes. The grammar for the real-time task is as follows:
|
The indexing service can also run real-time tasks. These tasks effectively transform a middle manager into a real-time node. We introduced real-time tasks as a way to programmatically add new real-time data sources without needing to manually add nodes. The grammar for the real-time task is as follows:
|
||||||
|
|
|
@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
|
||||||
|
|
||||||
### Download a Tarball
|
### Download a Tarball
|
||||||
|
|
||||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz). Download this file to a directory of your choosing.
|
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz). Download this file to a directory of your choosing.
|
||||||
|
|
||||||
You can extract the awesomeness within by issuing:
|
You can extract the awesomeness within by issuing:
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
|
||||||
Not too lost so far right? That's great! If you cd into the directory:
|
Not too lost so far right? That's great! If you cd into the directory:
|
||||||
|
|
||||||
```
|
```
|
||||||
cd druid-services-0.6.24
|
cd druid-services-0.6.26
|
||||||
```
|
```
|
||||||
|
|
||||||
You should see a bunch of files:
|
You should see a bunch of files:
|
||||||
|
|
|
@ -44,7 +44,7 @@ With real-world data, we recommend having a message bus such as [Apache Kafka](h
|
||||||
|
|
||||||
#### Setting up Kafka
|
#### Setting up Kafka
|
||||||
|
|
||||||
[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.24/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
|
[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.26/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
|
||||||
|
|
||||||
Instructions for booting a Zookeeper and then Kafka cluster are available [here](http://kafka.apache.org/07/quickstart.html).
|
Instructions for booting a Zookeeper and then Kafka cluster are available [here](http://kafka.apache.org/07/quickstart.html).
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes as well as and exter
|
||||||
|
|
||||||
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
|
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
|
||||||
|
|
||||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz)
|
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz)
|
||||||
|
|
||||||
and untar the contents within by issuing:
|
and untar the contents within by issuing:
|
||||||
|
|
||||||
|
@ -149,7 +149,7 @@ druid.port=8081
|
||||||
|
|
||||||
druid.zk.service.host=localhost
|
druid.zk.service.host=localhost
|
||||||
|
|
||||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"]
|
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.26"]
|
||||||
|
|
||||||
# Dummy read only AWS account (used to download example data)
|
# Dummy read only AWS account (used to download example data)
|
||||||
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
||||||
|
@ -159,7 +159,6 @@ druid.server.maxSize=100000000
|
||||||
|
|
||||||
druid.processing.buffer.sizeBytes=10000000
|
druid.processing.buffer.sizeBytes=10000000
|
||||||
|
|
||||||
druid.segmentCache.infoPath=/tmp/druid/segmentInfoCache
|
|
||||||
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
|
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -239,7 +238,7 @@ druid.port=8083
|
||||||
|
|
||||||
druid.zk.service.host=localhost
|
druid.zk.service.host=localhost
|
||||||
|
|
||||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"]
|
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.26","io.druid.extensions:druid-kafka-seven:0.6.26"]
|
||||||
|
|
||||||
# Change this config to db to hand off to the rest of the Druid cluster
|
# Change this config to db to hand off to the rest of the Druid cluster
|
||||||
druid.publish.type=noop
|
druid.publish.type=noop
|
||||||
|
|
|
@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
|
||||||
|
|
||||||
h3. Download a Tarball
|
h3. Download a Tarball
|
||||||
|
|
||||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz)
|
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz)
|
||||||
Download this file to a directory of your choosing.
|
Download this file to a directory of your choosing.
|
||||||
You can extract the awesomeness within by issuing:
|
You can extract the awesomeness within by issuing:
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
|
||||||
Not too lost so far right? That's great! If you cd into the directory:
|
Not too lost so far right? That's great! If you cd into the directory:
|
||||||
|
|
||||||
```
|
```
|
||||||
cd druid-services-0.6.24
|
cd druid-services-0.6.26
|
||||||
```
|
```
|
||||||
|
|
||||||
You should see a bunch of files:
|
You should see a bunch of files:
|
||||||
|
|
|
@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.
|
||||||
|
|
||||||
h3. Download a Tarball
|
h3. Download a Tarball
|
||||||
|
|
||||||
We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz.
|
We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz.
|
||||||
Download this bad boy to a directory of your choosing.
|
Download this bad boy to a directory of your choosing.
|
||||||
|
|
||||||
You can extract the awesomeness within by issuing:
|
You can extract the awesomeness within by issuing:
|
||||||
|
|
|
@ -36,11 +36,12 @@ h2. Querying
|
||||||
** "Aggregations":./Aggregations.html
|
** "Aggregations":./Aggregations.html
|
||||||
** "Post Aggregations":./Post-aggregations.html
|
** "Post Aggregations":./Post-aggregations.html
|
||||||
** "Granularities":./Granularities.html
|
** "Granularities":./Granularities.html
|
||||||
|
** "DimensionSpecs":./DimensionSpecs.html
|
||||||
* Query Types
|
* Query Types
|
||||||
** "GroupByQuery":./GroupByQuery.html
|
** "GroupByQuery":./GroupByQuery.html
|
||||||
*** "OrderBy":./OrderBy.html
|
*** "OrderBy":./OrderBy.html
|
||||||
*** "Having":./Having.html
|
*** "Having":./Having.html
|
||||||
** "SearchQuery":./Having.html
|
** "SearchQuery":./SearchQuery.html
|
||||||
*** "SearchQuerySpec":./SearchQuerySpec.html
|
*** "SearchQuerySpec":./SearchQuerySpec.html
|
||||||
** "SegmentMetadataQuery":./SegmentMetadataQuery.html
|
** "SegmentMetadataQuery":./SegmentMetadataQuery.html
|
||||||
** "TimeBoundaryQuery":./TimeBoundaryQuery.html
|
** "TimeBoundaryQuery":./TimeBoundaryQuery.html
|
||||||
|
|
|
@ -26,7 +26,11 @@
|
||||||
"routingKey": "#",
|
"routingKey": "#",
|
||||||
"durable": "true",
|
"durable": "true",
|
||||||
"exclusive": "false",
|
"exclusive": "false",
|
||||||
"autoDelete": "false"
|
"autoDelete": "false",
|
||||||
|
|
||||||
|
"maxRetries": "10",
|
||||||
|
"retryIntervalSeconds": "1",
|
||||||
|
"maxDurationSeconds": "300"
|
||||||
},
|
},
|
||||||
"parser" : {
|
"parser" : {
|
||||||
"timestampSpec" : { "column" : "utcdt", "format" : "iso" },
|
"timestampSpec" : { "column" : "utcdt", "format" : "iso" },
|
||||||
|
|
|
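
For readability, the block that results after this hunk is sketched below. Only the keys visible in the hunk are shown; the surrounding firehose fields and the name of the enclosing object do not appear in the hunk and are therefore omitted here.

```json
{
  "routingKey": "#",
  "durable": "true",
  "exclusive": "false",
  "autoDelete": "false",
  "maxRetries": "10",
  "retryIntervalSeconds": "1",
  "maxDurationSeconds": "300"
}
```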
@ -4,7 +4,7 @@ druid.port=8081
|
||||||
|
|
||||||
druid.zk.service.host=localhost
|
druid.zk.service.host=localhost
|
||||||
|
|
||||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"]
|
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.26"]
|
||||||
|
|
||||||
# Dummy read only AWS account (used to download example data)
|
# Dummy read only AWS account (used to download example data)
|
||||||
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
||||||
|
@ -16,5 +16,4 @@ druid.server.maxSize=100000000
|
||||||
druid.processing.buffer.sizeBytes=10000000
|
druid.processing.buffer.sizeBytes=10000000
|
||||||
druid.processing.numThreads=1
|
druid.processing.numThreads=1
|
||||||
|
|
||||||
druid.segmentCache.infoPath=/tmp/druid/segmentInfoCache
|
|
||||||
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
|
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
|
|
@ -4,7 +4,7 @@ druid.port=8083
|
||||||
|
|
||||||
druid.zk.service.host=localhost
|
druid.zk.service.host=localhost
|
||||||
|
|
||||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"]
|
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.26","io.druid.extensions:druid-kafka-seven:0.6.26","io.druid.extensions:druid-rabbitmq:0.6.26"]
|
||||||
|
|
||||||
# Change this config to db to hand off to the rest of the Druid cluster
|
# Change this config to db to hand off to the rest of the Druid cluster
|
||||||
druid.publish.type=noop
|
druid.publish.type=noop
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
pom.xml
|
@ -23,7 +23,7 @@
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
<name>druid</name>
|
<name>druid</name>
|
||||||
<description>druid</description>
|
<description>druid</description>
|
||||||
<scm>
|
<scm>
|
||||||
|
@ -57,6 +57,7 @@
|
||||||
<module>s3-extensions</module>
|
<module>s3-extensions</module>
|
||||||
<module>kafka-seven</module>
|
<module>kafka-seven</module>
|
||||||
<module>kafka-eight</module>
|
<module>kafka-eight</module>
|
||||||
|
<module>rabbitmq</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
|
@ -368,11 +369,6 @@
|
||||||
<artifactId>aether-api</artifactId>
|
<artifactId>aether-api</artifactId>
|
||||||
<version>0.9.0.M2</version>
|
<version>0.9.0.M2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.rabbitmq</groupId>
|
|
||||||
<artifactId>amqp-client</artifactId>
|
|
||||||
<version>3.1.1</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.java.dev.jets3t</groupId>
|
<groupId>net.java.dev.jets3t</groupId>
|
||||||
<artifactId>jets3t</artifactId>
|
<artifactId>jets3t</artifactId>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -57,7 +57,7 @@ public class JavaScriptPostAggregator implements PostAggregator
|
||||||
|
|
||||||
final ScriptableObject scope = context.initStandardObjects();
|
final ScriptableObject scope = context.initStandardObjects();
|
||||||
|
|
||||||
final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "aggregate", 1, null);
|
final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null);
|
||||||
Context.exit();
|
Context.exit();
|
||||||
|
|
||||||
|
|
||||||
|
|
|
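
For context on the class touched above, here is a sketch of how a JavaScript post-aggregator is typically expressed in query JSON. The field names `name` and `fieldNames` and the example function body are assumptions for illustration; they are not shown in this hunk.

```json
{
  "type"       : "javascript",
  "name"       : "absPercent",
  "fieldNames" : ["delta", "total"],
  "function"   : "function(delta, total) { return 100 * Math.abs(delta) / total; }"
}
```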
@ -29,7 +29,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
|
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
|
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class),
|
@JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class),
|
||||||
@JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class)
|
@JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
|
||||||
|
@JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class)
|
||||||
})
|
})
|
||||||
public interface DimExtractionFn
|
public interface DimExtractionFn
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
/*
|
||||||
|
* Druid - a distributed column store.
|
||||||
|
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query.extraction;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.google.common.base.Function;
|
||||||
|
import org.mozilla.javascript.Context;
|
||||||
|
import org.mozilla.javascript.ContextFactory;
|
||||||
|
import org.mozilla.javascript.ScriptableObject;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
|
||||||
|
public class JavascriptDimExtractionFn implements DimExtractionFn
|
||||||
|
{
|
||||||
|
private static Function<String, String> compile(String function) {
|
||||||
|
final ContextFactory contextFactory = ContextFactory.getGlobal();
|
||||||
|
final Context context = contextFactory.enterContext();
|
||||||
|
context.setOptimizationLevel(9);
|
||||||
|
|
||||||
|
final ScriptableObject scope = context.initStandardObjects();
|
||||||
|
|
||||||
|
final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null);
|
||||||
|
Context.exit();
|
||||||
|
|
||||||
|
|
||||||
|
return new Function<String, String>()
|
||||||
|
{
|
||||||
|
public String apply(String input)
|
||||||
|
{
|
||||||
|
// ideally we need a close() function to discard the context once it is not used anymore
|
||||||
|
Context cx = Context.getCurrentContext();
|
||||||
|
if (cx == null) {
|
||||||
|
cx = contextFactory.enterContext();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Context.toString(fn.call(cx, scope, scope, new String[]{input}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final byte CACHE_TYPE_ID = 0x4;
|
||||||
|
|
||||||
|
private final String function;
|
||||||
|
private final Function<String, String> fn;
|
||||||
|
|
||||||
|
@JsonCreator
|
||||||
|
public JavascriptDimExtractionFn(
|
||||||
|
@JsonProperty("function") String function
|
||||||
|
)
|
||||||
|
{
|
||||||
|
this.function = function;
|
||||||
|
this.fn = compile(function);
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public String getFunction()
|
||||||
|
{
|
||||||
|
return function;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] getCacheKey()
|
||||||
|
{
|
||||||
|
byte[] bytes = function.getBytes();
|
||||||
|
return ByteBuffer.allocate(1 + bytes.length)
|
||||||
|
.put(CACHE_TYPE_ID)
|
||||||
|
.put(bytes)
|
||||||
|
.array();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String apply(String dimValue)
|
||||||
|
{
|
||||||
|
return fn.apply(dimValue);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,273 @@
|
||||||
|
/*
|
||||||
|
* Druid - a distributed column store.
|
||||||
|
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query.extraction.extraction;
|
||||||
|
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import io.druid.query.extraction.DimExtractionFn;
|
||||||
|
import io.druid.query.extraction.JavascriptDimExtractionFn;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
public class JavascriptDimExtractionFnTest
|
||||||
|
{
|
||||||
|
private static final String[] testStrings = {
|
||||||
|
"Quito",
|
||||||
|
"Calgary",
|
||||||
|
"Tokyo",
|
||||||
|
"Stockholm",
|
||||||
|
"Vancouver",
|
||||||
|
"Pretoria",
|
||||||
|
"Wellington",
|
||||||
|
"Ontario"
|
||||||
|
};
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testJavascriptSubstring()
|
||||||
|
{
|
||||||
|
String function = "function(str) { return str.substring(0,3); }";
|
||||||
|
DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
|
||||||
|
|
||||||
|
for (String str : testStrings) {
|
||||||
|
String res = dimExtractionFn.apply(str);
|
||||||
|
Assert.assertEquals(str.substring(0, 3), res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testJavascriptRegex()
|
||||||
|
{
|
||||||
|
String function = "function(str) { return str.replace(/[aeiou]/g, ''); }";
|
||||||
|
DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
|
||||||
|
|
||||||
|
Iterator it = Iterators.forArray("Qt", "Clgry", "Tky", "Stckhlm", "Vncvr", "Prtr", "Wllngtn", "Ontr");
|
||||||
|
for (String str : testStrings) {
|
||||||
|
String res = dimExtractionFn.apply(str);
|
||||||
|
Assert.assertEquals(it.next(), res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testJavaScriptPorterStemmer()
|
||||||
|
{
|
||||||
|
// JavaScript porter stemmer adapted from
|
||||||
|
// https://github.com/kristopolous/Porter-Stemmer/blob/e990a8d456510571d1ef9ef923d2a30a94679e13/PorterStemmer1980.js
|
||||||
|
String function = "function(w) {"
|
||||||
|
+ "var step2list = {\n"
|
||||||
|
+ " \"ational\" : \"ate\",\n"
|
||||||
|
+ " \"tional\" : \"tion\",\n"
|
||||||
|
+ " \"enci\" : \"ence\",\n"
|
||||||
|
+ " \"anci\" : \"ance\",\n"
|
||||||
|
+ " \"izer\" : \"ize\",\n"
|
||||||
|
+ " \"bli\" : \"ble\",\n"
|
||||||
|
+ " \"alli\" : \"al\",\n"
|
||||||
|
+ " \"entli\" : \"ent\",\n"
|
||||||
|
+ " \"eli\" : \"e\",\n"
|
||||||
|
+ " \"ousli\" : \"ous\",\n"
|
||||||
|
+ " \"ization\" : \"ize\",\n"
|
||||||
|
+ " \"ation\" : \"ate\",\n"
|
||||||
|
+ " \"ator\" : \"ate\",\n"
|
||||||
|
+ " \"alism\" : \"al\",\n"
|
||||||
|
+ " \"iveness\" : \"ive\",\n"
|
||||||
|
+ " \"fulness\" : \"ful\",\n"
|
||||||
|
+ " \"ousness\" : \"ous\",\n"
|
||||||
|
+ " \"aliti\" : \"al\",\n"
|
||||||
|
+ " \"iviti\" : \"ive\",\n"
|
||||||
|
+ " \"biliti\" : \"ble\",\n"
|
||||||
|
+ " \"logi\" : \"log\"\n"
|
||||||
|
+ " },\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " step3list = {\n"
|
||||||
|
+ " \"icate\" : \"ic\",\n"
|
||||||
|
+ " \"ative\" : \"\",\n"
|
||||||
|
+ " \"alize\" : \"al\",\n"
|
||||||
|
+ " \"iciti\" : \"ic\",\n"
|
||||||
|
+ " \"ical\" : \"ic\",\n"
|
||||||
|
+ " \"ful\" : \"\",\n"
|
||||||
|
+ " \"ness\" : \"\"\n"
|
||||||
|
+ " },\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " c = \"[^aeiou]\", // consonant\n"
|
||||||
|
+ " v = \"[aeiouy]\", // vowel\n"
|
||||||
|
+ " C = c + \"[^aeiouy]*\", // consonant sequence\n"
|
||||||
|
+ " V = v + \"[aeiou]*\", // vowel sequence\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " mgr0 = \"^(\" + C + \")?\" + V + C, // [C]VC... is m>0\n"
|
||||||
|
+ " meq1 = \"^(\" + C + \")?\" + V + C + \"(\" + V + \")?$\", // [C]VC[V] is m=1\n"
|
||||||
|
+ " mgr1 = \"^(\" + C + \")?\" + V + C + V + C, // [C]VCVC... is m>1\n"
|
||||||
|
+ " s_v = \"^(\" + C + \")?\" + v; "
|
||||||
|
+ ""
|
||||||
|
+ "var\n"
|
||||||
|
+ " stem,\n"
|
||||||
|
+ " suffix,\n"
|
||||||
|
+ " firstch,\n"
|
||||||
|
+ " re,\n"
|
||||||
|
+ " re2,\n"
|
||||||
|
+ " re3,\n"
|
||||||
|
+ " re4,\n"
|
||||||
|
+ " debugFunction,\n"
|
||||||
|
+ " origword = w;\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " if (w.length < 3) { return w; }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " firstch = w.substr(0,1);\n"
|
||||||
|
+ " if (firstch == \"y\") {\n"
|
||||||
|
+ " w = firstch.toUpperCase() + w.substr(1);\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 1a\n"
|
||||||
|
+ " re = /^(.+?)(ss|i)es$/;\n"
|
||||||
|
+ " re2 = /^(.+?)([^s])s$/;\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " if (re.test(w)) { \n"
|
||||||
|
+ " w = w.replace(re,\"$1$2\"); \n"
|
||||||
|
+ "\n"
|
||||||
|
+ " } else if (re2.test(w)) {\n"
|
||||||
|
+ " w = w.replace(re2,\"$1$2\"); \n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 1b\n"
|
||||||
|
+ " re = /^(.+?)eed$/;\n"
|
||||||
|
+ " re2 = /^(.+?)(ed|ing)$/;\n"
|
||||||
|
+ " if (re.test(w)) {\n"
|
||||||
|
+ " var fp = re.exec(w);\n"
|
||||||
|
+ " re = new RegExp(mgr0);\n"
|
||||||
|
+ " if (re.test(fp[1])) {\n"
|
||||||
|
+ " re = /.$/;\n"
|
||||||
|
+ " w = w.replace(re,\"\");\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " } else if (re2.test(w)) {\n"
|
||||||
|
+ " var fp = re2.exec(w);\n"
|
||||||
|
+ " stem = fp[1];\n"
|
||||||
|
+ " re2 = new RegExp(s_v);\n"
|
||||||
|
+ " if (re2.test(stem)) {\n"
|
||||||
|
+ " w = stem;\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " re2 = /(at|bl|iz)$/;\n"
|
||||||
|
+ " re3 = new RegExp(\"([^aeiouylsz])\\\\1$\");\n"
|
||||||
|
+ " re4 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " if (re2.test(w)) { \n"
|
||||||
|
+ " w = w + \"e\"; \n"
|
||||||
|
+ "\n"
|
||||||
|
+ " } else if (re3.test(w)) { \n"
|
||||||
|
+ " re = /.$/; \n"
|
||||||
|
+ " w = w.replace(re,\"\"); \n"
|
||||||
|
+ "\n"
|
||||||
|
+ " } else if (re4.test(w)) { \n"
|
||||||
|
+ " w = w + \"e\"; \n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 1c\n"
|
||||||
|
+ " re = new RegExp(\"^(.*\" + v + \".*)y$\");\n"
|
||||||
|
+ " if (re.test(w)) {\n"
|
||||||
|
+ " var fp = re.exec(w);\n"
|
||||||
|
+ " stem = fp[1];\n"
|
||||||
|
+ " w = stem + \"i\";\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 2\n"
|
||||||
|
+ " re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;\n"
|
||||||
|
+ " if (re.test(w)) {\n"
|
||||||
|
+ " var fp = re.exec(w);\n"
|
||||||
|
+ " stem = fp[1];\n"
|
||||||
|
+ " suffix = fp[2];\n"
|
||||||
|
+ " re = new RegExp(mgr0);\n"
|
||||||
|
+ " if (re.test(stem)) {\n"
|
||||||
|
+ " w = stem + step2list[suffix];\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 3\n"
|
||||||
|
+ " re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;\n"
|
||||||
|
+ " if (re.test(w)) {\n"
|
||||||
|
+ " var fp = re.exec(w);\n"
|
||||||
|
+ " stem = fp[1];\n"
|
||||||
|
+ " suffix = fp[2];\n"
|
||||||
|
+ " re = new RegExp(mgr0);\n"
|
||||||
|
+ " if (re.test(stem)) {\n"
|
||||||
|
+ " w = stem + step3list[suffix];\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 4\n"
|
||||||
|
+ " re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;\n"
|
||||||
|
+ " re2 = /^(.+?)(s|t)(ion)$/;\n"
|
||||||
|
+ " if (re.test(w)) {\n"
|
||||||
|
+ " var fp = re.exec(w);\n"
|
||||||
|
+ " stem = fp[1];\n"
|
||||||
|
+ " re = new RegExp(mgr1);\n"
|
||||||
|
+ " if (re.test(stem)) {\n"
|
||||||
|
+ " w = stem;\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " } else if (re2.test(w)) {\n"
|
||||||
|
+ " var fp = re2.exec(w);\n"
|
||||||
|
+ " stem = fp[1] + fp[2];\n"
|
||||||
|
+ " re2 = new RegExp(mgr1);\n"
|
||||||
|
+ " if (re2.test(stem)) {\n"
|
||||||
|
+ " w = stem;\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // Step 5\n"
|
||||||
|
+ " re = /^(.+?)e$/;\n"
|
||||||
|
+ " if (re.test(w)) {\n"
|
||||||
|
+ " var fp = re.exec(w);\n"
|
||||||
|
+ " stem = fp[1];\n"
|
||||||
|
+ " re = new RegExp(mgr1);\n"
|
||||||
|
+ " re2 = new RegExp(meq1);\n"
|
||||||
|
+ " re3 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n"
|
||||||
|
+ " if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {\n"
|
||||||
|
+ " w = stem;\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " re = /ll$/;\n"
|
||||||
|
+ " re2 = new RegExp(mgr1);\n"
|
||||||
|
+ " if (re.test(w) && re2.test(w)) {\n"
|
||||||
|
+ " re = /.$/;\n"
|
||||||
|
+ " w = w.replace(re,\"\");\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " // and turn initial Y back to y\n"
|
||||||
|
+ " if (firstch == \"y\") {\n"
|
||||||
|
+ " w = firstch.toLowerCase() + w.substr(1);\n"
|
||||||
|
+ " }\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "\n"
|
||||||
|
+ " return w;"
|
||||||
|
+ ""
|
||||||
|
+ "}";
|
||||||
|
|
||||||
|
DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
|
||||||
|
|
||||||
|
Iterator<String> inputs = Iterators.forArray("introducing", "exploratory", "analytics", "on", "large", "datasets");
|
||||||
|
Iterator<String> it = Iterators.forArray("introduc", "exploratori", "analyt", "on", "larg", "dataset");
|
||||||
|
|
||||||
|
while(inputs.hasNext()) {
|
||||||
|
String res = dimExtractionFn.apply(inputs.next());
|
||||||
|
Assert.assertEquals(it.next(), res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,4 @@
|
||||||
|
Download [MacTeX](http://tug.org/mactex/)
|
||||||
|
```bash
|
||||||
|
make
|
||||||
|
```
|
|
@ -1,4 +1,4 @@
|
||||||
@article{cattell2011scalable,
|
@article{cattell2011scalable,
|
||||||
title={Scalable SQL and NoSQL data stores},
|
title={Scalable SQL and NoSQL data stores},
|
||||||
author={Cattell, Rick},
|
author={Cattell, Rick},
|
||||||
journal={ACM SIGMOD Record},
|
journal={ACM SIGMOD Record},
|
||||||
|
@ -7,9 +7,9 @@
|
||||||
pages={12--27},
|
pages={12--27},
|
||||||
year={2011},
|
year={2011},
|
||||||
publisher={ACM}
|
publisher={ACM}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{chang2008bigtable,
|
@article{chang2008bigtable,
|
||||||
title={Bigtable: A distributed storage system for structured data},
|
title={Bigtable: A distributed storage system for structured data},
|
||||||
author={Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C and Wallach, Deborah A and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E},
|
author={Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C and Wallach, Deborah A and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E},
|
||||||
journal={ACM Transactions on Computer Systems (TOCS)},
|
journal={ACM Transactions on Computer Systems (TOCS)},
|
||||||
|
@ -18,9 +18,9 @@
|
||||||
pages={4},
|
pages={4},
|
||||||
year={2008},
|
year={2008},
|
||||||
publisher={ACM}
|
publisher={ACM}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{decandia2007dynamo,
|
@inproceedings{decandia2007dynamo,
|
||||||
title={Dynamo: amazon's highly available key-value store},
|
title={Dynamo: amazon's highly available key-value store},
|
||||||
author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
|
author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
|
||||||
booktitle={ACM SIGOPS Operating Systems Review},
|
booktitle={ACM SIGOPS Operating Systems Review},
|
||||||
|
@ -29,18 +29,18 @@
|
||||||
pages={205--220},
|
pages={205--220},
|
||||||
year={2007},
|
year={2007},
|
||||||
organization={ACM}
|
organization={ACM}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{bear2012vertica,
|
@inproceedings{bear2012vertica,
|
||||||
title={The vertica database: SQL RDBMS for managing big data},
|
title={The vertica database: SQL RDBMS for managing big data},
|
||||||
author={Bear, Chuck and Lamb, Andrew and Tran, Nga},
|
author={Bear, Chuck and Lamb, Andrew and Tran, Nga},
|
||||||
booktitle={Proceedings of the 2012 workshop on Management of big data systems},
|
booktitle={Proceedings of the 2012 workshop on Management of big data systems},
|
||||||
pages={37--38},
|
pages={37--38},
|
||||||
year={2012},
|
year={2012},
|
||||||
organization={ACM}
|
organization={ACM}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{lakshman2010cassandra,
|
@article{lakshman2010cassandra,
|
||||||
title={Cassandra—A decentralized structured storage system},
|
title={Cassandra—A decentralized structured storage system},
|
||||||
author={Lakshman, Avinash and Malik, Prashant},
|
author={Lakshman, Avinash and Malik, Prashant},
|
||||||
journal={Operating systems review},
|
journal={Operating systems review},
|
||||||
|
@ -48,9 +48,9 @@
|
||||||
number={2},
|
number={2},
|
||||||
pages={35},
|
pages={35},
|
||||||
year={2010}
|
year={2010}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{melnik2010dremel,
|
@article{melnik2010dremel,
|
||||||
title={Dremel: interactive analysis of web-scale datasets},
|
title={Dremel: interactive analysis of web-scale datasets},
|
||||||
author={Melnik, Sergey and Gubarev, Andrey and Long, Jing Jing and Romer, Geoffrey and Shivakumar, Shiva and Tolton, Matt and Vassilakis, Theo},
|
author={Melnik, Sergey and Gubarev, Andrey and Long, Jing Jing and Romer, Geoffrey and Shivakumar, Shiva and Tolton, Matt and Vassilakis, Theo},
|
||||||
journal={Proceedings of the VLDB Endowment},
|
journal={Proceedings of the VLDB Endowment},
|
||||||
|
@ -59,9 +59,9 @@
|
||||||
pages={330--339},
|
pages={330--339},
|
||||||
year={2010},
|
year={2010},
|
||||||
publisher={VLDB Endowment}
|
publisher={VLDB Endowment}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{hall2012processing,
|
@article{hall2012processing,
|
||||||
title={Processing a trillion cells per mouse click},
|
title={Processing a trillion cells per mouse click},
|
||||||
author={Hall, Alexander and Bachmann, Olaf and B{\"u}ssow, Robert and G{\u{a}}nceanu, Silviu and Nunkesser, Marc},
|
author={Hall, Alexander and Bachmann, Olaf and B{\"u}ssow, Robert and G{\u{a}}nceanu, Silviu and Nunkesser, Marc},
|
||||||
journal={Proceedings of the VLDB Endowment},
|
journal={Proceedings of the VLDB Endowment},
|
||||||
|
@ -70,18 +70,18 @@
|
||||||
pages={1436--1446},
|
pages={1436--1446},
|
||||||
year={2012},
|
year={2012},
|
||||||
publisher={VLDB Endowment}
|
publisher={VLDB Endowment}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{shvachko2010hadoop,
|
@inproceedings{shvachko2010hadoop,
|
||||||
title={The hadoop distributed file system},
|
title={The hadoop distributed file system},
|
||||||
author={Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
|
author={Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
|
||||||
booktitle={Mass Storage Systems and Technologies (MSST), 2010 IEEE 26th Symposium on},
|
booktitle={Mass Storage Systems and Technologies (MSST), 2010 IEEE 26th Symposium on},
|
||||||
pages={1--10},
|
pages={1--10},
|
||||||
year={2010},
|
year={2010},
|
||||||
organization={IEEE}
|
organization={IEEE}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{colantonio2010concise,
|
@article{colantonio2010concise,
|
||||||
title={Concise: Compressed ‘n’Composable Integer Set},
|
title={Concise: Compressed ‘n’Composable Integer Set},
|
||||||
author={Colantonio, Alessandro and Di Pietro, Roberto},
|
author={Colantonio, Alessandro and Di Pietro, Roberto},
|
||||||
journal={Information Processing Letters},
|
journal={Information Processing Letters},
|
||||||
|
@ -90,54 +90,52 @@
|
||||||
pages={644--650},
|
pages={644--650},
|
||||||
year={2010},
|
year={2010},
|
||||||
publisher={Elsevier}
|
publisher={Elsevier}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{lerner2010redis,
|
@inproceedings{stonebraker2005c,
|
||||||
title={At the Forge: Redis},
|
|
||||||
author={Lerner, Richard},
|
|
||||||
journal={Linux Journal},
|
|
||||||
volume={2010},
|
|
||||||
number={197},
|
|
||||||
pages={3},
|
|
||||||
year={2010}
|
|
||||||
}
|
|
||||||
|
|
||||||
@inproceedings{stonebraker2005c,
|
|
||||||
title={C-store: a column-oriented DBMS},
|
title={C-store: a column-oriented DBMS},
|
||||||
author={Stonebraker, Mike and Abadi, Daniel J and Batkin, Adam and Chen, Xuedong and Cherniack, Mitch and Ferreira, Miguel and Lau, Edmond and Lin, Amerson and Madden, Sam and O'Neil, Elizabeth and others},
|
author={Stonebraker, Mike and Abadi, Daniel J and Batkin, Adam and Chen, Xuedong and Cherniack, Mitch and Ferreira, Miguel and Lau, Edmond and Lin, Amerson and Madden, Sam and O'Neil, Elizabeth and others},
|
||||||
booktitle={Proceedings of the 31st international conference on Very large data bases},
|
booktitle={Proceedings of the 31st international conference on Very large data bases},
|
||||||
pages={553--564},
|
pages={553--564},
|
||||||
year={2005},
|
year={2005},
|
||||||
organization={VLDB Endowment}
|
organization={VLDB Endowment}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{engle2012shark,
|
@inproceedings{engle2012shark,
|
||||||
title={Shark: fast data analysis using coarse-grained distributed memory},
|
title={Shark: fast data analysis using coarse-grained distributed memory},
|
||||||
author={Engle, Cliff and Lupher, Antonio and Xin, Reynold and Zaharia, Matei and Franklin, Michael J and Shenker, Scott and Stoica, Ion},
|
author={Engle, Cliff and Lupher, Antonio and Xin, Reynold and Zaharia, Matei and Franklin, Michael J and Shenker, Scott and Stoica, Ion},
|
||||||
booktitle={Proceedings of the 2012 international conference on Management of Data},
|
booktitle={Proceedings of the 2012 international conference on Management of Data},
|
||||||
pages={689--692},
|
pages={689--692},
|
||||||
year={2012},
|
year={2012},
|
||||||
organization={ACM}
|
organization={ACM}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{zaharia2012discretized,
|
@inproceedings{zaharia2012discretized,
|
||||||
title={Discretized streams: an efficient and fault-tolerant model for stream processing on large clusters},
|
title={Discretized streams: an efficient and fault-tolerant model for stream processing on large clusters},
|
||||||
author={Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
|
author={Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
|
||||||
booktitle={Proceedings of the 4th USENIX conference on Hot Topics in Cloud Computing},
|
booktitle={Proceedings of the 4th USENIX conference on Hot Topics in Cloud Computing},
|
||||||
pages={10--10},
|
pages={10--10},
|
||||||
year={2012},
|
year={2012},
|
||||||
organization={USENIX Association}
|
organization={USENIX Association}
|
||||||
}
|
}
|
||||||
|
|
||||||
@misc{marz2013storm,
|
@misc{marz2013storm,
|
||||||
author = {Marz, Nathan},
|
author = {Marz, Nathan},
|
||||||
title = {Storm: Distributed and Fault-Tolerant Realtime Computation},
|
title = {Storm: Distributed and Fault-Tolerant Realtime Computation},
|
||||||
month = {February},
|
month = {February},
|
||||||
year = {2013},
|
year = {2013},
|
||||||
howpublished = "\url{http://storm-project.net/}"
|
howpublished = "\url{http://storm-project.net/}"
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{farber2012sap,
|
@misc{tschetter2011druid,
|
||||||
|
author = {Eric Tschetter},
|
||||||
|
title = {Introducing Druid: Real-Time Analytics at a Billion Rows Per Second},
|
||||||
|
month = {April},
|
||||||
|
year = {2011},
|
||||||
|
howpublished = "\url{http://metamarkets.com/2011/druid-part-i-real-time-analytics-at-a-billion-rows-per-second/}"
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{farber2012sap,
|
||||||
title={SAP HANA database: data management for modern business applications},
|
title={SAP HANA database: data management for modern business applications},
|
||||||
author={F{\"a}rber, Franz and Cha, Sang Kyun and Primsch, J{\"u}rgen and Bornh{\"o}vd, Christof and Sigg, Stefan and Lehner, Wolfgang},
|
author={F{\"a}rber, Franz and Cha, Sang Kyun and Primsch, J{\"u}rgen and Bornh{\"o}vd, Christof and Sigg, Stefan and Lehner, Wolfgang},
|
||||||
journal={ACM Sigmod Record},
|
journal={ACM Sigmod Record},
|
||||||
|
@ -146,151 +144,119 @@ howpublished = "\url{http://storm-project.net/}"
|
||||||
pages={45--51},
|
pages={45--51},
|
||||||
year={2012},
|
year={2012},
|
||||||
publisher={ACM}
|
publisher={ACM}
|
||||||
}
|
}
|
||||||
|
|
||||||
@misc{voltdb2010voltdb,
|
@misc{voltdb2010voltdb,
|
||||||
title={VoltDB Technical Overview},
|
title={VoltDB Technical Overview},
|
||||||
author={VoltDB, LLC},
|
author={VoltDB, LLC},
|
||||||
year={2010},
|
year={2010},
|
||||||
howpublished = "\url{https://voltdb.com/}"
|
howpublished = "\url{https://voltdb.com/}"
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{macnicol2004sybase,
|
@inproceedings{macnicol2004sybase,
|
||||||
title={Sybase IQ multiplex-designed for analytics},
|
title={Sybase IQ multiplex-designed for analytics},
|
||||||
[Nine binary image files (figures, 28 KiB to 79 KiB each) are added by this change; their contents do not appear in the text diff.]
|
@ -0,0 +1,43 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<groupId>io.druid.extensions</groupId>
|
||||||
|
<artifactId>druid-rabbitmq</artifactId>
|
||||||
|
<name>druid-rabbitmq</name>
|
||||||
|
<description>druid-rabbitmq</description>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>io.druid</groupId>
|
||||||
|
<artifactId>druid</artifactId>
|
||||||
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.druid</groupId>
|
||||||
|
<artifactId>druid-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.rabbitmq</groupId>
|
||||||
|
<artifactId>amqp-client</artifactId>
|
||||||
|
<version>3.2.1</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.jodah</groupId>
|
||||||
|
<artifactId>lyra</artifactId>
|
||||||
|
<version>0.3.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Tests -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-cli</groupId>
|
||||||
|
<artifactId>commons-cli</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
|
@ -17,7 +17,7 @@
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.druid.segment.realtime.firehose;
|
package io.druid.firehose.rabbitmq;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.rabbitmq.client.ConnectionFactory;
|
import com.rabbitmq.client.ConnectionFactory;
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Druid - a distributed column store.
|
||||||
|
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
package io.druid.firehose.rabbitmq;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.Module;
|
||||||
|
import com.fasterxml.jackson.databind.jsontype.NamedType;
|
||||||
|
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.inject.Binder;
|
||||||
|
import io.druid.initialization.DruidModule;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class RabbitMQDruidModule implements DruidModule
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public List<? extends Module> getJacksonModules()
|
||||||
|
{
|
||||||
|
return ImmutableList.of(
|
||||||
|
new SimpleModule("RabbitMQFirehoseModule")
|
||||||
|
.registerSubtypes(
|
||||||
|
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq")
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void configure(Binder binder)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,7 +17,7 @@
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.druid.segment.realtime.firehose;
|
package io.druid.firehose.rabbitmq;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
|
@ -33,6 +33,11 @@ public class RabbitMQFirehoseConfig
|
||||||
private boolean exclusive = false;
|
private boolean exclusive = false;
|
||||||
private boolean autoDelete = false;
|
private boolean autoDelete = false;
|
||||||
|
|
||||||
|
// Lyra (auto reconnect) properties
|
||||||
|
private int maxRetries = 100;
|
||||||
|
private int retryIntervalSeconds = 2;
|
||||||
|
private long maxDurationSeconds = 5 * 60;
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
public String getQueue()
|
public String getQueue()
|
||||||
{
|
{
|
||||||
|
@ -98,4 +103,31 @@ public class RabbitMQFirehoseConfig
|
||||||
{
|
{
|
||||||
this.autoDelete = autoDelete;
|
this.autoDelete = autoDelete;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public int getMaxRetries() {
|
||||||
|
return maxRetries;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxRetries(int maxRetries) {
|
||||||
|
this.maxRetries = maxRetries;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public int getRetryIntervalSeconds() {
|
||||||
|
return retryIntervalSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRetryIntervalSeconds(int retryIntervalSeconds) {
|
||||||
|
this.retryIntervalSeconds = retryIntervalSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public long getMaxDurationSeconds() {
|
||||||
|
return maxDurationSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxDurationSeconds(int maxDurationSeconds) {
|
||||||
|
this.maxDurationSeconds = maxDurationSeconds;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -17,11 +17,10 @@
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.druid.segment.realtime.firehose;
|
package io.druid.firehose.rabbitmq;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.google.api.client.repackaged.com.google.common.base.Throwables;
|
|
||||||
import com.metamx.common.logger.Logger;
|
import com.metamx.common.logger.Logger;
|
||||||
import com.rabbitmq.client.Channel;
|
import com.rabbitmq.client.Channel;
|
||||||
import com.rabbitmq.client.Connection;
|
import com.rabbitmq.client.Connection;
|
||||||
|
@ -33,6 +32,11 @@ import io.druid.data.input.Firehose;
|
||||||
import io.druid.data.input.FirehoseFactory;
|
import io.druid.data.input.FirehoseFactory;
|
||||||
import io.druid.data.input.InputRow;
|
import io.druid.data.input.InputRow;
|
||||||
import io.druid.data.input.impl.StringInputRowParser;
|
import io.druid.data.input.impl.StringInputRowParser;
|
||||||
|
import net.jodah.lyra.ConnectionOptions;
|
||||||
|
import net.jodah.lyra.Connections;
|
||||||
|
import net.jodah.lyra.config.Config;
|
||||||
|
import net.jodah.lyra.retry.RetryPolicy;
|
||||||
|
import net.jodah.lyra.util.Duration;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@ -64,7 +68,11 @@ import java.io.IOException;
|
||||||
* "routingKey": "#", # The routing key to use to bind the queue to the exchange. No default
|
* "routingKey": "#", # The routing key to use to bind the queue to the exchange. No default
|
||||||
* "durable": "true", # Whether the queue should be durable. Default: 'false'
|
* "durable": "true", # Whether the queue should be durable. Default: 'false'
|
||||||
* "exclusive": "false", # Whether the queue should be exclusive. Default: 'false'
|
* "exclusive": "false", # Whether the queue should be exclusive. Default: 'false'
|
||||||
* "autoDelete": "false" # Whether the queue should auto-delete on disconnect. Default: 'false'
|
* "autoDelete": "false", # Whether the queue should auto-delete on disconnect. Default: 'false'
|
||||||
|
*
|
||||||
|
 * "maxRetries": "10", # The maximum number of reconnection attempts
|
||||||
|
 * "retryIntervalSeconds": "1", # The interval between reconnection attempts, in seconds
|
||||||
|
 * "maxDurationSeconds": "300" # The maximum total time to keep retrying the connection, in seconds
|
||||||
* },
|
* },
|
||||||
* "parser" : {
|
* "parser" : {
|
||||||
* "timestampSpec" : { "column" : "utcdt", "format" : "iso" },
|
* "timestampSpec" : { "column" : "utcdt", "format" : "iso" },
|
||||||
|
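The three reconnection settings documented in the comment block above are handed to Lyra's recovery policy when the firehose connects (see the `connect()` changes further down). Below is a minimal, self-contained sketch of that wiring; it uses only the Lyra and RabbitMQ calls this patch already imports, and the host name, class name, and numeric values (taken from the example values in the comment) are illustrative assumptions rather than part of the patch.

```java
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;

import net.jodah.lyra.ConnectionOptions;
import net.jodah.lyra.Connections;
import net.jodah.lyra.config.Config;
import net.jodah.lyra.retry.RetryPolicy;
import net.jodah.lyra.util.Duration;

import java.io.IOException;

public class LyraReconnectSketch
{
  public static void main(String[] args) throws IOException
  {
    // Plain RabbitMQ connection settings; the host is a placeholder.
    ConnectionFactory factory = new ConnectionFactory();
    factory.setHost("localhost");

    // Recovery policy built from the properties documented above:
    // maxRetries = 10, retryIntervalSeconds = 1, maxDurationSeconds = 300.
    Config lyraConfig = new Config()
        .withRecoveryPolicy(
            new RetryPolicy()
                .withMaxRetries(10)
                .withRetryInterval(Duration.seconds(1))
                .withMaxDuration(Duration.seconds(300))
        );

    // Lyra wraps the underlying connection and re-establishes it on failure,
    // which is why the manual reconnect TODOs are removed elsewhere in this patch.
    Connection connection = Connections.create(new ConnectionOptions(factory), lyraConfig);
    connection.close();
  }
}
```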
@ -113,6 +121,13 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
|
||||||
@Override
|
@Override
|
||||||
public Firehose connect() throws IOException
|
public Firehose connect() throws IOException
|
||||||
{
|
{
|
||||||
|
ConnectionOptions lyraOptions = new ConnectionOptions(this.connectionFactory);
|
||||||
|
Config lyraConfig = new Config()
|
||||||
|
.withRecoveryPolicy(new RetryPolicy()
|
||||||
|
.withMaxRetries(config.getMaxRetries())
|
||||||
|
.withRetryInterval(Duration.seconds(config.getRetryIntervalSeconds()))
|
||||||
|
.withMaxDuration(Duration.seconds(config.getMaxDurationSeconds())));
|
||||||
|
|
||||||
String queue = config.getQueue();
|
String queue = config.getQueue();
|
||||||
String exchange = config.getExchange();
|
String exchange = config.getExchange();
|
||||||
String routingKey = config.getRoutingKey();
|
String routingKey = config.getRoutingKey();
|
||||||
|
@ -121,13 +136,7 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
|
||||||
boolean exclusive = config.isExclusive();
|
boolean exclusive = config.isExclusive();
|
||||||
boolean autoDelete = config.isAutoDelete();
|
boolean autoDelete = config.isAutoDelete();
|
||||||
|
|
||||||
final Connection connection;
|
final Connection connection = Connections.create(lyraOptions, lyraConfig);
|
||||||
try {
|
|
||||||
connection = connectionFactory.newConnection();
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Unable to find a RabbitMQ broker. Are you sure you have one running?");
|
|
||||||
throw Throwables.propagate(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
connection.addShutdownListener(new ShutdownListener()
|
connection.addShutdownListener(new ShutdownListener()
|
||||||
{
|
{
|
||||||
|
@ -135,7 +144,6 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
|
||||||
public void shutdownCompleted(ShutdownSignalException cause)
|
public void shutdownCompleted(ShutdownSignalException cause)
|
||||||
{
|
{
|
||||||
log.warn(cause, "Connection closed!");
|
log.warn(cause, "Connection closed!");
|
||||||
//FUTURE: we could try to re-establish the connection here. Not done in this version though.
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -148,7 +156,6 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
|
||||||
public void shutdownCompleted(ShutdownSignalException cause)
|
public void shutdownCompleted(ShutdownSignalException cause)
|
||||||
{
|
{
|
||||||
log.warn(cause, "Channel closed!");
|
log.warn(cause, "Channel closed!");
|
||||||
//FUTURE: we could try to re-establish the connection here. Not done in this version though.
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
io.druid.firehose.rabbitmq.RabbitMQDruidModule
|
|
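The one-line service registration above is what lets Druid's extension loader discover the new module; once its Jackson modules are registered, a firehose spec whose "type" is "rabbitmq" resolves to `RabbitMQFirehoseFactory`. A brief sketch of that registration follows; the class name and mapper setup here are purely illustrative and not part of the patch.

```java
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;

import io.druid.firehose.rabbitmq.RabbitMQDruidModule;

public class RabbitMQModuleWiringSketch
{
  public static void main(String[] args)
  {
    ObjectMapper mapper = new ObjectMapper();

    // RabbitMQDruidModule.getJacksonModules() returns the SimpleModule that
    // registers the named type "rabbitmq" for RabbitMQFirehoseFactory.
    for (Module jacksonModule : new RabbitMQDruidModule().getJacksonModules()) {
      mapper.registerModule(jacksonModule);
    }

    // With the module registered, deserializing a firehose spec whose "type"
    // is "rabbitmq" into io.druid.data.input.FirehoseFactory would yield a
    // RabbitMQFirehoseFactory instance.
  }
}
```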
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
@ -183,10 +183,6 @@
|
||||||
<groupId>org.eclipse.jetty</groupId>
|
<groupId>org.eclipse.jetty</groupId>
|
||||||
<artifactId>jetty-servlets</artifactId>
|
<artifactId>jetty-servlets</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.rabbitmq</groupId>
|
|
||||||
<artifactId>amqp-client</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.ircclouds.irc</groupId>
|
<groupId>com.ircclouds.irc</groupId>
|
||||||
<artifactId>irc-api</artifactId>
|
<artifactId>irc-api</artifactId>
|
||||||
|
|
|
@ -28,7 +28,6 @@ import io.druid.initialization.DruidModule;
|
||||||
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
|
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
|
||||||
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
|
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
|
||||||
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
|
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
|
||||||
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
|
|
||||||
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
|
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -49,7 +48,6 @@ public class FirehoseModule implements DruidModule
|
||||||
return Arrays.<Module>asList(
|
return Arrays.<Module>asList(
|
||||||
new SimpleModule("FirehoseModule")
|
new SimpleModule("FirehoseModule")
|
||||||
.registerSubtypes(
|
.registerSubtypes(
|
||||||
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
|
|
||||||
new NamedType(ClippedFirehoseFactory.class, "clipped"),
|
new NamedType(ClippedFirehoseFactory.class, "clipped"),
|
||||||
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
|
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
|
||||||
new NamedType(IrcFirehoseFactory.class, "irc"),
|
new NamedType(IrcFirehoseFactory.class, "irc"),
|
||||||
|
|
|
@ -87,6 +87,8 @@ public class QueryResource
|
||||||
@Context HttpServletResponse resp
|
@Context HttpServletResponse resp
|
||||||
) throws ServletException, IOException
|
) throws ServletException, IOException
|
||||||
{
|
{
|
||||||
|
final long start = System.currentTimeMillis();
|
||||||
|
|
||||||
Query query = null;
|
Query query = null;
|
||||||
byte[] requestQuery = null;
|
byte[] requestQuery = null;
|
||||||
|
|
||||||
|
@ -118,7 +120,7 @@ public class QueryResource
|
||||||
out = resp.getOutputStream();
|
out = resp.getOutputStream();
|
||||||
jsonWriter.writeValue(out, results);
|
jsonWriter.writeValue(out, results);
|
||||||
|
|
||||||
long requestTime = System.currentTimeMillis() - req.getSession().getCreationTime();
|
long requestTime = System.currentTimeMillis() - start;
|
||||||
|
|
||||||
emitter.emit(
|
emitter.emit(
|
||||||
new ServiceMetricEvent.Builder()
|
new ServiceMetricEvent.Builder()
|
||||||
|
|
|
@ -198,7 +198,8 @@ public class BatchServerInventoryViewTest
|
||||||
private void waitForSync() throws Exception
|
private void waitForSync() throws Exception
|
||||||
{
|
{
|
||||||
Stopwatch stopwatch = new Stopwatch().start();
|
Stopwatch stopwatch = new Stopwatch().start();
|
||||||
while (Iterables.get(batchServerInventoryView.getInventory(), 0).getSegments().size() != testSegments.size()) {
|
while (!Iterables.isEmpty(batchServerInventoryView.getInventory())
|
||||||
|
&& Iterables.get(batchServerInventoryView.getInventory(), 0).getSegments().size() != testSegments.size()) {
|
||||||
Thread.sleep(500);
|
Thread.sleep(500);
|
||||||
if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > 5000) {
|
if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > 5000) {
|
||||||
throw new ISE("BatchServerInventoryView is not updating");
|
throw new ISE("BatchServerInventoryView is not updating");
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>io.druid</groupId>
|
<groupId>io.druid</groupId>
|
||||||
<artifactId>druid</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.6.25-SNAPSHOT</version>
|
<version>0.6.27-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
|
@ -53,7 +53,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "broker",
|
name = "broker",
|
||||||
description = "Runs a broker node, see http://druid.io/docs/0.6.24/Broker.html for a description"
|
description = "Runs a broker node, see http://druid.io/docs/0.6.26/Broker.html for a description"
|
||||||
)
|
)
|
||||||
public class CliBroker extends ServerRunnable
|
public class CliBroker extends ServerRunnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -63,7 +63,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "coordinator",
|
name = "coordinator",
|
||||||
description = "Runs the Coordinator, see http://druid.io/docs/0.6.24/Coordinator.html for a description."
|
description = "Runs the Coordinator, see http://druid.io/docs/0.6.26/Coordinator.html for a description."
|
||||||
)
|
)
|
||||||
public class CliCoordinator extends ServerRunnable
|
public class CliCoordinator extends ServerRunnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,7 +41,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "hadoop",
|
name = "hadoop",
|
||||||
description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.24/Batch-ingestion.html for a description."
|
description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.26/Batch-ingestion.html for a description."
|
||||||
)
|
)
|
||||||
public class CliHadoopIndexer implements Runnable
|
public class CliHadoopIndexer implements Runnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -42,7 +42,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "historical",
|
name = "historical",
|
||||||
description = "Runs a Historical node, see http://druid.io/docs/0.6.24/Historical.html for a description"
|
description = "Runs a Historical node, see http://druid.io/docs/0.6.26/Historical.html for a description"
|
||||||
)
|
)
|
||||||
public class CliHistorical extends ServerRunnable
|
public class CliHistorical extends ServerRunnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -93,7 +93,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "overlord",
|
name = "overlord",
|
||||||
description = "Runs an Overlord node, see http://druid.io/docs/0.6.24/Indexing-Service.html for a description"
|
description = "Runs an Overlord node, see http://druid.io/docs/0.6.26/Indexing-Service.html for a description"
|
||||||
)
|
)
|
||||||
public class CliOverlord extends ServerRunnable
|
public class CliOverlord extends ServerRunnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -30,7 +30,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "realtime",
|
name = "realtime",
|
||||||
description = "Runs a realtime node, see http://druid.io/docs/0.6.24/Realtime.html for a description"
|
description = "Runs a realtime node, see http://druid.io/docs/0.6.26/Realtime.html for a description"
|
||||||
)
|
)
|
||||||
public class CliRealtime extends ServerRunnable
|
public class CliRealtime extends ServerRunnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -42,7 +42,7 @@ import java.util.concurrent.Executor;
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "realtime",
|
name = "realtime",
|
||||||
description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.24/Realtime.html for a description"
|
description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.26/Realtime.html for a description"
|
||||||
)
|
)
|
||||||
public class CliRealtimeExample extends ServerRunnable
|
public class CliRealtimeExample extends ServerRunnable
|
||||||
{
|
{
|
||||||
|
|
|
@ -72,7 +72,7 @@ public class ConvertProperties implements Runnable
|
||||||
new PrefixRename("com.metamx.emitter", "druid.emitter"),
|
new PrefixRename("com.metamx.emitter", "druid.emitter"),
|
||||||
new PrefixRename("com.metamx.druid.emitter", "druid.emitter"),
|
new PrefixRename("com.metamx.druid.emitter", "druid.emitter"),
|
||||||
new IndexCacheConverter(),
|
new IndexCacheConverter(),
|
||||||
new Rename("druid.paths.segmentInfoCache", "druid.segmentCache.infoPath"),
|
new Rename("druid.paths.segmentInfoCache", "druid.segmentCache.infoDir"),
|
||||||
new Rename("com.metamx.aws.accessKey", "druid.s3.accessKey"),
|
new Rename("com.metamx.aws.accessKey", "druid.s3.accessKey"),
|
||||||
new Rename("com.metamx.aws.secretKey", "druid.s3.secretKey"),
|
new Rename("com.metamx.aws.secretKey", "druid.s3.secretKey"),
|
||||||
new Rename("druid.bard.maxIntervalDuration", "druid.query.chunkDuration"),
|
new Rename("druid.bard.maxIntervalDuration", "druid.query.chunkDuration"),
|
||||||
|
|