diff --git a/build.sh b/build.sh index 9b6148b6c4c..50bcefbfe7b 100755 --- a/build.sh +++ b/build.sh @@ -30,4 +30,4 @@ echo "For examples, see: " echo " " ls -1 examples/*/*sh echo " " -echo "See also http://druid.io/docs/0.6.26" +echo "See also http://druid.io/docs/0.6.46" diff --git a/cassandra-storage/pom.xml b/cassandra-storage/pom.xml index c821f9aa69b..c56617fc659 100644 --- a/cassandra-storage/pom.xml +++ b/cassandra-storage/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -53,4 +53,20 @@ test + + + + + maven-jar-plugin + + + + true + true + + + + + + diff --git a/common/pom.xml b/common/pom.xml index 24c37411ba3..9b732f714ee 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -59,14 +59,6 @@ org.skife.config config-magic - - org.apache.curator - curator-recipes - - - org.apache.curator - curator-x-discovery - org.hibernate hibernate-validator @@ -75,10 +67,6 @@ javax.validation validation-api - - it.uniroma3.mat - extendedset - com.google.guava guava @@ -127,16 +115,6 @@ log4j log4j - - mysql - mysql-connector-java - 5.1.18 - - - org.mozilla - rhino - 1.7R4 - @@ -168,7 +146,15 @@ - + + + + true + true + + + + diff --git a/docs/content/Booting-a-production-cluster.md b/docs/content/Booting-a-production-cluster.md index 61755b5733d..3f57ce13d1c 100644 --- a/docs/content/Booting-a-production-cluster.md +++ b/docs/content/Booting-a-production-cluster.md @@ -3,7 +3,7 @@ layout: doc_page --- # Booting a Single Node Cluster # -[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.26-bin.tar.gz). +[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.46-bin.tar.gz). The [ec2 run script](https://github.com/metamx/druid/blob/master/examples/bin/run_ec2.sh), run_ec2.sh, is located at 'examples/bin' if you have checked out the code, or at the root of the project if you've downloaded a tarball. The scripts rely on the [Amazon EC2 API Tools](http://aws.amazon.com/developertools/351), and you will need to set three environment variables: diff --git a/docs/content/Cluster-setup.md b/docs/content/Cluster-setup.md index aa142efc453..e4ba0e564f1 100644 --- a/docs/content/Cluster-setup.md +++ b/docs/content/Cluster-setup.md @@ -1,6 +1,9 @@ --- layout: doc_page --- + +# Setting Up a Druid Cluster + A Druid cluster consists of various node types that need to be set up depending on your use case. See our [Design](Design.html) docs for a description of the different node types. Minimum Physical Layout: Absolute Minimum @@ -74,7 +77,7 @@ Local disk ("ephemeral" on AWS EC2) for caching is recommended over network moun Setup ----- -Setting up a cluster is essentially just firing up all of the nodes you want with the proper [[configuration]]. 
One thing to be aware of is that there are a few properties in the configuration that potentially need to be set individually for each process: +Setting up a cluster is essentially just firing up all of the nodes you want with the proper [configuration](Configuration.html). One thing to be aware of is that there are a few properties in the configuration that potentially need to be set individually for each process: ``` druid.server.type=historical|realtime diff --git a/docs/content/Configuration.md b/docs/content/Configuration.md index 51afb07ae65..f01b5d41a0b 100644 --- a/docs/content/Configuration.md +++ b/docs/content/Configuration.md @@ -1,25 +1,28 @@ --- layout: doc_page --- + +# Configuring Druid + This describes the basic server configuration that is loaded by all the server processes; the same file is loaded by all. See also the json "specFile" descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html). -JVM Configuration Best Practices -================================ +## JVM Configuration Best Practices There are three JVM parameters that we set on all of our processes: -1. `-Duser.timezone=UTC` This sets the default timezone of the JVM to UTC. We always set this and do not test with other default timezones, so local timezones might work, but they also might uncover weird and interesting bugs -2. `-Dfile.encoding=UTF-8` This is similar to timezone, we test assuming UTF-8. Local encodings might work, but they also might result in weird and interesting bugs -3. `-Djava.io.tmpdir=` Various parts of the system that interact with the file system do it via temporary files, these files can get somewhat large. Many production systems are setup to have small (but fast) `/tmp` directories, these can be problematic with Druid so we recommend pointing the JVM’s tmp directory to something with a little more meat. +1. `-Duser.timezone=UTC` This sets the default timezone of the JVM to UTC. We always set this and do not test with other default timezones, so local timezones might work, but they also might uncover weird and interesting bugs. +2. `-Dfile.encoding=UTF-8` This is similar to timezone, we test assuming UTF-8. Local encodings might work, but they also might result in weird and interesting bugs. +3. `-Djava.io.tmpdir=` Various parts of the system that interact with the file system do it via temporary files, and these files can get somewhat large. Many production systems are set up to have small (but fast) `/tmp` directories, which can be problematic with Druid so we recommend pointing the JVM’s tmp directory to something with a little more meat. -Modules -======= +## Modules -As of Druid v0.6, most core Druid functionality has been compartmentalized into modules. There are a set of default modules that may apply to any node type, and there are specific modules for the different node types. Default modules are __lazily instantiated__. Each module has its own set of configuration. This page will describe the configuration of the default modules. +As of Druid v0.6, most core Druid functionality has been compartmentalized into modules. There are a set of default modules that may apply to any node type, and there are specific modules for the different node types. Default modules are __lazily instantiated__. Each module has its own set of configuration. + +This page describes the configuration of the default modules. Node-specific configuration is discussed on each node's respective page. In addition, you can add custom modules to [extend Druid](Modules.html). 
Configuration of the various modules is done via Java properties. These can either be provided as `-D` system properties on the java command line or they can be passed in via a file called `runtime.properties` that exists on the classpath. -Note: as a future item, we’d like to consolidate all of the various configuration into a yaml/JSON based configuration files. +Note: as a future item, we’d like to consolidate all of the various configuration into a yaml/JSON based configuration file. ### Emitter Module @@ -147,7 +150,7 @@ Druid storage nodes maintain information about segments they have already downlo |Property|Description|Default| |--------|-----------|-------| -|`druid.segmentCache.locations`|Segments assigned to a historical node are first stored on the local file system and then served by the historical node. These locations defines where that local cache resides|none| +|`druid.segmentCache.locations`|Segments assigned to a historical node are first stored on the local file system and then served by the historical node. These locations define where that local cache resides|none| |`druid.segmentCache.deleteOnRemove`|Delete segment files from cache once a node is no longer serving a segment.|true| |`druid.segmentCache.infoDir`|Historical nodes keep track of the segments they are serving so that when the process is restarted they can reload the same segments without waiting for the coordinator to reassign. This path defines where this metadata is kept. Directory will be created if needed.|${first_location}/info_dir| @@ -282,8 +285,10 @@ This deep storage is used to interface with Amazon's S3. |Property|Description|Default| |--------|-----------|-------| |`druid.storage.bucket`|S3 bucket name.|none| -|`druid.storage.basekey`|S3 base key.|none| +|`druid.storage.baseKey`|S3 object key prefix for storage.|none| |`druid.storage.disableAcl`|Boolean flag for ACL.|false| +|`druid.storage.archiveBucket`|S3 bucket name for archiving when running the indexing-service *archive task*.|none| +|`druid.storage.archiveBaseKey`|S3 object key prefix for archiving.|none| #### HDFS Deep Storage @@ -308,21 +313,29 @@ This module is used to configure the [Indexing Service](Indexing-Service.html) t |Property|Description|Default| |--------|-----------|-------| -|`druid.indexer.logs.type`|Choices:noop, S3. Where to store task logs|noop| +|`druid.indexer.logs.type`|Choices:noop, s3, file. Where to store task logs|file| -#### Noop Task Logs +#### File Task Logs -No task logs are actually stored. +Store task logs in the local filesystem. + +|Property|Description|Default| +|--------|-----------|-------| +|`druid.indexer.logs.directory`|Local filesystem path.|log| #### S3 Task Logs -Store Task Logs in S3. +Store task logs in S3. |Property|Description|Default| |--------|-----------|-------| |`druid.indexer.logs.s3Bucket`|S3 bucket name.|none| |`druid.indexer.logs.s3Prefix`|S3 key prefix.|none| +#### Noop Task Logs + +No task logs are actually stored. + ### Firehose Module The Firehose module lists all available firehoses. There are no configurations. 
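To make the task log and S3 deep storage settings documented above concrete, here is a minimal sketch of how they might appear together in a `runtime.properties` file. The bucket names, key prefixes, and log directory are placeholders for illustration, not values taken from this changeset:

```
# Illustrative values only -- substitute your own paths and buckets.
# Task logs on the local filesystem (the new default type):
druid.indexer.logs.type=file
druid.indexer.logs.directory=/tmp/druid/task-logs

# S3 deep storage, including the archive properties used by the indexing-service archive task:
druid.storage.bucket=my-druid-segments
druid.storage.baseKey=segments
druid.storage.archiveBucket=my-druid-archive
druid.storage.archiveBaseKey=archived-segments
```

To push task logs to S3 instead, set `druid.indexer.logs.type=s3` together with `druid.indexer.logs.s3Bucket` and `druid.indexer.logs.s3Prefix` as described in the table above.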
diff --git a/docs/content/Examples.md b/docs/content/Examples.md index 81549a7ec2d..f0771408c75 100644 --- a/docs/content/Examples.md +++ b/docs/content/Examples.md @@ -19,13 +19,13 @@ Clone Druid and build it: git clone https://github.com/metamx/druid.git druid cd druid git fetch --tags -git checkout druid-0.6.26 +git checkout druid-0.6.46 ./build.sh ``` ### Downloading the DSK (Druid Standalone Kit) -[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz) a stand-alone tarball and run it: +[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.46-bin.tar.gz) a stand-alone tarball and run it: ``` bash tar -xzf druid-services-0.X.X-bin.tar.gz diff --git a/docs/content/Indexing-Service.md b/docs/content/Indexing-Service.md index aed200a7c8f..2f15b200025 100644 --- a/docs/content/Indexing-Service.md +++ b/docs/content/Indexing-Service.md @@ -56,6 +56,7 @@ With the following JVM configuration: -Ddruid.db.connector.password=diurd -Ddruid.selectors.indexing.serviceName=overlord +-Ddruid.indexer.queue.startDelay=PT0M -Ddruid.indexer.runner.javaOpts="-server -Xmx1g" -Ddruid.indexer.runner.startPort=8081 -Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456 @@ -110,12 +111,17 @@ If autoscaling is enabled, new middle managers may be added when a task has been #### JVM Configuration -In addition to the configuration of some of the default modules in [Configuration](Configuration.html), the overlord module requires the following basic configs to run in remote mode: +In addition to the configuration of some of the default modules in [Configuration](Configuration.html), the overlord has the following basic configs: |Property|Description|Default| |--------|-----------|-------| |`druid.indexer.runner.type`|Choices "local" or "remote". Indicates whether tasks should be run locally or in a distributed environment.|local| -|`druid.indexer.storage.type`|Choices are "local" or "db". Indicates whether incoming tasks should be stored locally (in heap) or in a database. Storing incoming tasks in a database allows for tasks to be bootstrapped if the overlord should fail.|local| +|`druid.indexer.storage.type`|Choices are "local" or "db". Indicates whether incoming tasks should be stored locally (in heap) or in a database. Storing incoming tasks in a database allows for tasks to be resumed if the overlord should fail.|local| +|`druid.indexer.storage.recentlyFinishedThreshold`|A duration of time to store task results.|PT24H| +|`druid.indexer.queue.maxSize`|Maximum number of active tasks at one time.|Integer.MAX_VALUE| +|`druid.indexer.queue.startDelay`|Sleep this long before starting overlord queue management. This can be useful to give a cluster time to re-orient itself after e.g. a widespread network issue.|PT1M| +|`druid.indexer.queue.restartDelay`|Sleep this long when overlord queue management throws an exception before trying again.|PT30S| +|`druid.indexer.queue.storageSyncRate`|Sync overlord state this often with an underlying task persistence mechanism.|PT1M| The following configs only apply if the overlord is running in remote mode: diff --git a/docs/content/Modules.md b/docs/content/Modules.md index 17b8e538785..b5b8a693053 100644 --- a/docs/content/Modules.md +++ b/docs/content/Modules.md @@ -1,6 +1,9 @@ --- layout: doc_page --- + +# Extending Druid With Custom Modules + Druid version 0.6 introduces a new module system that allows for the addition of extensions at runtime. 
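As a quick orientation before the sections that follow: extensions are pulled in at runtime through the `druid.extensions.coordinates` property. A minimal sketch, reusing the Maven coordinates that appear in the example configs elsewhere in this changeset:

```
# Maven coordinates of extension modules to load at runtime (illustrative list)
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.46","io.druid.extensions:druid-kafka-seven:0.6.46"]
```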
## Specifying extensions @@ -164,4 +167,4 @@ Adding new Jersey resources to a module requires calling the following code to b ```java Jerseys.addResource(binder, NewResource.class); -``` \ No newline at end of file +``` diff --git a/docs/content/Realtime.md b/docs/content/Realtime.md index 3d6c432add1..a6111a8734b 100644 --- a/docs/content/Realtime.md +++ b/docs/content/Realtime.md @@ -27,7 +27,7 @@ druid.host=localhost druid.service=realtime druid.port=8083 -druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.26"] +druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.46"] druid.zk.service.host=localhost diff --git a/docs/content/TopNMetricSpec.md b/docs/content/TopNMetricSpec.md new file mode 100644 index 00000000000..672f5d352f6 --- /dev/null +++ b/docs/content/TopNMetricSpec.md @@ -0,0 +1,45 @@ +--- +layout: doc_page +--- +TopNMetricSpec +================== + +The topN metric spec specifies how topN values should be sorted. + +## Numeric TopNMetricSpec + +The simplest metric specification is a String value indicating the metric to sort topN results by. They are included in a topN query with: + +```json +"metric": +``` + +The metric field can also be given as a JSON object. The grammar for dimension values sorted by numeric value is shown below: + +```json +"metric": { + "type": "numeric", + "metric": "" +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|this indicates a numeric sort|yes| +|metric|the actual metric field in which results will be sorted by|yes| + +## Lexicographic TopNMetricSpec + +The grammar for dimension values sorted lexicographically is as follows: + +```json +"metric": { + "type": "lexicographic", + "previousStop": "" +} +``` + +|property|description|required?| +|--------|-----------|---------| +|type|this indicates a lexicographic sort|yes| +|previousStop|the starting point of the lexicographic sort. For example, if a previousStop value is 'b', all values before 'b' are discarded. This field can be used to paginate through all the dimension values.|no| diff --git a/docs/content/TopNQuery.md b/docs/content/TopNQuery.md new file mode 100644 index 00000000000..d418799d29c --- /dev/null +++ b/docs/content/TopNQuery.md @@ -0,0 +1,119 @@ +--- +layout: doc_page +--- +TopN queries +================== + +TopN queries return a sorted set of results for the values in a given dimension according to some criteria. Conceptually, they can be thought of as an approximate [GroupByQuery](GroupByQuery.html) over a single dimension with an [Ordering](Ordering.html) spec. TopNs are much faster and resource efficient than GroupBys for this use case. These types of queries take a topN query object and return an array of JSON objects where each object represents a value asked for by the topN query. 
+ +A topN query object looks like: + +```json + "queryType": "topN", + "dataSource": "sample_data", + "dimension": "sample_dim", + "threshold": 5, + "metric": "count", + "granularity": "all", + "filter": { + "type": "and", + "fields": [ + { + "type": "selector", + "dimension": "dim1", + "value": "some_value" + }, + { + "type": "selector", + "dimension": "dim2", + "value": "some_other_val" + } + ] + }, + "aggregations": [ + { + "type": "longSum", + "name": "count", + "fieldName": "count" + }, + { + "type": "doubleSum", + "name": "some_metric", + "fieldName": "some_metric" + } + ], + "postAggregations": [ + { + "type": "arithmetic", + "name": "sample_divide", + "fn": "/", + "fields": [ + { + "type": "fieldAccess", + "name": "some_metric", + "fieldName": "some_metric" + }, + { + "type": "fieldAccess", + "name": "count", + "fieldName": "count" + } + ] + } + ], + "intervals": [ + "2013-08-31T00:00:00.000/2013-09-03T00:00:00.000" + ] +} +``` + +There are 10 parts to a topN query, but 7 of them are shared with [TimeseriesQuery](TimeseriesQuery.html). Please review [TimeseriesQuery](TimeseriesQuery.html) for meanings of fields not defined below. + +|property|description|required?| +|--------|-----------|---------| +|dimension|A JSON object defining the dimension that you want the top taken for. For more info, see [DimensionSpecs](DimensionSpecs.html)|yes| +|threshold|An integer defining the N in the topN (i.e. how many you want in the top list)|yes| +|metric|A JSON object specifying the metric to sort by for the top list. For more info, see [TopNMetricSpec](TopNMetricSpec.html).|yes| + +Please note the context JSON object is also available for topN queries and should be used with the same caution as the timeseries case. +The format of the results would look like so: + +```json +[ + { + "timestamp": "2013-08-31T00:00:00.000Z", + "result": [ + { + "dim1": "dim1_val", + "count": 111, + "some_metrics": 10669, + "average": 96.11711711711712 + }, + { + "dim1": "another_dim1_val", + "count": 88, + "some_metrics": 28344, + "average": 322.09090909090907 + }, + { + "dim1": "dim1_val3", + "count": 70, + "some_metrics": 871, + "average": 12.442857142857143 + }, + { + "dim1": "dim1_val4", + "count": 62, + "some_metrics": 815, + "average": 13.14516129032258 + }, + { + "dim1": "dim1_val5", + "count": 60, + "some_metrics": 2787, + "average": 46.45 + } + ] + } +] +``` diff --git a/docs/content/Tutorial:-A-First-Look-at-Druid.md b/docs/content/Tutorial:-A-First-Look-at-Druid.md index 548d573c6f1..1f6f43ac97e 100644 --- a/docs/content/Tutorial:-A-First-Look-at-Druid.md +++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md @@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu ### Download a Tarball -We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz). Download this file to a directory of your choosing. +We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.46-bin.tar.gz). Download this file to a directory of your choosing. You can extract the awesomeness within by issuing: @@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz Not too lost so far right? That's great! 
If you cd into the directory: ``` -cd druid-services-0.6.26 +cd druid-services-0.6.46 ``` You should see a bunch of files: @@ -205,7 +205,7 @@ You are probably wondering, what are these [Granularities](Granularities.html) a To issue the query and get some results, run the following in your command line: ``` -curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d ````timeseries_query.body +curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body ``` Once again, you should get a JSON blob of text back with your results, that looks something like this: diff --git a/docs/content/Tutorial:-Loading-Your-Data-Part-1.md b/docs/content/Tutorial:-Loading-Your-Data-Part-1.md index 100df976de4..1f0992bb6bd 100644 --- a/docs/content/Tutorial:-Loading-Your-Data-Part-1.md +++ b/docs/content/Tutorial:-Loading-Your-Data-Part-1.md @@ -94,6 +94,7 @@ druid.db.connector.user=druid druid.db.connector.password=diurd druid.selectors.indexing.serviceName=overlord +druid.indexer.queue.startDelay=PT0M druid.indexer.runner.javaOpts="-server -Xmx1g" druid.indexer.runner.startPort=8088 druid.indexer.fork.property.druid.computation.buffer.size=268435456 @@ -246,6 +247,23 @@ Issuing a [TimeBoundaryQuery](TimeBoundaryQuery.html) should yield: } ] ``` +Console +-------- + +The indexing service overlord has a console located at: + +```bash +localhost:8087/console.html +``` + +On this console, you can look at statuses and logs of recently submitted and completed tasks. + +If you decide to reuse the local firehose to ingest your own data and if you run into problems, you can use the console to read the individual task logs. + +Task logs can be stored locally or uploaded to [Deep Storage](Deep-Storage.html). More information about how to configure this is [here](Configuration.html). + +Most common data ingestion problems are around timestamp formats and other malformed data issues. + Next Steps ---------- diff --git a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md index 60d5487784d..cffb288f5b3 100644 --- a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md +++ b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md @@ -44,7 +44,7 @@ With real-world data, we recommend having a message bus such as [Apache Kafka](h #### Setting up Kafka -[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.26/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node. +[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.46/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node. Instructions for booting a Zookeeper and then Kafka cluster are available [here](http://kafka.apache.org/07/quickstart.html). 
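For orientation, here is a rough sketch of what the Kafka firehose section of a realtime spec might look like once Zookeeper and Kafka are running. This is an assumption-laden illustration rather than a config from this changeset: the consumer properties are standard Kafka 0.7 settings, the feed name is a placeholder, and the accompanying parser section (see [Realtime](Realtime.html)) is omitted:

```json
"firehose": {
  "type": "kafka-0.7.2",
  "consumerProps": {
    "zk.connect": "localhost:2181",
    "groupid": "druid-example",
    "autooffset.reset": "largest"
  },
  "feed": "wikipedia"
}
```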
diff --git a/docs/content/Tutorial:-The-Druid-Cluster.md b/docs/content/Tutorial:-The-Druid-Cluster.md index e954c3de257..b2f5b6975ec 100644 --- a/docs/content/Tutorial:-The-Druid-Cluster.md +++ b/docs/content/Tutorial:-The-Druid-Cluster.md @@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes as well as and exter If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first. -You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz) +You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.46-bin.tar.gz) and untar the contents within by issuing: @@ -149,7 +149,7 @@ druid.port=8081 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.26"] +druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.46"] # Dummy read only AWS account (used to download example data) druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b @@ -238,7 +238,7 @@ druid.port=8083 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.26","io.druid.extensions:druid-kafka-seven:0.6.26"] +druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.46","io.druid.extensions:druid-kafka-seven:0.6.46"] # Change this config to db to hand off to the rest of the Druid cluster druid.publish.type=noop @@ -253,5 +253,5 @@ druid.processing.buffer.sizeBytes=10000000 Next Steps ---------- -If you are intested in how data flows through the different Druid components, check out the [Druid data flow architecture](Design.html). Now that you have an understanding of what the Druid cluster looks like, why not load some of your own data? +If you are interested in how data flows through the different Druid components, check out the [Druid data flow architecture](Design.html). Now that you have an understanding of what the Druid cluster looks like, why not load some of your own data? Check out the next [tutorial](Tutorial%3A-Loading-Your-Data-Part-1.html) section for more info! diff --git a/docs/content/Tutorial:-Webstream.md b/docs/content/Tutorial:-Webstream.md index c8b83d1e00c..cdbbc5f7ee7 100644 --- a/docs/content/Tutorial:-Webstream.md +++ b/docs/content/Tutorial:-Webstream.md @@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu h3. Download a Tarball -We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz) +We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.46-bin.tar.gz) Download this file to a directory of your choosing. You can extract the awesomeness within by issuing: @@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz Not too lost so far right? That's great! If you cd into the directory: ``` -cd druid-services-0.6.26 +cd druid-services-0.6.46 ``` You should see a bunch of files: diff --git a/docs/content/Twitter-Tutorial.textile b/docs/content/Twitter-Tutorial.textile index 9e368b13f10..68780006068 100644 --- a/docs/content/Twitter-Tutorial.textile +++ b/docs/content/Twitter-Tutorial.textile @@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source. h3. 
Download a Tarball -We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz. +We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.46-bin.tar.gz. Download this bad boy to a directory of your choosing. You can extract the awesomeness within by issuing: diff --git a/docs/content/toc.textile b/docs/content/toc.textile index 84fde036997..2a5f7ed185d 100644 --- a/docs/content/toc.textile +++ b/docs/content/toc.textile @@ -3,7 +3,7 @@ -h1. Introduction +h2. Introduction * "About Druid":./ * "Concepts and Terminology":./Concepts-and-Terminology.html @@ -14,16 +14,12 @@ h2. Getting Started * "Tutorial: Loading Your Data Part 2":./Tutorial:-Loading-Your-Data-Part-2.html * "Tutorial: All About Queries":./Tutorial:-All-About-Queries.html -h2. Evaluate Druid +h2. Operations +* "Configuration":Configuration.html +* "Extending Druid":./Modules.html * "Cluster Setup":./Cluster-setup.html * "Booting a Production Cluster":./Booting-a-production-cluster.html -h2. Configuration -* "Configuration":Configuration.html - -h2. Extend Druid -* "Modules":./Modules.html - h2. Data Ingestion * "Realtime":./Realtime.html * "Batch":./Batch-ingestion.html @@ -46,6 +42,8 @@ h2. Querying ** "SegmentMetadataQuery":./SegmentMetadataQuery.html ** "TimeBoundaryQuery":./TimeBoundaryQuery.html ** "TimeseriesQuery":./TimeseriesQuery.html +** "TopNQuery":./TopNQuery.html +*** "TopNMetricSpec":./TopNMetricSpec.html h2. Architecture * "Design":./Design.html diff --git a/examples/config/historical/runtime.properties b/examples/config/historical/runtime.properties index 67cc44f74d7..8d9f1f35096 100644 --- a/examples/config/historical/runtime.properties +++ b/examples/config/historical/runtime.properties @@ -4,7 +4,7 @@ druid.port=8081 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.26"] +druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.46"] # Dummy read only AWS account (used to download example data) druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b diff --git a/examples/config/overlord/runtime.properties b/examples/config/overlord/runtime.properties index c9c4478d4c4..f452a7d06d6 100644 --- a/examples/config/overlord/runtime.properties +++ b/examples/config/overlord/runtime.properties @@ -9,6 +9,7 @@ druid.db.connector.user=druid druid.db.connector.password=diurd druid.selectors.indexing.serviceName=overlord +druid.indexer.queue.startDelay=PT0M druid.indexer.runner.javaOpts="-server -Xmx1g" druid.indexer.runner.startPort=8088 druid.indexer.fork.property.druid.computation.buffer.size=268435456 diff --git a/examples/config/realtime/runtime.properties b/examples/config/realtime/runtime.properties index aefabeda473..6ebf2e5dcdf 100644 --- a/examples/config/realtime/runtime.properties +++ b/examples/config/realtime/runtime.properties @@ -4,7 +4,7 @@ druid.port=8083 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.26","io.druid.extensions:druid-kafka-seven:0.6.26","io.druid.extensions:druid-rabbitmq:0.6.26"] +druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.46","io.druid.extensions:druid-kafka-seven:0.6.46","io.druid.extensions:druid-rabbitmq:0.6.46"] # Change this config to db to hand off to the rest of the Druid cluster druid.publish.type=noop diff --git a/examples/pom.xml 
b/examples/pom.xml index 30ee4e6f586..0e4f56ca1e8 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -104,6 +104,14 @@ + + + + true + true + + + diff --git a/hdfs-storage/pom.xml b/hdfs-storage/pom.xml index 71da35eff72..a59524271d5 100644 --- a/hdfs-storage/pom.xml +++ b/hdfs-storage/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -71,4 +71,20 @@ test + + + + + maven-jar-plugin + + + + true + true + + + + + + diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 3dfb59c2824..38d032fadba 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -101,6 +101,17 @@ + + maven-jar-plugin + + + + true + true + + + + maven-shade-plugin diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java index 9f1bd030284..2eedaf76d31 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java @@ -62,7 +62,7 @@ public abstract class HadoopDruidIndexerMapper extends Mapper< try { inputRow = parser.parse(value.toString()); } - catch (IllegalArgumentException e) { + catch (Exception e) { if (config.isIgnoreInvalidRows()) { context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); return; // we're ignoring this invalid row diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 9b718e7e172..e8cfbdf8fa1 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -47,95 +47,10 @@ druid-indexing-hadoop ${project.parent.version} - - com.metamx - emitter - - - com.metamx - http-client - - - com.metamx - java-util - - - com.metamx - server-metrics - - - - commons-codec - commons-codec - - - commons-io - commons-io - - - org.skife.config - config-magic - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - com.google.guava - guava - - - com.google.inject - guice - - - com.google.inject.extensions - guice-servlet - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.jaxrs - jackson-jaxrs-json-provider - - - com.fasterxml.jackson.core - jackson-databind - - - javax.inject - javax.inject - - - org.jdbi - jdbi - - - com.sun.jersey - jersey-core - - - com.sun.jersey.contribs - jersey-guice - - - org.eclipse.jetty - jetty-server - - - joda-time - joda-time - - - com.google.code.findbugs - jsr305 + mysql + mysql-connector-java + 5.1.18 @@ -160,4 +75,20 @@ test + + + + + maven-jar-plugin + + + + true + true + + + + + + diff --git a/indexing-service/src/main/java/io/druid/guice/IndexingServiceFirehoseModule.java b/indexing-service/src/main/java/io/druid/guice/IndexingServiceFirehoseModule.java index 703179f4542..ed9f628452c 100644 --- a/indexing-service/src/main/java/io/druid/guice/IndexingServiceFirehoseModule.java +++ b/indexing-service/src/main/java/io/druid/guice/IndexingServiceFirehoseModule.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.databind.jsontype.NamedType; import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; -import io.druid.indexing.common.config.EventReceiverFirehoseFactoryConfig; import 
io.druid.indexing.common.index.EventReceiverFirehoseFactory; import io.druid.initialization.DruidModule; @@ -46,7 +45,5 @@ public class IndexingServiceFirehoseModule implements DruidModule @Override public void configure(Binder binder) { - // backwards compatibility - ConfigProvider.bind(binder, EventReceiverFirehoseFactoryConfig.class); } } diff --git a/server/src/main/java/io/druid/guice/TaskLogsModule.java b/indexing-service/src/main/java/io/druid/guice/IndexingServiceTaskLogsModule.java similarity index 76% rename from server/src/main/java/io/druid/guice/TaskLogsModule.java rename to indexing-service/src/main/java/io/druid/guice/IndexingServiceTaskLogsModule.java index eedd12caabc..33452d0dfd9 100644 --- a/server/src/main/java/io/druid/guice/TaskLogsModule.java +++ b/indexing-service/src/main/java/io/druid/guice/IndexingServiceTaskLogsModule.java @@ -23,22 +23,27 @@ import com.google.inject.Binder; import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.multibindings.MapBinder; +import io.druid.indexing.common.config.FileTaskLogsConfig; +import io.druid.indexing.common.tasklogs.FileTaskLogs; import io.druid.tasklogs.NoopTaskLogs; import io.druid.tasklogs.TaskLogPusher; import io.druid.tasklogs.TaskLogs; /** */ -public class TaskLogsModule implements Module +public class IndexingServiceTaskLogsModule implements Module { @Override public void configure(Binder binder) { - PolyBind.createChoice(binder, "druid.indexer.logs.type", Key.get(TaskLogs.class), Key.get(NoopTaskLogs.class)); + PolyBind.createChoice(binder, "druid.indexer.logs.type", Key.get(TaskLogs.class), Key.get(FileTaskLogs.class)); final MapBinder taskLogBinder = Binders.taskLogsBinder(binder); taskLogBinder.addBinding("noop").to(NoopTaskLogs.class).in(LazySingleton.class); + taskLogBinder.addBinding("file").to(FileTaskLogs.class).in(LazySingleton.class); binder.bind(NoopTaskLogs.class).in(LazySingleton.class); + binder.bind(FileTaskLogs.class).in(LazySingleton.class); + JsonConfigProvider.bind(binder, "druid.indexer.logs", FileTaskLogsConfig.class); binder.bind(TaskLogPusher.class).to(TaskLogs.class); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java index 0a7a505d4ec..44f3c600f55 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java @@ -20,25 +20,36 @@ package io.druid.indexing.common; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; import com.metamx.emitter.service.ServiceEmitter; import com.metamx.metrics.MonitorScheduler; import io.druid.client.ServerView; +import io.druid.indexing.common.actions.SegmentInsertAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.task.Task; import io.druid.query.QueryRunnerFactoryConglomerate; +import io.druid.segment.loading.DataSegmentArchiver; import io.druid.segment.loading.DataSegmentKiller; +import io.druid.segment.loading.DataSegmentMover; import io.druid.segment.loading.DataSegmentPusher; import io.druid.segment.loading.SegmentLoader; import 
io.druid.segment.loading.SegmentLoadingException; import io.druid.server.coordination.DataSegmentAnnouncer; import io.druid.timeline.DataSegment; +import org.joda.time.Interval; import java.io.File; +import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutorService; /** @@ -52,6 +63,8 @@ public class TaskToolbox private final ServiceEmitter emitter; private final DataSegmentPusher segmentPusher; private final DataSegmentKiller dataSegmentKiller; + private final DataSegmentArchiver dataSegmentArchiver; + private final DataSegmentMover dataSegmentMover; private final DataSegmentAnnouncer segmentAnnouncer; private final ServerView newSegmentServerView; private final QueryRunnerFactoryConglomerate queryRunnerFactoryConglomerate; @@ -68,6 +81,8 @@ public class TaskToolbox ServiceEmitter emitter, DataSegmentPusher segmentPusher, DataSegmentKiller dataSegmentKiller, + DataSegmentMover dataSegmentMover, + DataSegmentArchiver dataSegmentArchiver, DataSegmentAnnouncer segmentAnnouncer, ServerView newSegmentServerView, QueryRunnerFactoryConglomerate queryRunnerFactoryConglomerate, @@ -84,6 +99,8 @@ public class TaskToolbox this.emitter = emitter; this.segmentPusher = segmentPusher; this.dataSegmentKiller = dataSegmentKiller; + this.dataSegmentMover = dataSegmentMover; + this.dataSegmentArchiver = dataSegmentArchiver; this.segmentAnnouncer = segmentAnnouncer; this.newSegmentServerView = newSegmentServerView; this.queryRunnerFactoryConglomerate = queryRunnerFactoryConglomerate; @@ -119,6 +136,16 @@ public class TaskToolbox return dataSegmentKiller; } + public DataSegmentMover getDataSegmentMover() + { + return dataSegmentMover; + } + + public DataSegmentArchiver getDataSegmentArchiver() + { + return dataSegmentArchiver; + } + public DataSegmentAnnouncer getSegmentAnnouncer() { return segmentAnnouncer; @@ -149,7 +176,7 @@ public class TaskToolbox return objectMapper; } - public Map getSegments(List segments) + public Map fetchSegments(List segments) throws SegmentLoadingException { Map retVal = Maps.newLinkedHashMap(); @@ -160,6 +187,25 @@ public class TaskToolbox return retVal; } + public void pushSegments(Iterable segments) throws IOException { + // Request segment pushes for each set + final Multimap segmentMultimap = Multimaps.index( + segments, + new Function() + { + @Override + public Interval apply(DataSegment segment) + { + return segment.getInterval(); + } + } + ); + for (final Collection segmentCollection : segmentMultimap.asMap().values()) { + getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.copyOf(segmentCollection))); + } + + } + public File getTaskWorkDir() { return taskWorkDir; diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java index ca00dccaf91..d655edc34f0 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java @@ -24,13 +24,14 @@ import com.google.inject.Inject; import com.metamx.emitter.service.ServiceEmitter; import com.metamx.metrics.MonitorScheduler; import io.druid.client.ServerView; -import io.druid.guice.annotations.Json; import io.druid.guice.annotations.Processing; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.config.TaskConfig; import 
io.druid.indexing.common.task.Task; import io.druid.query.QueryRunnerFactoryConglomerate; +import io.druid.segment.loading.DataSegmentArchiver; import io.druid.segment.loading.DataSegmentKiller; +import io.druid.segment.loading.DataSegmentMover; import io.druid.segment.loading.DataSegmentPusher; import io.druid.server.coordination.DataSegmentAnnouncer; @@ -47,6 +48,8 @@ public class TaskToolboxFactory private final ServiceEmitter emitter; private final DataSegmentPusher segmentPusher; private final DataSegmentKiller dataSegmentKiller; + private final DataSegmentMover dataSegmentMover; + private final DataSegmentArchiver dataSegmentArchiver; private final DataSegmentAnnouncer segmentAnnouncer; private final ServerView newSegmentServerView; private final QueryRunnerFactoryConglomerate queryRunnerFactoryConglomerate; @@ -62,6 +65,8 @@ public class TaskToolboxFactory ServiceEmitter emitter, DataSegmentPusher segmentPusher, DataSegmentKiller dataSegmentKiller, + DataSegmentMover dataSegmentMover, + DataSegmentArchiver dataSegmentArchiver, DataSegmentAnnouncer segmentAnnouncer, ServerView newSegmentServerView, QueryRunnerFactoryConglomerate queryRunnerFactoryConglomerate, @@ -76,6 +81,8 @@ public class TaskToolboxFactory this.emitter = emitter; this.segmentPusher = segmentPusher; this.dataSegmentKiller = dataSegmentKiller; + this.dataSegmentMover = dataSegmentMover; + this.dataSegmentArchiver = dataSegmentArchiver; this.segmentAnnouncer = segmentAnnouncer; this.newSegmentServerView = newSegmentServerView; this.queryRunnerFactoryConglomerate = queryRunnerFactoryConglomerate; @@ -96,6 +103,8 @@ public class TaskToolboxFactory emitter, segmentPusher, dataSegmentKiller, + dataSegmentMover, + dataSegmentArchiver, segmentAnnouncer, newSegmentServerView, queryRunnerFactoryConglomerate, diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/LocalTaskActionClient.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/LocalTaskActionClient.java index 8bb23918b01..4dd445df80d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/LocalTaskActionClient.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/actions/LocalTaskActionClient.java @@ -19,6 +19,7 @@ package io.druid.indexing.common.actions; +import com.metamx.common.ISE; import com.metamx.emitter.EmittingLogger; import io.druid.indexing.common.task.Task; import io.druid.indexing.overlord.TaskStorage; @@ -45,21 +46,21 @@ public class LocalTaskActionClient implements TaskActionClient { log.info("Performing action for task[%s]: %s", task.getId(), taskAction); - final RetType ret = taskAction.perform(task, toolbox); - if (taskAction.isAudited()) { // Add audit log try { storage.addAuditLog(task, taskAction); } catch (Exception e) { + final String actionClass = taskAction.getClass().getName(); log.makeAlert(e, "Failed to record action in audit log") .addData("task", task.getId()) - .addData("actionClass", taskAction.getClass().getName()) + .addData("actionClass", actionClass) .emit(); + throw new ISE(e, "Failed to record action [%s] in audit log", actionClass); } } - return ret; + return taskAction.perform(task, toolbox); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/LockAcquireAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/LockAcquireAction.java index 5730e3be082..5d600dcd369 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/LockAcquireAction.java +++ 
b/indexing-service/src/main/java/io/druid/indexing/common/actions/LockAcquireAction.java @@ -68,7 +68,7 @@ public class LockAcquireAction implements TaskAction @Override public boolean isAudited() { - return true; + return false; } @Override diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/LockReleaseAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/LockReleaseAction.java index 97397666d2b..6179c5ee658 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/LockReleaseAction.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/actions/LockReleaseAction.java @@ -60,7 +60,7 @@ public class LockReleaseAction implements TaskAction @Override public boolean isAudited() { - return true; + return false; } @Override diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/SpawnTasksAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/LockTryAcquireAction.java similarity index 64% rename from indexing-service/src/main/java/io/druid/indexing/common/actions/SpawnTasksAction.java rename to indexing-service/src/main/java/io/druid/indexing/common/actions/LockTryAcquireAction.java index 85b1a53c275..699460af82f 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/SpawnTasksAction.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/actions/LockTryAcquireAction.java @@ -23,56 +23,54 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; -import com.google.common.collect.ImmutableList; +import com.google.common.base.Optional; +import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.task.Task; +import org.joda.time.Interval; -import java.util.List; - -public class SpawnTasksAction implements TaskAction +public class LockTryAcquireAction implements TaskAction> { @JsonIgnore - private final List newTasks; + private final Interval interval; @JsonCreator - public SpawnTasksAction( - @JsonProperty("newTasks") List newTasks + public LockTryAcquireAction( + @JsonProperty("interval") Interval interval ) { - this.newTasks = ImmutableList.copyOf(newTasks); + this.interval = interval; } @JsonProperty - public List getNewTasks() + public Interval getInterval() { - return newTasks; + return interval; } - public TypeReference getReturnTypeReference() + public TypeReference> getReturnTypeReference() { - return new TypeReference() {}; + return new TypeReference>() + { + }; } @Override - public Void perform(Task task, TaskActionToolbox toolbox) + public Optional perform(Task task, TaskActionToolbox toolbox) { - for(final Task newTask : newTasks) { - toolbox.getTaskQueue().add(newTask); - } - - return null; + return toolbox.getTaskLockbox().tryLock(task, interval); } @Override public boolean isAudited() { - return true; + return false; } @Override public String toString() { - return "SpawnTasksAction{" + - "newTasks=" + newTasks + + return "LockTryAcquireAction{" + + "interval=" + interval + '}'; } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentInsertAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentInsertAction.java index aaad73b8a9f..5280e394f6f 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentInsertAction.java +++ 
b/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentInsertAction.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableSet; -import com.metamx.common.ISE; import com.metamx.emitter.service.ServiceMetricEvent; import io.druid.indexing.common.task.Task; import io.druid.timeline.DataSegment; @@ -80,9 +79,7 @@ public class SegmentInsertAction implements TaskAction> @Override public Set perform(Task task, TaskActionToolbox toolbox) throws IOException { - if(!toolbox.taskLockCoversSegments(task, segments, allowOlderVersions)) { - throw new ISE("Segments not covered by locks for task[%s]: %s", task.getId(), segments); - } + toolbox.verifyTaskLocksAndSinglePartitionSettitude(task, segments, true); final Set retVal = toolbox.getIndexerDBCoordinator().announceHistoricalSegments(segments); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentMetadataUpdateAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentMetadataUpdateAction.java new file mode 100644 index 00000000000..4356c80dc59 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentMetadataUpdateAction.java @@ -0,0 +1,73 @@ +package io.druid.indexing.common.actions; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.type.TypeReference; +import com.google.common.collect.ImmutableSet; +import com.metamx.emitter.service.ServiceMetricEvent; +import io.druid.indexing.common.task.Task; +import io.druid.timeline.DataSegment; + +import java.io.IOException; +import java.util.Set; + +public class SegmentMetadataUpdateAction implements TaskAction +{ + @JsonIgnore + private final Set segments; + + @JsonCreator + public SegmentMetadataUpdateAction( + @JsonProperty("segments") Set segments + ) + { + this.segments = ImmutableSet.copyOf(segments); + } + + @JsonProperty + public Set getSegments() + { + return segments; + } + + public TypeReference getReturnTypeReference() + { + return new TypeReference() {}; + } + + @Override + public Void perform( + Task task, TaskActionToolbox toolbox + ) throws IOException + { + toolbox.verifyTaskLocksAndSinglePartitionSettitude(task, segments, true); + toolbox.getIndexerDBCoordinator().updateSegmentMetadata(segments); + + // Emit metrics + final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder() + .setUser2(task.getDataSource()) + .setUser4(task.getType()); + + for (DataSegment segment : segments) { + metricBuilder.setUser5(segment.getInterval().toString()); + toolbox.getEmitter().emit(metricBuilder.build("indexer/segmentMoved/bytes", segment.getSize())); + } + + return null; + } + + @Override + public boolean isAudited() + { + return true; + } + + @Override + public String toString() + { + return "SegmentMetadataUpdateAction{" + + "segments=" + segments + + '}'; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentNukeAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentNukeAction.java index 6ac8dd1ccc4..54258df1c2d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentNukeAction.java +++ 
b/indexing-service/src/main/java/io/druid/indexing/common/actions/SegmentNukeAction.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableSet; -import com.metamx.common.ISE; import com.metamx.emitter.service.ServiceMetricEvent; import io.druid.indexing.common.task.Task; import io.druid.timeline.DataSegment; @@ -59,10 +58,7 @@ public class SegmentNukeAction implements TaskAction @Override public Void perform(Task task, TaskActionToolbox toolbox) throws IOException { - if(!toolbox.taskLockCoversSegments(task, segments, true)) { - throw new ISE("Segments not covered by locks for task: %s", task.getId()); - } - + toolbox.verifyTaskLocksAndSinglePartitionSettitude(task, segments, true); toolbox.getIndexerDBCoordinator().deleteSegments(segments); // Emit metrics diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskAction.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskAction.java index 37d4346247e..038d06be3c6 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskAction.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskAction.java @@ -29,13 +29,14 @@ import java.io.IOException; @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @JsonSubTypes(value = { @JsonSubTypes.Type(name = "lockAcquire", value = LockAcquireAction.class), + @JsonSubTypes.Type(name = "lockTryAcquire", value = LockTryAcquireAction.class), @JsonSubTypes.Type(name = "lockList", value = LockListAction.class), @JsonSubTypes.Type(name = "lockRelease", value = LockReleaseAction.class), @JsonSubTypes.Type(name = "segmentInsertion", value = SegmentInsertAction.class), @JsonSubTypes.Type(name = "segmentListUsed", value = SegmentListUsedAction.class), @JsonSubTypes.Type(name = "segmentListUnused", value = SegmentListUnusedAction.class), @JsonSubTypes.Type(name = "segmentNuke", value = SegmentNukeAction.class), - @JsonSubTypes.Type(name = "spawnTasks", value = SpawnTasksAction.class) + @JsonSubTypes.Type(name = "segmentMetadataUpdate", value = SegmentMetadataUpdateAction.class) }) public interface TaskAction { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskActionToolbox.java b/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskActionToolbox.java index b7e78e0c2be..d9b0520f40b 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskActionToolbox.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/actions/TaskActionToolbox.java @@ -19,15 +19,16 @@ package io.druid.indexing.common.actions; +import com.google.api.client.repackaged.com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.inject.Inject; +import com.metamx.common.ISE; import com.metamx.emitter.service.ServiceEmitter; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.task.Task; import io.druid.indexing.overlord.IndexerDBCoordinator; import io.druid.indexing.overlord.TaskLockbox; -import io.druid.indexing.overlord.TaskQueue; import io.druid.timeline.DataSegment; import java.util.List; @@ -35,30 +36,22 @@ import java.util.Set; public class TaskActionToolbox { - private final TaskQueue taskQueue; private final TaskLockbox taskLockbox; private final IndexerDBCoordinator indexerDBCoordinator; private 
final ServiceEmitter emitter; @Inject public TaskActionToolbox( - TaskQueue taskQueue, TaskLockbox taskLockbox, IndexerDBCoordinator indexerDBCoordinator, ServiceEmitter emitter ) { - this.taskQueue = taskQueue; this.taskLockbox = taskLockbox; this.indexerDBCoordinator = indexerDBCoordinator; this.emitter = emitter; } - public TaskQueue getTaskQueue() - { - return taskQueue; - } - public TaskLockbox getTaskLockbox() { return taskLockbox; @@ -74,6 +67,38 @@ public class TaskActionToolbox return emitter; } + public boolean segmentsAreFromSamePartitionSet( + final Set segments + ) + { + // Verify that these segments are all in the same partition set + + Preconditions.checkArgument(!segments.isEmpty(), "segments nonempty"); + final DataSegment firstSegment = segments.iterator().next(); + for (final DataSegment segment : segments) { + if (!segment.getDataSource().equals(firstSegment.getDataSource()) + || !segment.getInterval().equals(firstSegment.getInterval()) + || !segment.getVersion().equals(firstSegment.getVersion())) { + return false; + } + } + return true; + } + + public void verifyTaskLocksAndSinglePartitionSettitude( + final Task task, + final Set segments, + final boolean allowOlderVersions + ) + { + if (!taskLockCoversSegments(task, segments, allowOlderVersions)) { + throw new ISE("Segments not covered by locks for task: %s", task.getId()); + } + if (!segmentsAreFromSamePartitionSet(segments)) { + throw new ISE("Segments are not in the same partition set: %s", segments); + } + } + public boolean taskLockCoversSegments( final Task task, final Set segments, diff --git a/indexing-service/src/main/java/io/druid/indexing/common/config/IndexerZkConfig.java b/indexing-service/src/main/java/io/druid/indexing/common/config/FileTaskLogsConfig.java similarity index 75% rename from indexing-service/src/main/java/io/druid/indexing/common/config/IndexerZkConfig.java rename to indexing-service/src/main/java/io/druid/indexing/common/config/FileTaskLogsConfig.java index 67a750ab535..dfc7c9a9951 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/config/IndexerZkConfig.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/config/FileTaskLogsConfig.java @@ -19,15 +19,19 @@ package io.druid.indexing.common.config; -import io.druid.server.initialization.ZkPathsConfig; -import org.skife.config.Config; -import org.skife.config.Default; +import com.fasterxml.jackson.annotation.JsonProperty; -/** - */ -public abstract class IndexerZkConfig extends ZkPathsConfig +import javax.validation.constraints.NotNull; +import java.io.File; + +public class FileTaskLogsConfig { - @Config("druid.zk.maxNumBytes") - @Default("512000") - public abstract long getMaxNumBytes(); + @JsonProperty + @NotNull + private File directory = new File("log"); + + public File getDirectory() + { + return directory; + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/config/TaskStorageConfig.java b/indexing-service/src/main/java/io/druid/indexing/common/config/TaskStorageConfig.java new file mode 100644 index 00000000000..db40c7cb069 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/config/TaskStorageConfig.java @@ -0,0 +1,19 @@ +package io.druid.indexing.common.config; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.joda.time.Duration; +import org.joda.time.Period; + +import javax.validation.constraints.NotNull; + +public class TaskStorageConfig +{ + @JsonProperty + @NotNull + public Duration recentlyFinishedThreshold = new 
Period("PT24H").toStandardDuration(); + + public Duration getRecentlyFinishedThreshold() + { + return recentlyFinishedThreshold; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/index/EventReceiverFirehoseFactory.java b/indexing-service/src/main/java/io/druid/indexing/common/index/EventReceiverFirehoseFactory.java index 0f3732ed386..b9e420f7020 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/index/EventReceiverFirehoseFactory.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/index/EventReceiverFirehoseFactory.java @@ -33,7 +33,6 @@ import io.druid.data.input.Firehose; import io.druid.data.input.FirehoseFactory; import io.druid.data.input.InputRow; import io.druid.data.input.impl.MapInputRowParser; -import io.druid.indexing.common.config.EventReceiverFirehoseFactoryConfig; import javax.ws.rs.POST; import javax.ws.rs.Path; @@ -63,31 +62,15 @@ public class EventReceiverFirehoseFactory implements FirehoseFactory private final MapInputRowParser parser; private final Optional chatHandlerProvider; - @Deprecated - private final EventReceiverFirehoseFactoryConfig config; - @JsonCreator public EventReceiverFirehoseFactory( @JsonProperty("serviceName") String serviceName, - @JsonProperty("firehoseId") String firehoseId, @JsonProperty("bufferSize") Integer bufferSize, @JsonProperty("parser") MapInputRowParser parser, - @JacksonInject ChatHandlerProvider chatHandlerProvider, - @JacksonInject EventReceiverFirehoseFactoryConfig config + @JacksonInject ChatHandlerProvider chatHandlerProvider ) { - // This code is here for backwards compatibility - if (serviceName == null) { - this.serviceName = String.format( - "%s:%s", - config.getFirehoseIdPrefix(), - Preconditions.checkNotNull(firehoseId, "firehoseId") - ); - } else { - this.serviceName = serviceName; - } - this.config = config; - + this.serviceName = Preconditions.checkNotNull(serviceName, "serviceName"); this.bufferSize = bufferSize == null || bufferSize <= 0 ? DEFAULT_BUFFER_SIZE : bufferSize; this.parser = Preconditions.checkNotNull(parser, "parser"); this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); @@ -117,13 +100,6 @@ public class EventReceiverFirehoseFactory implements FirehoseFactory return serviceName; } - @Deprecated - @JsonProperty("firehoseId") - public String getFirehoseId() - { - return serviceName.replaceFirst(String.format("%s:", config.getFirehoseIdPrefix()), ""); - } - @JsonProperty public int getBufferSize() { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractFixedIntervalTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractFixedIntervalTask.java new file mode 100644 index 00000000000..2d6687ed920 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractFixedIntervalTask.java @@ -0,0 +1,77 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.indexing.common.task; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import io.druid.indexing.common.actions.LockTryAcquireAction; +import io.druid.indexing.common.actions.TaskActionClient; +import org.joda.time.Interval; + +public abstract class AbstractFixedIntervalTask extends AbstractTask +{ + @JsonIgnore + private final Interval interval; + + protected AbstractFixedIntervalTask( + String id, + String dataSource, + Interval interval + ) + { + this(id, id, new TaskResource(id, 1), dataSource, interval); + } + + protected AbstractFixedIntervalTask( + String id, + String groupId, + String dataSource, + Interval interval + ) + { + this(id, groupId, new TaskResource(id, 1), dataSource, interval); + } + + protected AbstractFixedIntervalTask( + String id, + String groupId, + TaskResource taskResource, + String dataSource, + Interval interval + ) + { + super(id, groupId, taskResource, dataSource); + this.interval = Preconditions.checkNotNull(interval, "interval"); + Preconditions.checkArgument(interval.toDurationMillis() > 0, "interval empty"); + } + + @Override + public boolean isReady(TaskActionClient taskActionClient) throws Exception + { + return taskActionClient.submit(new LockTryAcquireAction(interval)).isPresent(); + } + + @JsonProperty + public Interval getInterval() + { + return interval; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractTask.java index 1944243e7fe..eaff1b9b46f 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AbstractTask.java @@ -23,21 +23,15 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Joiner; import com.google.common.base.Objects; -import com.google.common.base.Optional; import com.google.common.base.Preconditions; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.LockAcquireAction; import io.druid.indexing.common.actions.LockListAction; -import io.druid.indexing.common.actions.SegmentListUsedAction; -import io.druid.indexing.common.actions.TaskActionClient; import io.druid.query.Query; import io.druid.query.QueryRunner; -import org.joda.time.Interval; import java.io.IOException; -import java.util.List; public abstract class AbstractTask implements Task { @@ -55,26 +49,22 @@ public abstract class AbstractTask implements Task @JsonIgnore private final String dataSource; - @JsonIgnore - private final Optional interval; - - protected AbstractTask(String id, String dataSource, Interval interval) + protected AbstractTask(String id, String dataSource) { - this(id, id, new TaskResource(id, 1), dataSource, interval); + this(id, id, new TaskResource(id, 1), dataSource); } - protected AbstractTask(String id, String groupId, String dataSource, Interval interval) + protected AbstractTask(String id, String groupId, String dataSource) { - this(id, groupId, new TaskResource(id, 1), dataSource, 
interval); + this(id, groupId, new TaskResource(id, 1), dataSource); } - protected AbstractTask(String id, String groupId, TaskResource taskResource, String dataSource, Interval interval) + protected AbstractTask(String id, String groupId, TaskResource taskResource, String dataSource) { this.id = Preconditions.checkNotNull(id, "id"); this.groupId = Preconditions.checkNotNull(groupId, "groupId"); this.taskResource = Preconditions.checkNotNull(taskResource, "resource"); this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); - this.interval = Optional.fromNullable(interval); } @JsonProperty @@ -111,25 +101,12 @@ public abstract class AbstractTask implements Task return dataSource; } - @JsonProperty("interval") - @Override - public Optional getImplicitLockInterval() - { - return interval; - } - @Override public QueryRunner getQueryRunner(Query query) { return null; } - @Override - public TaskStatus preflight(TaskActionClient taskActionClient) throws Exception - { - return TaskStatus.running(id); - } - @Override public String toString() { @@ -137,7 +114,6 @@ public abstract class AbstractTask implements Task .add("id", id) .add("type", getType()) .add("dataSource", dataSource) - .add("interval", getImplicitLockInterval()) .toString(); } @@ -149,11 +125,6 @@ public abstract class AbstractTask implements Task return ID_JOINER.join(objects); } - public SegmentListUsedAction defaultListUsedAction() - { - return new SegmentListUsedAction(getDataSource(), getImplicitLockInterval().get()); - } - public TaskStatus success() { return TaskStatus.success(getId()); @@ -186,14 +157,6 @@ public abstract class AbstractTask implements Task protected Iterable getTaskLocks(TaskToolbox toolbox) throws IOException { - final List locks = toolbox.getTaskActionClient().submit(new LockListAction()); - - if (locks.isEmpty() && getImplicitLockInterval().isPresent()) { - // In the Peon's local mode, the implicit lock interval is not pre-acquired, so we need to try it here. - toolbox.getTaskActionClient().submit(new LockAcquireAction(getImplicitLockInterval().get())); - return toolbox.getTaskActionClient().submit(new LockListAction()); - } else { - return locks; - } + return toolbox.getTaskActionClient().submit(new LockListAction()); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/ArchiveTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/ArchiveTask.java new file mode 100644 index 00000000000..c863742e0ab --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/ArchiveTask.java @@ -0,0 +1,110 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.indexing.common.task; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.metamx.common.ISE; +import com.metamx.common.logger.Logger; +import io.druid.indexing.common.TaskLock; +import io.druid.indexing.common.TaskStatus; +import io.druid.indexing.common.TaskToolbox; +import io.druid.indexing.common.actions.SegmentListUnusedAction; +import io.druid.indexing.common.actions.SegmentMetadataUpdateAction; +import io.druid.timeline.DataSegment; +import org.joda.time.Interval; + +import java.util.List; + +public class ArchiveTask extends AbstractFixedIntervalTask +{ + private static final Logger log = new Logger(ArchiveTask.class); + + public ArchiveTask( + @JsonProperty("id") String id, + @JsonProperty("dataSource") String dataSource, + @JsonProperty("interval") Interval interval + ) + { + super( + TaskUtils.makeId(id, "archive", dataSource, interval), + dataSource, + interval + ); + } + + @Override + public String getType() + { + return "archive"; + } + + @Override + public TaskStatus run(TaskToolbox toolbox) throws Exception + { + // Confirm we have a lock (will throw if there isn't exactly one element) + final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); + + if (!myLock.getDataSource().equals(getDataSource())) { + throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource()); + } + + if (!myLock.getInterval().equals(getInterval())) { + throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getInterval()); + } + + // List unused segments + final List unusedSegments = toolbox + .getTaskActionClient() + .submit(new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval())); + + // Verify none of these segments have versions > lock version + for (final DataSegment unusedSegment : unusedSegments) { + if (unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) { + throw new ISE( + "WTF?! 
Unused segment[%s] has version[%s] > task version[%s]", + unusedSegment.getIdentifier(), + unusedSegment.getVersion(), + myLock.getVersion() + ); + } + + log.info("OK to archive segment: %s", unusedSegment.getIdentifier()); + } + + List archivedSegments = Lists.newLinkedList(); + + // Move segments + for (DataSegment segment : unusedSegments) { + archivedSegments.add(toolbox.getDataSegmentArchiver().archive(segment)); + } + + // Update metadata for moved segments + toolbox.getTaskActionClient().submit( + new SegmentMetadataUpdateAction( + ImmutableSet.copyOf(archivedSegments) + ) + ); + + return TaskStatus.success(getId()); + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/DeleteTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/DeleteTask.java index 32d3e49e618..970818a6e9d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/DeleteTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/DeleteTask.java @@ -22,6 +22,7 @@ package io.druid.indexing.common.task; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -30,7 +31,6 @@ import io.druid.granularity.QueryGranularity; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.LockListAction; import io.druid.indexing.common.actions.SegmentInsertAction; import io.druid.query.aggregation.AggregatorFactory; import io.druid.segment.IndexMerger; @@ -44,7 +44,7 @@ import org.joda.time.Interval; import java.io.File; -public class DeleteTask extends AbstractTask +public class DeleteTask extends AbstractFixedIntervalTask { private static final Logger log = new Logger(DeleteTask.class); @@ -78,16 +78,15 @@ public class DeleteTask extends AbstractTask public TaskStatus run(TaskToolbox toolbox) throws Exception { // Strategy: Create an empty segment covering the interval to be deleted - final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); - final Interval interval = this.getImplicitLockInterval().get(); + final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); final IncrementalIndex empty = new IncrementalIndex(0, QueryGranularity.NONE, new AggregatorFactory[0]); - final IndexableAdapter emptyAdapter = new IncrementalIndexAdapter(interval, empty); + final IndexableAdapter emptyAdapter = new IncrementalIndexAdapter(getInterval(), empty); // Create DataSegment final DataSegment segment = DataSegment.builder() .dataSource(this.getDataSource()) - .interval(interval) + .interval(getInterval()) .version(myLock.getVersion()) .shardSpec(new NoneShardSpec()) .build(); @@ -105,7 +104,7 @@ public class DeleteTask extends AbstractTask segment.getVersion() ); - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(uploadedSegment))); + toolbox.pushSegments(ImmutableList.of(uploadedSegment)); return TaskStatus.success(getId()); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index e687875433b..233714f5c71 100644 --- 
a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -24,10 +24,14 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import com.google.api.client.util.Lists; +import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; import com.metamx.common.logger.Logger; import io.druid.common.utils.JodaUtils; import io.druid.indexer.HadoopDruidIndexerConfig; @@ -37,21 +41,27 @@ import io.druid.indexer.HadoopDruidIndexerSchema; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; +import io.druid.indexing.common.actions.LockTryAcquireAction; import io.druid.indexing.common.actions.SegmentInsertAction; +import io.druid.indexing.common.actions.TaskActionClient; import io.druid.initialization.Initialization; import io.druid.server.initialization.ExtensionsConfig; import io.druid.timeline.DataSegment; import io.tesla.aether.internal.DefaultTeslaAether; import org.joda.time.DateTime; +import org.joda.time.Interval; +import javax.annotation.Nullable; import java.io.File; import java.lang.reflect.Method; import java.net.URL; import java.net.URLClassLoader; import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.Map; -public class HadoopIndexTask extends AbstractTask +public class HadoopIndexTask extends AbstractFixedIntervalTask { private static final Logger log = new Logger(HadoopIndexTask.class); private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3"; @@ -88,10 +98,14 @@ public class HadoopIndexTask extends AbstractTask super( id != null ? 
id : String.format("index_hadoop_%s_%s", schema.getDataSource(), new DateTime()), schema.getDataSource(), - JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(schema.getGranularitySpec().bucketIntervals())) + JodaUtils.umbrellaInterval( + JodaUtils.condenseIntervals( + schema.getGranularitySpec() + .bucketIntervals() + ) + ) ); - // Some HadoopDruidIndexerSchema stuff doesn't make sense in the context of the indexing service Preconditions.checkArgument(schema.getSegmentOutputPath() == null, "segmentOutputPath must be absent"); Preconditions.checkArgument(schema.getWorkingPath() == null, "workingPath must be absent"); @@ -107,7 +121,6 @@ public class HadoopIndexTask extends AbstractTask return "index_hadoop"; } - @JsonProperty("config") public HadoopDruidIndexerSchema getSchema() { @@ -174,14 +187,10 @@ public class HadoopIndexTask extends AbstractTask if (segments != null) { List publishedSegments = toolbox.getObjectMapper().readValue( - segments, new TypeReference>() - { - } + segments, + new TypeReference>() {} ); - // Request segment pushes - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.copyOf(publishedSegments))); - - // Done + toolbox.pushSegments(publishedSegments); return TaskStatus.success(getId()); } else { return TaskStatus.failure(getId()); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexDeterminePartitionsTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexDeterminePartitionsTask.java deleted file mode 100644 index a89cd475ff0..00000000000 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexDeterminePartitionsTask.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012, 2013 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ - -package io.druid.indexing.common.task; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Function; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Ordering; -import com.google.common.collect.Sets; -import com.google.common.collect.TreeMultiset; -import com.google.common.primitives.Ints; -import com.metamx.common.logger.Logger; -import io.druid.data.input.Firehose; -import io.druid.data.input.FirehoseFactory; -import io.druid.data.input.InputRow; -import io.druid.indexing.common.TaskStatus; -import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.SpawnTasksAction; -import io.druid.segment.realtime.Schema; -import io.druid.timeline.partition.NoneShardSpec; -import io.druid.timeline.partition.ShardSpec; -import io.druid.timeline.partition.SingleDimensionShardSpec; -import org.joda.time.DateTime; -import org.joda.time.Interval; - -import java.util.List; -import java.util.Map; -import java.util.Set; - -public class IndexDeterminePartitionsTask extends AbstractTask -{ - private static String makeTaskId(String groupId, DateTime start, DateTime end) - { - return String.format( - "%s_partitions_%s_%s", - groupId, - start, - end - ); - } - - @JsonIgnore - private final FirehoseFactory firehoseFactory; - - @JsonIgnore - private final Schema schema; - - @JsonIgnore - private final long targetPartitionSize; - - @JsonIgnore - private final int rowFlushBoundary; - - private static final Logger log = new Logger(IndexTask.class); - - @JsonCreator - public IndexDeterminePartitionsTask( - @JsonProperty("id") String id, - @JsonProperty("groupId") String groupId, - @JsonProperty("interval") Interval interval, - @JsonProperty("firehose") FirehoseFactory firehoseFactory, - @JsonProperty("schema") Schema schema, - @JsonProperty("targetPartitionSize") long targetPartitionSize, - @JsonProperty("rowFlushBoundary") int rowFlushBoundary - ) - { - super( - id != null ? id : makeTaskId(groupId, interval.getStart(), interval.getEnd()), - groupId, - schema.getDataSource(), - Preconditions.checkNotNull(interval, "interval") - ); - - this.firehoseFactory = firehoseFactory; - this.schema = schema; - this.targetPartitionSize = targetPartitionSize; - this.rowFlushBoundary = rowFlushBoundary; - } - - @Override - public String getType() - { - return "index_partitions"; - } - - @Override - public TaskStatus run(TaskToolbox toolbox) throws Exception - { - log.info("Running with targetPartitionSize[%d]", targetPartitionSize); - - // The implementation of this determine partitions stuff is less than optimal. Should be done better. 
- - // We know this exists - final Interval interval = getImplicitLockInterval().get(); - - // Blacklist dimensions that have multiple values per row - final Set unusableDimensions = Sets.newHashSet(); - - // Track values of all non-blacklisted dimensions - final Map> dimensionValueMultisets = Maps.newHashMap(); - - // Load data - final Firehose firehose = firehoseFactory.connect(); - - try { - while (firehose.hasMore()) { - - final InputRow inputRow = firehose.nextRow(); - - if (interval.contains(inputRow.getTimestampFromEpoch())) { - - // Extract dimensions from event - for (final String dim : inputRow.getDimensions()) { - final List dimValues = inputRow.getDimension(dim); - - if (!unusableDimensions.contains(dim)) { - - if (dimValues.size() == 1) { - - // Track this value - TreeMultiset dimensionValueMultiset = dimensionValueMultisets.get(dim); - - if (dimensionValueMultiset == null) { - dimensionValueMultiset = TreeMultiset.create(); - dimensionValueMultisets.put(dim, dimensionValueMultiset); - } - - dimensionValueMultiset.add(dimValues.get(0)); - - } else { - - // Only single-valued dimensions can be used for partitions - unusableDimensions.add(dim); - dimensionValueMultisets.remove(dim); - - } - - } - } - - } - - } - } - finally { - firehose.close(); - } - - // ShardSpecs for index generator tasks - final List shardSpecs = Lists.newArrayList(); - - // Select highest-cardinality dimension - Ordering>> byCardinalityOrdering = new Ordering>>() - { - @Override - public int compare( - Map.Entry> left, - Map.Entry> right - ) - { - return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size()); - } - }; - - if (dimensionValueMultisets.isEmpty()) { - // No suitable partition dimension. We'll make one big segment and hope for the best. - log.info("No suitable partition dimension found"); - shardSpecs.add(new NoneShardSpec()); - } else { - // Find best partition dimension (heuristic: highest cardinality). 
- final Map.Entry> partitionEntry = - byCardinalityOrdering.max(dimensionValueMultisets.entrySet()); - - final String partitionDim = partitionEntry.getKey(); - final TreeMultiset partitionDimValues = partitionEntry.getValue(); - - log.info( - "Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", - partitionDim, - partitionDimValues.elementSet().size(), - partitionDimValues.size() - ); - - // Iterate over unique partition dimension values in sorted order - String currentPartitionStart = null; - int currentPartitionSize = 0; - for (final String partitionDimValue : partitionDimValues.elementSet()) { - currentPartitionSize += partitionDimValues.count(partitionDimValue); - if (currentPartitionSize >= targetPartitionSize) { - final ShardSpec shardSpec = new SingleDimensionShardSpec( - partitionDim, - currentPartitionStart, - partitionDimValue, - shardSpecs.size() - ); - - log.info("Adding shard: %s", shardSpec); - shardSpecs.add(shardSpec); - - currentPartitionSize = partitionDimValues.count(partitionDimValue); - currentPartitionStart = partitionDimValue; - } - } - - if (currentPartitionSize > 0) { - // One last shard to go - final ShardSpec shardSpec; - - if (shardSpecs.isEmpty()) { - shardSpec = new NoneShardSpec(); - } else { - shardSpec = new SingleDimensionShardSpec( - partitionDim, - currentPartitionStart, - null, - shardSpecs.size() - ); - } - - log.info("Adding shard: %s", shardSpec); - shardSpecs.add(shardSpec); - } - } - - List nextTasks = Lists.transform( - shardSpecs, - new Function() - { - @Override - public Task apply(ShardSpec shardSpec) - { - return new IndexGeneratorTask( - null, - getGroupId(), - getImplicitLockInterval().get(), - firehoseFactory, - new Schema( - schema.getDataSource(), - schema.getSpatialDimensions(), - schema.getAggregators(), - schema.getIndexGranularity(), - shardSpec - ), - rowFlushBoundary - ); - } - } - ); - - toolbox.getTaskActionClient().submit(new SpawnTasksAction(nextTasks)); - - return TaskStatus.success(getId()); - } - - @JsonProperty - public FirehoseFactory getFirehoseFactory() - { - return firehoseFactory; - } - - @JsonProperty - public Schema getSchema() - { - return schema; - } - - @JsonProperty - public long getTargetPartitionSize() - { - return targetPartitionSize; - } - - @JsonProperty - public int getRowFlushBoundary() - { - return rowFlushBoundary; - } -} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexGeneratorTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexGeneratorTask.java deleted file mode 100644 index 84f6211ad52..00000000000 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexGeneratorTask.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012, 2013 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package io.druid.indexing.common.task; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import com.metamx.common.logger.Logger; -import io.druid.data.input.Firehose; -import io.druid.data.input.FirehoseFactory; -import io.druid.data.input.InputRow; -import io.druid.indexing.common.TaskLock; -import io.druid.indexing.common.TaskStatus; -import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.SegmentInsertAction; -import io.druid.indexing.common.index.YeOldePlumberSchool; -import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.realtime.FireDepartmentMetrics; -import io.druid.segment.realtime.Schema; -import io.druid.segment.realtime.plumber.Plumber; -import io.druid.segment.realtime.plumber.Sink; -import io.druid.timeline.DataSegment; -import org.joda.time.DateTime; -import org.joda.time.Interval; - -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.concurrent.CopyOnWriteArrayList; - -public class IndexGeneratorTask extends AbstractTask -{ - @JsonIgnore - private final FirehoseFactory firehoseFactory; - - @JsonIgnore - private final Schema schema; - - @JsonIgnore - private final int rowFlushBoundary; - - private static final Logger log = new Logger(IndexTask.class); - - @JsonCreator - public IndexGeneratorTask( - @JsonProperty("id") String id, - @JsonProperty("groupId") String groupId, - @JsonProperty("interval") Interval interval, - @JsonProperty("firehose") FirehoseFactory firehoseFactory, - @JsonProperty("schema") Schema schema, - @JsonProperty("rowFlushBoundary") int rowFlushBoundary - ) - { - super( - id != null - ? id - : String.format( - "%s_generator_%s_%s_%s", - groupId, - interval.getStart(), - interval.getEnd(), - schema.getShardSpec().getPartitionNum() - ), - groupId, - schema.getDataSource(), - Preconditions.checkNotNull(interval, "interval") - ); - - this.firehoseFactory = firehoseFactory; - this.schema = schema; - this.rowFlushBoundary = rowFlushBoundary; - } - - @Override - public String getType() - { - return "index_generator"; - } - - @Override - public TaskStatus run(final TaskToolbox toolbox) throws Exception - { - // We should have a lock from before we started running - final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); - - // We know this exists - final Interval interval = getImplicitLockInterval().get(); - - // Set up temporary directory for indexing - final File tmpDir = new File( - toolbox.getTaskWorkDir(), - String.format( - "%s_%s_%s_%s_%s", - this.getDataSource(), - interval.getStart(), - interval.getEnd(), - myLock.getVersion(), - schema.getShardSpec().getPartitionNum() - ) - ); - - // We need to track published segments. 
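The removed code below hands the plumber a wrapped DataSegmentPusher so the task can learn exactly which segments were pushed, and the new IndexTask.generateSegment later in this patch reuses the same pattern. A minimal standalone sketch of that decorator, assuming only the two interface methods visible in these hunks; the class name and accessor are invented for illustration:

import io.druid.segment.loading.DataSegmentPusher;
import io.druid.timeline.DataSegment;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

// Illustrative decorator: delegates pushes to the real pusher and records every
// segment that actually reached deep storage. Class name and getter are invented;
// the DataSegmentPusher methods are the ones shown in this patch.
public class RecordingSegmentPusher implements DataSegmentPusher
{
  private final DataSegmentPusher delegate;
  private final List<DataSegment> pushedSegments = new CopyOnWriteArrayList<DataSegment>();

  public RecordingSegmentPusher(DataSegmentPusher delegate)
  {
    this.delegate = delegate;
  }

  @Override
  public String getPathForHadoop(String dataSource)
  {
    return delegate.getPathForHadoop(dataSource);
  }

  @Override
  public DataSegment push(File file, DataSegment segment) throws IOException
  {
    final DataSegment pushed = delegate.push(file, segment);
    pushedSegments.add(pushed); // remember what was actually pushed
    return pushed;
  }

  public List<DataSegment> getPushedSegments()
  {
    return pushedSegments;
  }
}

The indirection exists because the plumber, not the task, decides when segments get pushed; recording them at push time is how the task later knows which segments to publish.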
- final List pushedSegments = new CopyOnWriteArrayList(); - final DataSegmentPusher wrappedDataSegmentPusher = new DataSegmentPusher() - { - @Override - public String getPathForHadoop(String dataSource) - { - return toolbox.getSegmentPusher().getPathForHadoop(dataSource); - } - - @Override - public DataSegment push(File file, DataSegment segment) throws IOException - { - final DataSegment pushedSegment = toolbox.getSegmentPusher().push(file, segment); - pushedSegments.add(pushedSegment); - return pushedSegment; - } - }; - - // Create firehose + plumber - final FireDepartmentMetrics metrics = new FireDepartmentMetrics(); - final Firehose firehose = firehoseFactory.connect(); - final Plumber plumber = new YeOldePlumberSchool( - interval, - myLock.getVersion(), - wrappedDataSegmentPusher, - tmpDir - ).findPlumber(schema, metrics); - - // rowFlushBoundary for this job - final int myRowFlushBoundary = this.rowFlushBoundary > 0 - ? rowFlushBoundary - : toolbox.getConfig().getDefaultRowFlushBoundary(); - - try { - while (firehose.hasMore()) { - final InputRow inputRow = firehose.nextRow(); - - if (shouldIndex(inputRow)) { - final Sink sink = plumber.getSink(inputRow.getTimestampFromEpoch()); - if (sink == null) { - throw new NullPointerException( - String.format( - "Was expecting non-null sink for timestamp[%s]", - new DateTime(inputRow.getTimestampFromEpoch()) - ) - ); - } - - int numRows = sink.add(inputRow); - metrics.incrementProcessed(); - - if (numRows >= myRowFlushBoundary) { - plumber.persist(firehose.commit()); - } - } else { - metrics.incrementThrownAway(); - } - } - } - finally { - firehose.close(); - } - - plumber.persist(firehose.commit()); - plumber.finishJob(); - - // Output metrics - log.info( - "Task[%s] took in %,d rows (%,d processed, %,d unparseable, %,d thrown away) and output %,d rows", - getId(), - metrics.processed() + metrics.unparseable() + metrics.thrownAway(), - metrics.processed(), - metrics.unparseable(), - metrics.thrownAway(), - metrics.rowOutput() - ); - - // Request segment pushes - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.copyOf(pushedSegments))); - - // Done - return TaskStatus.success(getId()); - } - - /** - * Should we index this inputRow? Decision is based on our interval and shardSpec. 
- * - * @param inputRow the row to check - * - * @return true or false - */ - private boolean shouldIndex(InputRow inputRow) - { - if (getImplicitLockInterval().get().contains(inputRow.getTimestampFromEpoch())) { - return schema.getShardSpec().isInChunk(inputRow); - } else { - return false; - } - } - - @JsonProperty("firehose") - public FirehoseFactory getFirehoseFactory() - { - return firehoseFactory; - } - - @JsonProperty - public Schema getSchema() - { - return schema; - } - - @JsonProperty - public int getRowFlushBoundary() - { - return rowFlushBoundary; - } -} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 6e7d9f61f17..25603c981cd 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -22,26 +22,48 @@ package io.druid.indexing.common.task; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.api.client.util.Sets; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Ordering; +import com.google.common.collect.TreeMultiset; +import com.google.common.primitives.Ints; import com.metamx.common.logger.Logger; +import io.druid.data.input.Firehose; import io.druid.data.input.FirehoseFactory; +import io.druid.data.input.InputRow; import io.druid.data.input.impl.SpatialDimensionSchema; import io.druid.granularity.QueryGranularity; import io.druid.indexer.granularity.GranularitySpec; +import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.SpawnTasksAction; -import io.druid.indexing.common.actions.TaskActionClient; +import io.druid.indexing.common.actions.SegmentInsertAction; +import io.druid.indexing.common.index.YeOldePlumberSchool; import io.druid.query.aggregation.AggregatorFactory; +import io.druid.segment.loading.DataSegmentPusher; +import io.druid.segment.realtime.FireDepartmentMetrics; import io.druid.segment.realtime.Schema; +import io.druid.segment.realtime.plumber.Plumber; +import io.druid.segment.realtime.plumber.Sink; +import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; +import io.druid.timeline.partition.ShardSpec; +import io.druid.timeline.partition.SingleDimensionShardSpec; import org.joda.time.DateTime; import org.joda.time.Interval; +import java.io.File; +import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArrayList; -public class IndexTask extends AbstractTask +public class IndexTask extends AbstractFixedIntervalTask { private static final Logger log = new Logger(IndexTask.class); @@ -58,7 +80,7 @@ public class IndexTask extends AbstractTask private final QueryGranularity indexGranularity; @JsonIgnore - private final long targetPartitionSize; + private final int targetPartitionSize; @JsonIgnore private final FirehoseFactory firehoseFactory; @@ -74,7 +96,7 @@ public class IndexTask extends AbstractTask @JsonProperty("spatialDimensions") List spatialDimensions, 
@JsonProperty("aggregators") AggregatorFactory[] aggregators, @JsonProperty("indexGranularity") QueryGranularity indexGranularity, - @JsonProperty("targetPartitionSize") long targetPartitionSize, + @JsonProperty("targetPartitionSize") int targetPartitionSize, @JsonProperty("firehose") FirehoseFactory firehoseFactory, @JsonProperty("rowFlushBoundary") int rowFlushBoundary ) @@ -96,58 +118,10 @@ public class IndexTask extends AbstractTask this.aggregators = aggregators; this.indexGranularity = (indexGranularity == null) ? QueryGranularity.NONE : indexGranularity; this.targetPartitionSize = targetPartitionSize; - this.firehoseFactory = firehoseFactory; + this.firehoseFactory = Preconditions.checkNotNull(firehoseFactory, "firehoseFactory"); this.rowFlushBoundary = rowFlushBoundary; } - public List toSubtasks() - { - final List retVal = Lists.newArrayList(); - - for (final Interval interval : granularitySpec.bucketIntervals()) { - if (targetPartitionSize > 0) { - // Need to do one pass over the data before indexing in order to determine good partitions - retVal.add( - new IndexDeterminePartitionsTask( - null, - getGroupId(), - interval, - firehoseFactory, - new Schema( - getDataSource(), - spatialDimensions, - aggregators, - indexGranularity, - new NoneShardSpec() - ), - targetPartitionSize, - rowFlushBoundary - ) - ); - } else { - // Jump straight into indexing - retVal.add( - new IndexGeneratorTask( - null, - getGroupId(), - interval, - firehoseFactory, - new Schema( - getDataSource(), - spatialDimensions, - aggregators, - indexGranularity, - new NoneShardSpec() - ), - rowFlushBoundary - ) - ); - } - } - - return retVal; - } - @Override public String getType() { @@ -155,16 +129,278 @@ public class IndexTask extends AbstractTask } @Override - public TaskStatus preflight(TaskActionClient taskActionClient) throws Exception + public TaskStatus run(TaskToolbox toolbox) throws Exception { - taskActionClient.submit(new SpawnTasksAction(toSubtasks())); + final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); + final Set segments = Sets.newHashSet(); + for (final Interval bucket : granularitySpec.bucketIntervals()) { + final List shardSpecs; + if (targetPartitionSize > 0) { + shardSpecs = determinePartitions(bucket, targetPartitionSize); + } else { + shardSpecs = ImmutableList.of(new NoneShardSpec()); + } + for (final ShardSpec shardSpec : shardSpecs) { + final DataSegment segment = generateSegment( + toolbox, + new Schema( + getDataSource(), + spatialDimensions, + aggregators, + indexGranularity, + shardSpec + ), + bucket, + myLock.getVersion() + ); + segments.add(segment); + } + } + toolbox.pushSegments(segments); return TaskStatus.success(getId()); } - @Override - public TaskStatus run(TaskToolbox toolbox) throws Exception + private List determinePartitions( + final Interval interval, + final int targetPartitionSize + ) throws IOException { - throw new IllegalStateException("IndexTasks should not be run!"); + log.info("Determining partitions for interval[%s] with targetPartitionSize[%d]", interval, targetPartitionSize); + + // The implementation of this determine partitions stuff is less than optimal. Should be done better. 
+ + // Blacklist dimensions that have multiple values per row + final Set unusableDimensions = com.google.common.collect.Sets.newHashSet(); + // Track values of all non-blacklisted dimensions + final Map> dimensionValueMultisets = Maps.newHashMap(); + + // Load data + try (Firehose firehose = firehoseFactory.connect()) { + while (firehose.hasMore()) { + final InputRow inputRow = firehose.nextRow(); + if (interval.contains(inputRow.getTimestampFromEpoch())) { + // Extract dimensions from event + for (final String dim : inputRow.getDimensions()) { + final List dimValues = inputRow.getDimension(dim); + if (!unusableDimensions.contains(dim)) { + if (dimValues.size() == 1) { + // Track this value + TreeMultiset dimensionValueMultiset = dimensionValueMultisets.get(dim); + if (dimensionValueMultiset == null) { + dimensionValueMultiset = TreeMultiset.create(); + dimensionValueMultisets.put(dim, dimensionValueMultiset); + } + dimensionValueMultiset.add(dimValues.get(0)); + } else { + // Only single-valued dimensions can be used for partitions + unusableDimensions.add(dim); + dimensionValueMultisets.remove(dim); + } + } + } + } + } + } + + // ShardSpecs we will return + final List shardSpecs = Lists.newArrayList(); + + // Select highest-cardinality dimension + Ordering>> byCardinalityOrdering = new Ordering>>() + { + @Override + public int compare( + Map.Entry> left, + Map.Entry> right + ) + { + return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size()); + } + }; + + if (dimensionValueMultisets.isEmpty()) { + // No suitable partition dimension. We'll make one big segment and hope for the best. + log.info("No suitable partition dimension found"); + shardSpecs.add(new NoneShardSpec()); + } else { + // Find best partition dimension (heuristic: highest cardinality). + final Map.Entry> partitionEntry = + byCardinalityOrdering.max(dimensionValueMultisets.entrySet()); + + final String partitionDim = partitionEntry.getKey(); + final TreeMultiset partitionDimValues = partitionEntry.getValue(); + + log.info( + "Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", + partitionDim, + partitionDimValues.elementSet().size(), + partitionDimValues.size() + ); + + // Iterate over unique partition dimension values in sorted order + String currentPartitionStart = null; + int currentPartitionSize = 0; + for (final String partitionDimValue : partitionDimValues.elementSet()) { + currentPartitionSize += partitionDimValues.count(partitionDimValue); + if (currentPartitionSize >= targetPartitionSize) { + final ShardSpec shardSpec = new SingleDimensionShardSpec( + partitionDim, + currentPartitionStart, + partitionDimValue, + shardSpecs.size() + ); + + log.info("Adding shard: %s", shardSpec); + shardSpecs.add(shardSpec); + + currentPartitionSize = partitionDimValues.count(partitionDimValue); + currentPartitionStart = partitionDimValue; + } + } + + if (currentPartitionSize > 0) { + // One last shard to go + final ShardSpec shardSpec; + + if (shardSpecs.isEmpty()) { + shardSpec = new NoneShardSpec(); + } else { + shardSpec = new SingleDimensionShardSpec( + partitionDim, + currentPartitionStart, + null, + shardSpecs.size() + ); + } + + log.info("Adding shard: %s", shardSpec); + shardSpecs.add(shardSpec); + } + } + + return shardSpecs; + } + + private DataSegment generateSegment( + final TaskToolbox toolbox, + final Schema schema, + final Interval interval, + final String version + ) throws IOException + { + // Set up temporary directory. 
+ final File tmpDir = new File( + toolbox.getTaskWorkDir(), + String.format( + "%s_%s_%s_%s_%s", + this.getDataSource(), + interval.getStart(), + interval.getEnd(), + version, + schema.getShardSpec().getPartitionNum() + ) + ); + + // We need to track published segments. + final List pushedSegments = new CopyOnWriteArrayList(); + final DataSegmentPusher wrappedDataSegmentPusher = new DataSegmentPusher() + { + @Override + public String getPathForHadoop(String dataSource) + { + return toolbox.getSegmentPusher().getPathForHadoop(dataSource); + } + + @Override + public DataSegment push(File file, DataSegment segment) throws IOException + { + final DataSegment pushedSegment = toolbox.getSegmentPusher().push(file, segment); + pushedSegments.add(pushedSegment); + return pushedSegment; + } + }; + + // Create firehose + plumber + final FireDepartmentMetrics metrics = new FireDepartmentMetrics(); + final Firehose firehose = firehoseFactory.connect(); + final Plumber plumber = new YeOldePlumberSchool( + interval, + version, + wrappedDataSegmentPusher, + tmpDir + ).findPlumber(schema, metrics); + + // rowFlushBoundary for this job + final int myRowFlushBoundary = this.rowFlushBoundary > 0 + ? rowFlushBoundary + : toolbox.getConfig().getDefaultRowFlushBoundary(); + + try { + plumber.startJob(); + + while (firehose.hasMore()) { + final InputRow inputRow = firehose.nextRow(); + + if (shouldIndex(schema, interval, inputRow)) { + final Sink sink = plumber.getSink(inputRow.getTimestampFromEpoch()); + if (sink == null) { + throw new NullPointerException( + String.format( + "Was expecting non-null sink for timestamp[%s]", + new DateTime(inputRow.getTimestampFromEpoch()) + ) + ); + } + + int numRows = sink.add(inputRow); + metrics.incrementProcessed(); + + if (numRows >= myRowFlushBoundary) { + plumber.persist(firehose.commit()); + } + } else { + metrics.incrementThrownAway(); + } + } + } + finally { + firehose.close(); + } + + plumber.persist(firehose.commit()); + + try { + plumber.finishJob(); + } + finally { + log.info( + "Task[%s] interval[%s] partition[%d] took in %,d rows (%,d processed, %,d unparseable, %,d thrown away)" + + " and output %,d rows", + getId(), + interval, + schema.getShardSpec().getPartitionNum(), + metrics.processed() + metrics.unparseable() + metrics.thrownAway(), + metrics.processed(), + metrics.unparseable(), + metrics.thrownAway(), + metrics.rowOutput() + ); + } + + // We expect a single segment to have been created. + return Iterables.getOnlyElement(pushedSegments); + } + + /** + * Should we index this inputRow? Decision is based on our interval and shardSpec. 
+ * + * @param inputRow the row to check + * + * @return true or false + */ + private boolean shouldIndex(final Schema schema, final Interval interval, final InputRow inputRow) + { + return interval.contains(inputRow.getTimestampFromEpoch()) && schema.getShardSpec().isInChunk(inputRow); } @JsonProperty @@ -191,7 +427,7 @@ public class IndexTask extends AbstractTask return targetPartitionSize; } - @JsonProperty + @JsonProperty("firehose") public FirehoseFactory getFirehoseFactory() { return firehoseFactory; @@ -202,4 +438,10 @@ public class IndexTask extends AbstractTask { return rowFlushBoundary; } + + @JsonProperty + public List getSpatialDimensions() + { + return spatialDimensions; + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/KillTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/KillTask.java index 8f4068a5e46..b4858342981 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/KillTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/KillTask.java @@ -28,7 +28,6 @@ import com.metamx.common.logger.Logger; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.LockListAction; import io.druid.indexing.common.actions.SegmentListUnusedAction; import io.druid.indexing.common.actions.SegmentNukeAction; import io.druid.timeline.DataSegment; @@ -38,7 +37,7 @@ import java.util.List; /** */ -public class KillTask extends AbstractTask +public class KillTask extends AbstractFixedIntervalTask { private static final Logger log = new Logger(KillTask.class); @@ -68,12 +67,12 @@ public class KillTask extends AbstractTask // Confirm we have a lock (will throw if there isn't exactly one element) final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); - if(!myLock.getDataSource().equals(getDataSource())) { + if (!myLock.getDataSource().equals(getDataSource())) { throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource()); } - if(!myLock.getInterval().equals(getImplicitLockInterval().get())) { - throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getImplicitLockInterval().get()); + if (!myLock.getInterval().equals(getInterval())) { + throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getInterval()); } // List unused segments @@ -82,8 +81,8 @@ public class KillTask extends AbstractTask .submit(new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval())); // Verify none of these segments have versions > lock version - for(final DataSegment unusedSegment : unusedSegments) { - if(unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) { + for (final DataSegment unusedSegment : unusedSegments) { + if (unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) { throw new ISE( "WTF?! 
Unused segment[%s] has version[%s] > task version[%s]", unusedSegment.getIdentifier(), @@ -98,11 +97,9 @@ public class KillTask extends AbstractTask // Kill segments for (DataSegment segment : unusedSegments) { toolbox.getDataSegmentKiller().kill(segment); + toolbox.getTaskActionClient().submit(new SegmentNukeAction(ImmutableSet.of(segment))); } - // Remove metadata for these segments - toolbox.getTaskActionClient().submit(new SegmentNukeAction(ImmutableSet.copyOf(unusedSegments))); - return TaskStatus.success(getId()); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java b/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java index 750509f9cec..40b07f72d71 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java @@ -27,7 +27,7 @@ import com.google.common.base.Joiner; import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; -import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -41,9 +41,8 @@ import com.metamx.emitter.service.ServiceMetricEvent; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.actions.LockAcquireAction; -import io.druid.indexing.common.actions.LockListAction; import io.druid.indexing.common.actions.SegmentInsertAction; +import io.druid.indexing.common.actions.SegmentListUsedAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.segment.IndexIO; import io.druid.timeline.DataSegment; @@ -53,14 +52,13 @@ import org.joda.time.Interval; import javax.annotation.Nullable; import java.io.File; -import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; /** */ -public abstract class MergeTaskBase extends AbstractTask +public abstract class MergeTaskBase extends AbstractFixedIntervalTask { @JsonIgnore private final List segments; @@ -145,7 +143,7 @@ public abstract class MergeTaskBase extends AbstractTask ); // download segments to merge - final Map gettedSegments = toolbox.getSegments(segments); + final Map gettedSegments = toolbox.fetchSegments(segments); // merge files together final File fileToUpload = merge(gettedSegments, new File(taskDir, "merged")); @@ -168,7 +166,7 @@ public abstract class MergeTaskBase extends AbstractTask emitter.emit(builder.build("merger/uploadTime", System.currentTimeMillis() - uploadStart)); emitter.emit(builder.build("merger/mergeSize", uploadedSegment.getSize())); - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(uploadedSegment))); + toolbox.pushSegments(ImmutableList.of(uploadedSegment)); return TaskStatus.success(getId()); } @@ -186,9 +184,12 @@ public abstract class MergeTaskBase extends AbstractTask * we are operating on every segment that overlaps the chosen interval. 
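The isReady rewrite below replaces preflight: the task first try-acquires its interval lock via AbstractFixedIntervalTask, then verifies that the segments it was asked to merge are exactly the used segments in that interval. Roughly what that set comparison amounts to, as a hedged sketch (the real method's error reporting sits partly outside the hunks shown, so the class name and message here are illustrative):

import com.google.common.collect.Sets;

import java.util.Set;

// Rough sketch of the "current vs. requested" identifier check in the new isReady path.
public class SegmentCoverageCheck
{
  public static void verifyExactCoverage(Set<String> currentIdentifiers, Set<String> requestedIdentifiers)
  {
    if (!currentIdentifiers.equals(requestedIdentifiers)) {
      throw new IllegalStateException(
          String.format(
              "Merge would not cover the interval exactly: used but not requested%s, requested but not used%s",
              Sets.difference(currentIdentifiers, requestedIdentifiers),
              Sets.difference(requestedIdentifiers, currentIdentifiers)
          )
      );
    }
  }
}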
*/ @Override - public TaskStatus preflight(TaskActionClient taskActionClient) + public boolean isReady(TaskActionClient taskActionClient) throws Exception { - try { + // Try to acquire lock + if (!super.isReady(taskActionClient)) { + return false; + } else { final Function toIdentifier = new Function() { @Override @@ -199,7 +200,10 @@ public abstract class MergeTaskBase extends AbstractTask }; final Set current = ImmutableSet.copyOf( - Iterables.transform(taskActionClient.submit(defaultListUsedAction()), toIdentifier) + Iterables.transform( + taskActionClient.submit(new SegmentListUsedAction(getDataSource(), getInterval())), + toIdentifier + ) ); final Set requested = ImmutableSet.copyOf(Iterables.transform(segments, toIdentifier)); @@ -219,10 +223,7 @@ public abstract class MergeTaskBase extends AbstractTask ); } - return TaskStatus.running(getId()); - } - catch (IOException e) { - throw Throwables.propagate(e); + return true; } } @@ -241,7 +242,7 @@ public abstract class MergeTaskBase extends AbstractTask return Objects.toStringHelper(this) .add("id", getId()) .add("dataSource", getDataSource()) - .add("interval", getImplicitLockInterval()) + .add("interval", getInterval()) .add("segments", segments) .toString(); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/MoveTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/MoveTask.java new file mode 100644 index 00000000000..da82ffa6608 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/MoveTask.java @@ -0,0 +1,115 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.indexing.common.task; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.metamx.common.ISE; +import com.metamx.common.logger.Logger; +import io.druid.indexing.common.TaskLock; +import io.druid.indexing.common.TaskStatus; +import io.druid.indexing.common.TaskToolbox; +import io.druid.indexing.common.actions.SegmentListUnusedAction; +import io.druid.indexing.common.actions.SegmentMetadataUpdateAction; +import io.druid.timeline.DataSegment; +import org.joda.time.Interval; + +import java.util.List; +import java.util.Map; + +public class MoveTask extends AbstractFixedIntervalTask +{ + private static final Logger log = new Logger(MoveTask.class); + + private final Map targetLoadSpec; + + @JsonCreator + public MoveTask( + @JsonProperty("id") String id, + @JsonProperty("dataSource") String dataSource, + @JsonProperty("interval") Interval interval, + @JsonProperty("target") Map targetLoadSpec + ) + { + super( + TaskUtils.makeId(id, "move", dataSource, interval), + dataSource, + interval + ); + this.targetLoadSpec = targetLoadSpec; + } + + @Override + public String getType() + { + return "move"; + } + + @Override + public TaskStatus run(TaskToolbox toolbox) throws Exception + { + // Confirm we have a lock (will throw if there isn't exactly one element) + final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox)); + + if(!myLock.getDataSource().equals(getDataSource())) { + throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource()); + } + + if(!myLock.getInterval().equals(getInterval())) { + throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getInterval()); + } + + // List unused segments + final List unusedSegments = toolbox + .getTaskActionClient() + .submit(new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval())); + + // Verify none of these segments have versions > lock version + for(final DataSegment unusedSegment : unusedSegments) { + if(unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) { + throw new ISE( + "WTF?! 
Unused segment[%s] has version[%s] > task version[%s]", + unusedSegment.getIdentifier(), + unusedSegment.getVersion(), + myLock.getVersion() + ); + } + + log.info("OK to move segment: %s", unusedSegment.getIdentifier()); + } + + // Move segments + for (DataSegment segment : unusedSegments) { + final DataSegment movedSegment = toolbox.getDataSegmentMover().move(segment, targetLoadSpec); + toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.of(movedSegment))); + } + + return TaskStatus.success(getId()); + } + + @JsonProperty + public Map getTargetLoadSpec() + { + return targetLoadSpec; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/NoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/NoopTask.java index c6291fdb4c9..b4de3512fbe 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/NoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/NoopTask.java @@ -20,41 +20,63 @@ package io.druid.indexing.common.task; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.metamx.common.ISE; import com.metamx.common.logger.Logger; import io.druid.data.input.FirehoseFactory; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; +import io.druid.indexing.common.actions.TaskActionClient; import org.joda.time.DateTime; -import org.joda.time.Interval; -import org.joda.time.Period; /** */ public class NoopTask extends AbstractTask { private static final Logger log = new Logger(NoopTask.class); - private static int defaultRunTime = 2500; + private static final int defaultRunTime = 2500; + private static final int defaultIsReadyTime = 0; + private static final IsReadyResult defaultIsReadyResult = IsReadyResult.YES; - private final int runTime; + enum IsReadyResult + { + YES, + NO, + EXCEPTION + } + + @JsonIgnore + private final long runTime; + + @JsonIgnore + private final long isReadyTime; + + @JsonIgnore + private final IsReadyResult isReadyResult; + + @JsonIgnore private final FirehoseFactory firehoseFactory; @JsonCreator public NoopTask( @JsonProperty("id") String id, - @JsonProperty("interval") Interval interval, - @JsonProperty("runTime") int runTime, + @JsonProperty("runTime") long runTime, + @JsonProperty("isReadyTime") long isReadyTime, + @JsonProperty("isReadyResult") String isReadyResult, @JsonProperty("firehose") FirehoseFactory firehoseFactory ) { super( id == null ? String.format("noop_%s", new DateTime()) : id, - "none", - interval == null ? new Interval(Period.days(1), new DateTime()) : interval + "none" ); this.runTime = (runTime == 0) ? defaultRunTime : runTime; - + this.isReadyTime = (isReadyTime == 0) ? defaultIsReadyTime : isReadyTime; + this.isReadyResult = (isReadyResult == null) + ? 
defaultIsReadyResult + : IsReadyResult.valueOf(isReadyResult.toUpperCase()); this.firehoseFactory = firehoseFactory; } @@ -64,18 +86,45 @@ public class NoopTask extends AbstractTask return "noop"; } - @JsonProperty("runTime") - public int getRunTime() + @JsonProperty + public long getRunTime() { return runTime; } + @JsonProperty + public long getIsReadyTime() + { + return isReadyTime; + } + + @JsonProperty + public IsReadyResult getIsReadyResult() + { + return isReadyResult; + } + @JsonProperty("firehose") public FirehoseFactory getFirehoseFactory() { return firehoseFactory; } + @Override + public boolean isReady(TaskActionClient taskActionClient) throws Exception + { + switch (isReadyResult) { + case YES: + return true; + case NO: + return false; + case EXCEPTION: + throw new ISE("Not ready. Never will be ready. Go away!"); + default: + throw new AssertionError("#notreached"); + } + } + @Override public TaskStatus run(TaskToolbox toolbox) throws Exception { @@ -90,4 +139,9 @@ public class NoopTask extends AbstractTask log.info("Woke up!"); return TaskStatus.success(getId()); } + + public static NoopTask create() + { + return new NoopTask(null, 0, 0, null, null); + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/RealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/RealtimeIndexTask.java index bb30c351732..01fa6e69149 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/RealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/RealtimeIndexTask.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.common.io.Closeables; import com.metamx.common.exception.FormattedException; import com.metamx.emitter.EmittingLogger; @@ -35,9 +34,8 @@ import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.LockAcquireAction; -import io.druid.indexing.common.actions.LockListAction; import io.druid.indexing.common.actions.LockReleaseAction; -import io.druid.indexing.common.actions.SegmentInsertAction; +import io.druid.indexing.common.actions.TaskActionClient; import io.druid.query.FinalizeResultsQueryRunner; import io.druid.query.Query; import io.druid.query.QueryRunner; @@ -130,8 +128,7 @@ public class RealtimeIndexTask extends AbstractTask ), 1 ) : taskResource, - schema.getDataSource(), - null + schema.getDataSource() ); this.schema = schema; @@ -167,6 +164,12 @@ public class RealtimeIndexTask extends AbstractTask } } + @Override + public boolean isReady(TaskActionClient taskActionClient) throws Exception + { + return true; + } + @Override public TaskStatus run(final TaskToolbox toolbox) throws Exception { @@ -206,7 +209,7 @@ public class RealtimeIndexTask extends AbstractTask @Override public void announceSegment(final DataSegment segment) throws IOException { - // NOTE: Side effect: Calling announceSegment causes a lock to be acquired + // Side effect: Calling announceSegment causes a lock to be acquired toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval())); toolbox.getSegmentAnnouncer().announceSegment(segment); } @@ -225,6 +228,7 @@ public class RealtimeIndexTask extends AbstractTask @Override public void 
announceSegments(Iterable segments) throws IOException { + // Side effect: Calling announceSegments causes locks to be acquired for (DataSegment segment : segments) { toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval())); } @@ -257,7 +261,7 @@ public class RealtimeIndexTask extends AbstractTask public String getVersion(final Interval interval) { try { - // NOTE: Side effect: Calling getVersion causes a lock to be acquired + // Side effect: Calling getVersion causes a lock to be acquired final TaskLock myLock = toolbox.getTaskActionClient() .submit(new LockAcquireAction(interval)); @@ -337,11 +341,11 @@ public class RealtimeIndexTask extends AbstractTask } } } - catch (Exception e) { + catch (Throwable e) { + normalExit = false; log.makeAlert(e, "Exception aborted realtime processing[%s]", schema.getDataSource()) .emit(); - normalExit = false; - throw Throwables.propagate(e); + throw e; } finally { if (normalExit) { @@ -412,7 +416,7 @@ public class RealtimeIndexTask extends AbstractTask @Override public void publishSegment(DataSegment segment) throws IOException { - taskToolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment))); + taskToolbox.pushSegments(ImmutableList.of(segment)); } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java index a3e42232865..8fa4b53bf10 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java @@ -21,27 +21,22 @@ package io.druid.indexing.common.task; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.google.common.base.Optional; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.query.Query; import io.druid.query.QueryRunner; -import org.joda.time.Interval; /** * Represents a task that can run on a worker. The general contracts surrounding Tasks are: *
 * <ul>
- * <li>Tasks must operate on a single datasource.</li>
- * <li>Tasks should be immutable, since the task ID is used as a proxy for the task in many locations.</li>
- * <li>Task IDs must be unique. This can be done by naming them using UUIDs or the current timestamp.</li>
- * <li>Tasks are each part of a "task group", which is a set of tasks that can share interval locks. These are
- * useful for producing sharded segments.</li>
- * <li>Tasks can optionally have an "implicit lock interval". Tasks with this property are guaranteed to have
- * a lock on that interval during their {@link #preflight(io.druid.indexing.common.actions.TaskActionClient)}
- * and {@link #run(io.druid.indexing.common.TaskToolbox)} methods.</li>
- * <li>Tasks do not need to explicitly release locks; they are released upon task completion. Tasks may choose
- * to release locks early if they desire.</li>
+ * <li>Tasks must operate on a single datasource.</li>
+ * <li>Tasks should be immutable, since the task ID is used as a proxy for the task in many locations.</li>
+ * <li>Task IDs must be unique. This can be done by naming them using UUIDs or the current timestamp.</li>
+ * <li>Tasks are each part of a "task group", which is a set of tasks that can share interval locks. These are
+ * useful for producing sharded segments.</li>
+ * <li>Tasks do not need to explicitly release locks; they are released upon task completion. Tasks may choose
+ * to release locks early if they desire.</li>
 * </ul>
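+ *
+ * As a rough illustration of the isReady()/run() contract defined on this interface (the snippet
+ * below is an example only and not part of this patch), a task with no preconditions might
+ * implement:
+ *
+ * <pre>
+ *   public boolean isReady(TaskActionClient taskActionClient) throws Exception
+ *   {
+ *     // isReady() may be invoked several times before run(), so it must stay idempotent.
+ *     return true;
+ *   }
+ * </pre>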
*/ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @@ -50,9 +45,9 @@ import org.joda.time.Interval; @JsonSubTypes.Type(name = "merge", value = MergeTask.class), @JsonSubTypes.Type(name = "delete", value = DeleteTask.class), @JsonSubTypes.Type(name = "kill", value = KillTask.class), + @JsonSubTypes.Type(name = "move", value = MoveTask.class), + @JsonSubTypes.Type(name = "archive", value = ArchiveTask.class), @JsonSubTypes.Type(name = "index", value = IndexTask.class), - @JsonSubTypes.Type(name = "index_partitions", value = IndexDeterminePartitionsTask.class), - @JsonSubTypes.Type(name = "index_generator", value = IndexGeneratorTask.class), @JsonSubTypes.Type(name = "index_hadoop", value = HadoopIndexTask.class), @JsonSubTypes.Type(name = "index_realtime", value = RealtimeIndexTask.class), @JsonSubTypes.Type(name = "noop", value = NoopTask.class), @@ -96,12 +91,6 @@ public interface Task */ public String getDataSource(); - /** - * Returns implicit lock interval for this task, if any. Tasks without implicit lock intervals are not granted locks - * when started and must explicitly request them. - */ - public Optional getImplicitLockInterval(); - /** * Returns query runners for this task. If this task is not meant to answer queries over its datasource, this method * should return null. @@ -109,18 +98,19 @@ public interface Task public QueryRunner getQueryRunner(Query query); /** - * Execute preflight checks for a task. This typically runs on the coordinator, and will be run while - * holding a lock on our dataSource and implicit lock interval (if any). If this method throws an exception, the - * task should be considered a failure. + * Execute preflight actions for a task. This can be used to acquire locks, check preconditions, and so on. The + * actions must be idempotent, since this method may be executed multiple times. This typically runs on the + * coordinator. If this method throws an exception, the task should be considered a failure. + * + * This method must be idempotent, as it may be run multiple times per task. * * @param taskActionClient action client for this task (not the full toolbox) * - * @return Some kind of status (runnable means continue on to a worker, non-runnable means we completed without - * using a worker). + * @return true if ready, false if not ready yet * - * @throws Exception + * @throws Exception if the task should be considered a failure */ - public TaskStatus preflight(TaskActionClient taskActionClient) throws Exception; + public boolean isReady(TaskActionClient taskActionClient) throws Exception; /** * Execute a task. 
This typically runs on a worker as determined by a TaskRunner, and will be run while diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/VersionConverterTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/VersionConverterTask.java index 5085bdbd2e3..75561f2408e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/VersionConverterTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/VersionConverterTask.java @@ -23,16 +23,15 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Function; -import com.google.common.collect.Lists; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; -import com.metamx.common.ISE; import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.logger.Logger; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.SegmentInsertAction; import io.druid.indexing.common.actions.SegmentListUsedAction; -import io.druid.indexing.common.actions.SpawnTasksAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.segment.IndexIO; import io.druid.segment.loading.SegmentLoadingException; @@ -48,10 +47,10 @@ import java.util.Map; /** */ -public class VersionConverterTask extends AbstractTask +public class VersionConverterTask extends AbstractFixedIntervalTask { private static final String TYPE = "version_converter"; - private static final Integer CURR_VERSION_INTEGER = new Integer(IndexIO.CURRENT_VERSION_ID); + private static final Integer CURR_VERSION_INTEGER = IndexIO.CURRENT_VERSION_ID; private static final Logger log = new Logger(VersionConverterTask.class); @@ -74,6 +73,8 @@ public class VersionConverterTask extends AbstractTask private static String makeId(String dataSource, Interval interval) { + Preconditions.checkNotNull(dataSource, "dataSource"); + Preconditions.checkNotNull(interval, "interval"); return joinId(TYPE, dataSource, interval.getStart(), interval.getEnd(), new DateTime()); } @@ -105,7 +106,6 @@ public class VersionConverterTask extends AbstractTask ) { super(id, groupId, dataSource, interval); - this.segment = segment; } @@ -125,45 +125,43 @@ public class VersionConverterTask extends AbstractTask public TaskStatus run(TaskToolbox toolbox) throws Exception { if (segment == null) { - throw new ISE("Segment was null, this should never run.", this.getClass().getSimpleName()); - } - - log.info("I'm in a subless mood."); - convertSegment(toolbox, segment); - return success(); - } - - @Override - public TaskStatus preflight(TaskActionClient taskActionClient) throws Exception - { - if (segment != null) { - return super.preflight(taskActionClient); - } - - List segments = taskActionClient.submit(defaultListUsedAction()); - - final FunctionalIterable tasks = FunctionalIterable - .create(segments) - .keep( - new Function() - { - @Override - public Task apply(DataSegment segment) + final List segments = toolbox.getTaskActionClient().submit( + new SegmentListUsedAction( + getDataSource(), + getInterval() + ) + ); + final FunctionalIterable tasks = FunctionalIterable + .create(segments) + .keep( + new Function() { - final Integer segmentVersion = segment.getBinaryVersion(); - if (!CURR_VERSION_INTEGER.equals(segmentVersion)) { - return new 
SubTask(getGroupId(), segment); + @Override + public Task apply(DataSegment segment) + { + final Integer segmentVersion = segment.getBinaryVersion(); + if (!CURR_VERSION_INTEGER.equals(segmentVersion)) { + return new SubTask(getGroupId(), segment); + } + + log.info("Skipping[%s], already version[%s]", segment.getIdentifier(), segmentVersion); + return null; } - - log.info("Skipping[%s], already version[%s]", segment.getIdentifier(), segmentVersion); - return null; } - } - ); + ); - taskActionClient.submit(new SpawnTasksAction(Lists.newArrayList(tasks))); - - return TaskStatus.success(getId()); + // Vestigial from a past time when this task spawned subtasks. + for (final Task subTask : tasks) { + final TaskStatus status = subTask.run(toolbox); + if (!status.isSuccess()) { + return status; + } + } + } else { + log.info("I'm in a subless mood."); + convertSegment(toolbox, segment); + } + return success(); } @Override @@ -185,7 +183,7 @@ public class VersionConverterTask extends AbstractTask return super.equals(o); } - public static class SubTask extends AbstractTask + public static class SubTask extends AbstractFixedIntervalTask { @JsonIgnore private final DataSegment segment; @@ -251,7 +249,7 @@ public class VersionConverterTask extends AbstractTask } } - final Map localSegments = toolbox.getSegments(Arrays.asList(segment)); + final Map localSegments = toolbox.fetchSegments(Arrays.asList(segment)); final File location = localSegments.get(segment); final File outLocation = new File(location, "v9_out"); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java b/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java new file mode 100644 index 00000000000..e1649b46f32 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java @@ -0,0 +1,87 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.indexing.common.tasklogs; + +import com.google.common.base.Optional; +import com.google.common.io.ByteStreams; +import com.google.common.io.Files; +import com.google.common.io.InputSupplier; +import com.google.inject.Inject; +import com.metamx.common.logger.Logger; +import io.druid.indexing.common.config.FileTaskLogsConfig; +import io.druid.tasklogs.TaskLogs; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +public class FileTaskLogs implements TaskLogs +{ + private static final Logger log = new Logger(FileTaskLogs.class); + + private final FileTaskLogsConfig config; + + @Inject + public FileTaskLogs( + FileTaskLogsConfig config + ) + { + this.config = config; + } + + @Override + public void pushTaskLog(final String taskid, File file) throws IOException + { + if (!config.getDirectory().exists()) { + config.getDirectory().mkdir(); + } + final File outputFile = fileForTask(taskid); + Files.copy(file, outputFile); + log.info("Wrote task log to: %s", outputFile); + } + + @Override + public Optional> streamTaskLog(final String taskid, final long offset) throws IOException + { + final File file = fileForTask(taskid); + if (file.exists()) { + return Optional.>of( + new InputSupplier() + { + @Override + public InputStream getInput() throws IOException + { + final InputStream inputStream = new FileInputStream(file); + ByteStreams.skipFully(inputStream, offset); + return inputStream; + } + } + ); + } else { + return Optional.absent(); + } + } + + private File fileForTask(final String taskid) + { + return new File(config.getDirectory(), String.format("%s.log", taskid)); + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/DbTaskStorage.java b/indexing-service/src/main/java/io/druid/indexing/overlord/DbTaskStorage.java index ec2e2f99feb..cf0fb4f3e24 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/DbTaskStorage.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/DbTaskStorage.java @@ -23,29 +23,41 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; import com.google.common.base.Optional; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.inject.Inject; +import com.metamx.common.RetryUtils; import com.metamx.common.lifecycle.LifecycleStart; import com.metamx.common.lifecycle.LifecycleStop; import com.metamx.emitter.EmittingLogger; +import com.mysql.jdbc.exceptions.MySQLTransientException; import io.druid.db.DbConnector; import io.druid.db.DbTablesConfig; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.actions.TaskAction; +import io.druid.indexing.common.config.TaskStorageConfig; import io.druid.indexing.common.task.Task; import org.joda.time.DateTime; +import org.joda.time.Period; import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.IDBI; +import org.skife.jdbi.v2.exceptions.CallbackFailedException; +import org.skife.jdbi.v2.exceptions.DBIException; import org.skife.jdbi.v2.exceptions.StatementException; +import org.skife.jdbi.v2.exceptions.UnableToObtainConnectionException; import org.skife.jdbi.v2.tweak.HandleCallback; +import java.sql.SQLException; +import 
java.sql.SQLRecoverableException; +import java.sql.SQLTransientException; import java.util.List; import java.util.Map; +import java.util.concurrent.Callable; public class DbTaskStorage implements TaskStorage { @@ -53,16 +65,24 @@ public class DbTaskStorage implements TaskStorage private final DbConnector dbConnector; private final DbTablesConfig dbTables; private final IDBI dbi; + private final TaskStorageConfig config; private static final EmittingLogger log = new EmittingLogger(DbTaskStorage.class); @Inject - public DbTaskStorage(ObjectMapper jsonMapper, DbConnector dbConnector, DbTablesConfig dbTables, IDBI dbi) + public DbTaskStorage( + final ObjectMapper jsonMapper, + final DbConnector dbConnector, + final DbTablesConfig dbTables, + final IDBI dbi, + final TaskStorageConfig config + ) { this.jsonMapper = jsonMapper; this.dbConnector = dbConnector; this.dbTables = dbTables; this.dbi = dbi; + this.config = config; } @LifecycleStart @@ -92,7 +112,7 @@ public class DbTaskStorage implements TaskStorage log.info("Inserting task %s with status: %s", task.getId(), status); try { - dbi.withHandle( + retryingHandle( new HandleCallback() { @Override @@ -134,7 +154,7 @@ public class DbTaskStorage implements TaskStorage log.info("Updating task %s to status: %s", status.getId(), status); - int updated = dbi.withHandle( + int updated = retryingHandle( new HandleCallback() { @Override @@ -162,7 +182,7 @@ public class DbTaskStorage implements TaskStorage @Override public Optional getTask(final String taskid) { - return dbi.withHandle( + return retryingHandle( new HandleCallback>() { @Override @@ -192,7 +212,7 @@ public class DbTaskStorage implements TaskStorage @Override public Optional getStatus(final String taskid) { - return dbi.withHandle( + return retryingHandle( new HandleCallback>() { @Override @@ -220,9 +240,9 @@ public class DbTaskStorage implements TaskStorage } @Override - public List getRunningTasks() + public List getActiveTasks() { - return dbi.withHandle( + return retryingHandle( new HandleCallback>() { @Override @@ -231,7 +251,7 @@ public class DbTaskStorage implements TaskStorage final List> dbTasks = handle.createQuery( String.format( - "SELECT id, payload, status_payload FROM %s WHERE active = 1", + "SELECT id, payload, status_payload FROM %s WHERE active = 1 ORDER BY created_date", dbTables.getTasksTable() ) ) @@ -260,6 +280,45 @@ public class DbTaskStorage implements TaskStorage ); } + @Override + public List getRecentlyFinishedTaskStatuses() + { + final DateTime recent = new DateTime().minus(config.getRecentlyFinishedThreshold()); + return retryingHandle( + new HandleCallback>() + { + @Override + public List withHandle(Handle handle) throws Exception + { + final List> dbTasks = + handle.createQuery( + String.format( + "SELECT id, status_payload FROM %s WHERE active = 0 AND created_date >= :recent ORDER BY created_date DESC", + dbTables.getTasksTable() + ) + ).bind("recent", recent.toString()).list(); + + final ImmutableList.Builder statuses = ImmutableList.builder(); + for (final Map row : dbTasks) { + final String id = row.get("id").toString(); + + try { + final TaskStatus status = jsonMapper.readValue((byte[]) row.get("status_payload"), TaskStatus.class); + if (status.isComplete()) { + statuses.add(status); + } + } + catch (Exception e) { + log.makeAlert(e, "Failed to parse status payload").addData("task", id).emit(); + } + } + + return statuses.build(); + } + } + ); + } + @Override public void addLock(final String taskid, final TaskLock taskLock) { @@ -273,7 +332,7 @@ public 
class DbTaskStorage implements TaskStorage taskid ); - dbi.withHandle( + retryingHandle( new HandleCallback() { @Override @@ -308,7 +367,7 @@ public class DbTaskStorage implements TaskStorage if (taskLock.equals(taskLockToRemove)) { log.info("Deleting TaskLock with id[%d]: %s", id, taskLock); - dbi.withHandle( + retryingHandle( new HandleCallback() { @Override @@ -353,7 +412,7 @@ public class DbTaskStorage implements TaskStorage log.info("Logging action for task[%s]: %s", task.getId(), taskAction); - dbi.withHandle( + retryingHandle( new HandleCallback() { @Override @@ -376,7 +435,7 @@ public class DbTaskStorage implements TaskStorage @Override public List getAuditLogs(final String taskid) { - return dbi.withHandle( + return retryingHandle( new HandleCallback>() { @Override @@ -392,21 +451,19 @@ public class DbTaskStorage implements TaskStorage .bind("task_id", taskid) .list(); - return Lists.transform( - dbTaskLogs, new Function, TaskAction>() - { - @Override - public TaskAction apply(Map row) - { - try { - return jsonMapper.readValue((byte[]) row.get("log_payload"), TaskAction.class); - } - catch (Exception e) { - throw Throwables.propagate(e); - } + final List retList = Lists.newArrayList(); + for (final Map dbTaskLog : dbTaskLogs) { + try { + retList.add(jsonMapper.readValue((byte[]) dbTaskLog.get("log_payload"), TaskAction.class)); + } + catch (Exception e) { + log.makeAlert(e, "Failed to deserialize TaskLog") + .addData("task", taskid) + .addData("logPayload", dbTaskLog) + .emit(); } } - ); + return retList; } } ); @@ -414,7 +471,7 @@ public class DbTaskStorage implements TaskStorage private Map getLocksWithIds(final String taskid) { - return dbi.withHandle( + return retryingHandle( new HandleCallback>() { @Override @@ -432,11 +489,66 @@ public class DbTaskStorage implements TaskStorage final Map retMap = Maps.newHashMap(); for (final Map row : dbTaskLocks) { - retMap.put((Long) row.get("id"), jsonMapper.readValue((byte[]) row.get("lock_payload"), TaskLock.class)); + try { + retMap.put( + (Long) row.get("id"), + jsonMapper.readValue((byte[]) row.get("lock_payload"), TaskLock.class) + ); + } + catch (Exception e) { + log.makeAlert(e, "Failed to deserialize TaskLock") + .addData("task", taskid) + .addData("lockPayload", row) + .emit(); + } } return retMap; } } ); } + + /** + * Retry SQL operations + */ + private T retryingHandle(final HandleCallback callback) + { + final Callable call = new Callable() + { + @Override + public T call() throws Exception + { + return dbi.withHandle(callback); + } + }; + final Predicate shouldRetry = new Predicate() + { + @Override + public boolean apply(Throwable e) + { + return shouldRetryException(e); + } + }; + final int maxTries = 10; + try { + return RetryUtils.retry(call, shouldRetry, maxTries); + } + catch (RuntimeException e) { + throw Throwables.propagate(e); + } + catch (Exception e) { + throw new CallbackFailedException(e); + } + } + + private static boolean shouldRetryException(final Throwable e) + { + return e != null && (e instanceof SQLTransientException + || e instanceof MySQLTransientException + || e instanceof SQLRecoverableException + || e instanceof UnableToObtainConnectionException + || (e instanceof SQLException && ((SQLException) e).getErrorCode() == 1317) + || (e instanceof SQLException && shouldRetryException(e.getCause())) + || (e instanceof DBIException && shouldRetryException(e.getCause()))); + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java 
b/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java index 759149b7bd4..2be8a6a6a3b 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java @@ -101,12 +101,6 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer this.exec = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(workerConfig.getCapacity())); } - @Override - public void bootstrap(List tasks) - { - // do nothing - } - @Override public ListenableFuture run(final Task task) { @@ -115,7 +109,7 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer tasks.put( task.getId(), new ForkingTaskRunnerWorkItem( - task, + task.getId(), exec.submit( new Callable() { @@ -224,29 +218,20 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer } log.info("Logging task %s output to: %s", task.getId(), logFile); - - final InputStream fromProc = processHolder.process.getInputStream(); - final OutputStream toLogfile = closer.register( - Files.newOutputStreamSupplier(logFile).getOutput() - ); - boolean runFailed = true; - ByteStreams.copy(fromProc, toLogfile); - final int statusCode = processHolder.process.waitFor(); - log.info("Process exited with status[%d] for task: %s", statusCode, task.getId()); - - if (statusCode == 0) { - runFailed = false; + try (final OutputStream toLogfile = Files.newOutputStreamSupplier(logFile).getOutput()) { + ByteStreams.copy(processHolder.process.getInputStream(), toLogfile); + final int statusCode = processHolder.process.waitFor(); + log.info("Process exited with status[%d] for task: %s", statusCode, task.getId()); + if (statusCode == 0) { + runFailed = false; + } + } + finally { + // Upload task logs + taskLogPusher.pushTaskLog(task.getId(), logFile); } - - // Upload task logs - - // XXX: Consider uploading periodically for very long-lived tasks to prevent - // XXX: bottlenecks at the end or the possibility of losing a lot of logs all - // XXX: at once. 
- - taskLogPusher.pushTaskLog(task.getId(), logFile); if (!runFailed) { // Process exited successfully @@ -261,9 +246,9 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer closer.close(); } } - catch (Exception e) { - log.info(e, "Exception caught during execution"); - throw Throwables.propagate(e); + catch (Throwable t) { + log.info(t, "Exception caught during execution"); + throw Throwables.propagate(t); } finally { try { @@ -358,6 +343,14 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer } } + @Override + public Collection getKnownTasks() + { + synchronized (tasks) { + return Lists.newArrayList(tasks.values()); + } + } + @Override public Collection getWorkers() { @@ -389,7 +382,7 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer if (offset > 0) { raf.seek(offset); } else if (offset < 0 && offset < rafLength) { - raf.seek(rafLength + offset); + raf.seek(Math.max(0, rafLength + offset)); } return Channels.newInputStream(raf.getChannel()); } @@ -425,11 +418,11 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer private volatile ProcessHolder processHolder = null; private ForkingTaskRunnerWorkItem( - Task task, + String taskId, ListenableFuture statusFuture ) { - super(task, statusFuture); + super(taskId, statusFuture); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/HeapMemoryTaskStorage.java b/indexing-service/src/main/java/io/druid/indexing/overlord/HeapMemoryTaskStorage.java index 7ebaa69dab9..ef942e5c12f 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/HeapMemoryTaskStorage.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/HeapMemoryTaskStorage.java @@ -19,6 +19,7 @@ package io.druid.indexing.overlord; +import com.google.api.client.util.Lists; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.collect.ArrayListMultimap; @@ -26,11 +27,15 @@ import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; +import com.google.common.collect.Ordering; +import com.google.inject.Inject; import com.metamx.common.logger.Logger; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.actions.TaskAction; +import io.druid.indexing.common.config.TaskStorageConfig; import io.druid.indexing.common.task.Task; +import org.joda.time.DateTime; import java.util.List; import java.util.Map; @@ -42,6 +47,8 @@ import java.util.concurrent.locks.ReentrantLock; */ public class HeapMemoryTaskStorage implements TaskStorage { + private final TaskStorageConfig config; + private final ReentrantLock giant = new ReentrantLock(); private final Map tasks = Maps.newHashMap(); private final Multimap taskLocks = HashMultimap.create(); @@ -49,6 +56,12 @@ public class HeapMemoryTaskStorage implements TaskStorage private static final Logger log = new Logger(HeapMemoryTaskStorage.class); + @Inject + public HeapMemoryTaskStorage(TaskStorageConfig config) + { + this.config = config; + } + @Override public void insert(Task task, TaskStatus status) { @@ -69,7 +82,7 @@ public class HeapMemoryTaskStorage implements TaskStorage } log.info("Inserting task %s with status: %s", task.getId(), status); - tasks.put(task.getId(), new TaskStuff(task, status)); + tasks.put(task.getId(), new TaskStuff(task, status, new DateTime())); } finally { giant.unlock(); } 
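For orientation, a hedged sketch of how a caller might use the reworked TaskStorage read API shown in the surrounding hunks; only the TaskStorage, Task, and TaskStatus method names come from this patch, while the wrapper class below is hypothetical:

```java
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.task.Task;
import io.druid.indexing.overlord.TaskStorage;

// Sketch only: getActiveTasks() replaces getRunningTasks(), and
// getRecentlyFinishedTaskStatuses() is bounded by TaskStorageConfig#getRecentlyFinishedThreshold().
public class TaskStorageUsageSketch
{
  public static void printTasks(TaskStorage storage)
  {
    for (Task task : storage.getActiveTasks()) {
      System.out.println("active: " + task.getId());
    }
    for (TaskStatus status : storage.getRecentlyFinishedTaskStatuses()) {
      System.out.println("recently finished: " + status.getId() + " -> " + status.getStatusCode());
    }
  }
}
```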
@@ -128,7 +141,7 @@ public class HeapMemoryTaskStorage implements TaskStorage } @Override - public List getRunningTasks() + public List getActiveTasks() { giant.lock(); @@ -139,13 +152,39 @@ public class HeapMemoryTaskStorage implements TaskStorage listBuilder.add(taskStuff.getTask()); } } - return listBuilder.build(); } finally { giant.unlock(); } } + @Override + public List getRecentlyFinishedTaskStatuses() + { + giant.lock(); + + try { + final List returns = Lists.newArrayList(); + final long recent = System.currentTimeMillis() - config.getRecentlyFinishedThreshold().getMillis(); + final Ordering createdDateDesc = new Ordering() + { + @Override + public int compare(TaskStuff a, TaskStuff b) + { + return a.getCreatedDate().compareTo(b.getCreatedDate()); + } + }.reverse(); + for(final TaskStuff taskStuff : createdDateDesc.sortedCopy(tasks.values())) { + if(taskStuff.getStatus().isComplete() && taskStuff.getCreatedDate().getMillis() > recent) { + returns.add(taskStuff.getStatus()); + } + } + return returns; + } finally { + giant.unlock(); + } + } + @Override public void addLock(final String taskid, final TaskLock taskLock) { @@ -212,8 +251,9 @@ public class HeapMemoryTaskStorage implements TaskStorage { final Task task; final TaskStatus status; + final DateTime createdDate; - private TaskStuff(Task task, TaskStatus status) + private TaskStuff(Task task, TaskStatus status, DateTime createdDate) { Preconditions.checkNotNull(task); Preconditions.checkNotNull(status); @@ -221,6 +261,7 @@ public class HeapMemoryTaskStorage implements TaskStorage this.task = task; this.status = status; + this.createdDate = Preconditions.checkNotNull(createdDate, "createdDate"); } public Task getTask() @@ -233,9 +274,14 @@ public class HeapMemoryTaskStorage implements TaskStorage return status; } + public DateTime getCreatedDate() + { + return createdDate; + } + private TaskStuff withStatus(TaskStatus _status) { - return new TaskStuff(task, _status); + return new TaskStuff(task, _status, createdDate); } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/IndexerDBCoordinator.java b/indexing-service/src/main/java/io/druid/indexing/overlord/IndexerDBCoordinator.java index 2a4b5e8912d..7e5f3ef48dd 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/IndexerDBCoordinator.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/IndexerDBCoordinator.java @@ -28,8 +28,6 @@ import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.inject.Inject; import com.metamx.common.logger.Logger; -import io.druid.db.DbConnector; -import io.druid.db.DbConnectorConfig; import io.druid.db.DbTablesConfig; import io.druid.timeline.DataSegment; import io.druid.timeline.TimelineObjectHolder; @@ -44,6 +42,7 @@ import org.skife.jdbi.v2.ResultIterator; import org.skife.jdbi.v2.StatementContext; import org.skife.jdbi.v2.TransactionCallback; import org.skife.jdbi.v2.TransactionStatus; +import org.skife.jdbi.v2.exceptions.CallbackFailedException; import org.skife.jdbi.v2.tweak.HandleCallback; import java.io.IOException; @@ -171,39 +170,39 @@ public class IndexerDBCoordinator private boolean announceHistoricalSegment(final Handle handle, final DataSegment segment) throws IOException { try { - final List> exists = handle.createQuery( - String.format( - "SELECT id FROM %s WHERE id = :identifier", - dbTables.getSegmentsTable() - ) - ).bind( - "identifier", - segment.getIdentifier() - ).list(); - - if (!exists.isEmpty()) { + if (segmentExists(handle, 
segment)) { log.info("Found [%s] in DB, not updating DB", segment.getIdentifier()); return false; } - handle.createStatement( - String.format( - "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)", - dbTables.getSegmentsTable() - ) - ) - .bind("id", segment.getIdentifier()) - .bind("dataSource", segment.getDataSource()) - .bind("created_date", new DateTime().toString()) - .bind("start", segment.getInterval().getStart().toString()) - .bind("end", segment.getInterval().getEnd().toString()) - .bind("partitioned", segment.getShardSpec().getPartitionNum()) - .bind("version", segment.getVersion()) - .bind("used", true) - .bind("payload", jsonMapper.writeValueAsString(segment)) - .execute(); + // Try/catch to work around races due to SELECT -> INSERT. Avoid ON DUPLICATE KEY since it's not portable. + try { + handle.createStatement( + String.format( + "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)", + dbTables.getSegmentsTable() + ) + ) + .bind("id", segment.getIdentifier()) + .bind("dataSource", segment.getDataSource()) + .bind("created_date", new DateTime().toString()) + .bind("start", segment.getInterval().getStart().toString()) + .bind("end", segment.getInterval().getEnd().toString()) + .bind("partitioned", segment.getShardSpec().getPartitionNum()) + .bind("version", segment.getVersion()) + .bind("used", true) + .bind("payload", jsonMapper.writeValueAsString(segment)) + .execute(); - log.info("Published segment [%s] to DB", segment.getIdentifier()); + log.info("Published segment [%s] to DB", segment.getIdentifier()); + } catch (Exception e) { + if (e.getCause() instanceof SQLException && segmentExists(handle, segment)) { + log.info("Found [%s] in DB, not updating DB", segment.getIdentifier()); + } else { + throw e; + } + } } catch (IOException e) { log.error(e, "Exception inserting into DB"); @@ -213,6 +212,38 @@ public class IndexerDBCoordinator return true; } + private boolean segmentExists(final Handle handle, final DataSegment segment) { + final List> exists = handle.createQuery( + String.format( + "SELECT id FROM %s WHERE id = :identifier", + dbTables.getSegmentsTable() + ) + ).bind( + "identifier", + segment.getIdentifier() + ).list(); + + return !exists.isEmpty(); + } + + public void updateSegmentMetadata(final Set segments) throws IOException + { + dbi.inTransaction( + new TransactionCallback() + { + @Override + public Void inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception + { + for(final DataSegment segment : segments) { + updatePayload(handle, segment); + } + + return null; + } + } + ); + } + public void deleteSegments(final Set segments) throws IOException { dbi.inTransaction( @@ -235,10 +266,27 @@ public class IndexerDBCoordinator { handle.createStatement( String.format("DELETE from %s WHERE id = :id", dbTables.getSegmentsTable()) - ).bind("id", segment.getIdentifier()) + ) + .bind("id", segment.getIdentifier()) .execute(); } + private void updatePayload(final Handle handle, final DataSegment segment) throws IOException + { + try { + handle.createStatement( + String.format("UPDATE %s SET payload = :payload WHERE id = :id", dbTables.getSegmentsTable()) + ) + .bind("id", segment.getIdentifier()) + .bind("payload", jsonMapper.writeValueAsString(segment)) + 
.execute(); + } + catch (IOException e) { + log.error(e, "Exception inserting into DB"); + throw e; + } + } + public List getUnusedSegmentsForInterval(final String dataSource, final Interval interval) { List matchingSegments = dbi.withHandle( diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java index be87cef7255..2c482b775f9 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java @@ -27,11 +27,14 @@ import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.base.Supplier; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.InputSupplier; import com.google.common.primitives.Ints; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; import com.metamx.common.ISE; @@ -51,8 +54,8 @@ import io.druid.indexing.worker.TaskAnnouncement; import io.druid.indexing.worker.Worker; import io.druid.server.initialization.ZkPathsConfig; import io.druid.tasklogs.TaskLogStreamer; +import org.apache.commons.lang.mutable.MutableInt; import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.recipes.cache.ChildData; import org.apache.curator.framework.recipes.cache.PathChildrenCache; import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; @@ -72,6 +75,7 @@ import java.util.Map; import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -108,11 +112,15 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer private final HttpClient httpClient; // all workers that exist in ZK - private final Map zkWorkers = new ConcurrentHashMap(); + private final ConcurrentMap zkWorkers = new ConcurrentHashMap<>(); + // payloads of pending tasks, which we remember just long enough to assign to workers + private final ConcurrentMap pendingTaskPayloads = new ConcurrentHashMap<>(); + // tasks that have not yet been assigned to a worker + private final RemoteTaskRunnerWorkQueue pendingTasks = new RemoteTaskRunnerWorkQueue(); // all tasks that have been assigned to a worker private final RemoteTaskRunnerWorkQueue runningTasks = new RemoteTaskRunnerWorkQueue(); - // tasks that have not yet run - private final RemoteTaskRunnerWorkQueue pendingTasks = new RemoteTaskRunnerWorkQueue(); + // tasks that are complete but not cleaned up yet + private final RemoteTaskRunnerWorkQueue completeTasks = new RemoteTaskRunnerWorkQueue(); private final ExecutorService runPendingTasksExec = Executors.newSingleThreadExecutor(); @@ -148,6 +156,9 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer return; } + final MutableInt waitingFor = new MutableInt(1); + final Object 
waitingForMonitor = new Object(); + // Add listener for creation/deletion of workers workerPathCache.getListenable().addListener( new PathChildrenCacheListener() @@ -162,7 +173,32 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer event.getData().getData(), Worker.class ); - addWorker(worker, PathChildrenCache.StartMode.NORMAL); + synchronized (waitingForMonitor) { + waitingFor.increment(); + } + Futures.addCallback( + addWorker(worker), + new FutureCallback() + { + @Override + public void onSuccess(ZkWorker zkWorker) + { + synchronized (waitingForMonitor) { + waitingFor.decrement(); + waitingForMonitor.notifyAll(); + } + } + + @Override + public void onFailure(Throwable throwable) + { + synchronized (waitingForMonitor) { + waitingFor.decrement(); + waitingForMonitor.notifyAll(); + } + } + } + ); break; case CHILD_REMOVED: worker = jsonMapper.readValue( @@ -171,22 +207,23 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer ); removeWorker(worker); break; + case INITIALIZED: + synchronized (waitingForMonitor) { + waitingFor.decrement(); + waitingForMonitor.notifyAll(); + } default: break; } } } ); - workerPathCache.start(PathChildrenCache.StartMode.BUILD_INITIAL_CACHE); - - for (ChildData childData : workerPathCache.getCurrentData()) { - final Worker worker = jsonMapper.readValue( - childData.getData(), - Worker.class - ); - addWorker(worker, PathChildrenCache.StartMode.BUILD_INITIAL_CACHE); + workerPathCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); + synchronized (waitingForMonitor) { + while (waitingFor.intValue() > 0) { + waitingForMonitor.wait(); + } } - started = true; } catch (Exception e) { @@ -201,7 +238,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer if (!started) { return; } - + started = false; for (ZkWorker zkWorker : zkWorkers.values()) { zkWorker.close(); } @@ -210,27 +247,31 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer catch (Exception e) { throw Throwables.propagate(e); } - finally { - started = false; - } } @Override public Collection getWorkers() { - return zkWorkers.values(); + return ImmutableList.copyOf(zkWorkers.values()); } @Override public Collection getRunningTasks() { - return runningTasks.values(); + return ImmutableList.copyOf(runningTasks.values()); } @Override public Collection getPendingTasks() { - return pendingTasks.values(); + return ImmutableList.copyOf(pendingTasks.values()); + } + + @Override + public Collection getKnownTasks() + { + // Racey, since there is a period of time during assignment when a task is neither pending nor running + return ImmutableList.copyOf(Iterables.concat(pendingTasks.values(), runningTasks.values(), completeTasks.values())); } public ZkWorker findWorkerRunningTask(String taskId) @@ -243,46 +284,10 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer return null; } - public boolean isWorkerRunningTask(Worker worker, Task task) + public boolean isWorkerRunningTask(Worker worker, String taskId) { ZkWorker zkWorker = zkWorkers.get(worker.getHost()); - - return (zkWorker != null && zkWorker.isRunningTask(task.getId())); - } - - @Override - public void bootstrap(List tasks) - { - try { - if (!started) { - throw new ISE("Must start RTR first before calling bootstrap!"); - } - - Map existingTasks = Maps.newHashMap(); - for (ZkWorker zkWorker : zkWorkers.values()) { - for (String runningTask : zkWorker.getRunningTasks().keySet()) { - existingTasks.put(runningTask, zkWorker.getWorker()); - } - } - - for (Task 
task : tasks) { - Worker worker = existingTasks.get(task.getId()); - if (worker != null) { - log.info("Bootstrap found [%s] running on [%s].", task.getId(), worker.getHost()); - runningTasks.put( - task.getId(), - new RemoteTaskRunnerWorkItem( - task, - SettableFuture.create(), - worker - ) - ); - } - } - } - catch (Exception e) { - throw Throwables.propagate(e); - } + return (zkWorker != null && zkWorker.isRunningTask(taskId)); } /** @@ -293,8 +298,11 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer @Override public ListenableFuture run(final Task task) { - RemoteTaskRunnerWorkItem runningTask = runningTasks.get(task.getId()); - if (runningTask != null) { + final RemoteTaskRunnerWorkItem completeTask, runningTask, pendingTask; + if ((pendingTask = pendingTasks.get(task.getId())) != null) { + log.info("Assigned a task[%s] that is already pending, not doing anything", task.getId()); + return pendingTask.getResult(); + } else if ((runningTask = runningTasks.get(task.getId())) != null) { ZkWorker zkWorker = findWorkerRunningTask(task.getId()); if (zkWorker == null) { log.warn("Told to run task[%s], but no worker has started running it yet.", task.getId()); @@ -302,26 +310,15 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer log.info("Task[%s] already running on %s.", task.getId(), zkWorker.getWorker().getHost()); TaskAnnouncement announcement = zkWorker.getRunningTasks().get(task.getId()); if (announcement.getTaskStatus().isComplete()) { - taskComplete(runningTask, zkWorker, task.getId(), announcement.getTaskStatus()); + taskComplete(runningTask, zkWorker, announcement.getTaskStatus()); } } - return runningTask.getResult(); + } else if ((completeTask = completeTasks.get(task.getId())) != null) { + return completeTask.getResult(); + } else { + return addPendingTask(task).getResult(); } - - RemoteTaskRunnerWorkItem pendingTask = pendingTasks.get(task.getId()); - if (pendingTask != null) { - log.info("Assigned a task[%s] that is already pending, not doing anything", task.getId()); - return pendingTask.getResult(); - } - - RemoteTaskRunnerWorkItem taskRunnerWorkItem = new RemoteTaskRunnerWorkItem( - task, - SettableFuture.create(), - null - ); - addPendingTask(taskRunnerWorkItem); - return taskRunnerWorkItem.getResult(); } /** @@ -330,39 +327,43 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer * @param taskId - task id to shutdown */ @Override - public void shutdown(String taskId) + public void shutdown(final String taskId) { - if (pendingTasks.containsKey(taskId)) { - pendingTasks.remove(taskId); - return; - } + if (!started) { + log.info("This TaskRunner is stopped. Ignoring shutdown command for task: %s", taskId); + } else if (pendingTasks.remove(taskId) != null) { + pendingTaskPayloads.remove(taskId); + log.info("Removed task from pending queue: %s", taskId); + } else if (completeTasks.containsKey(taskId)) { + cleanup(completeTasks.get(taskId).getWorker().getHost(), taskId); + } else { + final ZkWorker zkWorker = findWorkerRunningTask(taskId); - final ZkWorker zkWorker = findWorkerRunningTask(taskId); - - if (zkWorker == null) { - log.info("Can't shutdown! 
No worker running task %s", taskId); - return; - } - - try { - final URL url = makeWorkerURL(zkWorker.getWorker(), String.format("/task/%s/shutdown", taskId)); - final StatusResponseHolder response = httpClient.post(url) - .go(RESPONSE_HANDLER) - .get(); - - log.info( - "Sent shutdown message to worker: %s, status %s, response: %s", - zkWorker.getWorker().getHost(), - response.getStatus(), - response.getContent() - ); - - if (!response.getStatus().equals(HttpResponseStatus.ACCEPTED)) { - log.error("Shutdown failed for %s! Are you sure the task was running?", taskId); + if (zkWorker == null) { + log.info("Can't shutdown! No worker running task %s", taskId); + return; + } + + try { + final URL url = makeWorkerURL(zkWorker.getWorker(), String.format("/task/%s/shutdown", taskId)); + final StatusResponseHolder response = httpClient.post(url) + .go(RESPONSE_HANDLER) + .get(); + + log.info( + "Sent shutdown message to worker: %s, status %s, response: %s", + zkWorker.getWorker().getHost(), + response.getStatus(), + response.getContent() + ); + + if (!response.getStatus().equals(HttpResponseStatus.ACCEPTED)) { + log.error("Shutdown failed for %s! Are you sure the task was running?", taskId); + } + } + catch (Exception e) { + throw Throwables.propagate(e); } - } - catch (Exception e) { - throw Throwables.propagate(e); } } @@ -417,12 +418,18 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer /** * Adds a task to the pending queue */ - private void addPendingTask(final RemoteTaskRunnerWorkItem taskRunnerWorkItem) + private RemoteTaskRunnerWorkItem addPendingTask(final Task task) { - log.info("Added pending task %s", taskRunnerWorkItem.getTask().getId()); - - pendingTasks.put(taskRunnerWorkItem.getTask().getId(), taskRunnerWorkItem); + log.info("Added pending task %s", task.getId()); + final RemoteTaskRunnerWorkItem taskRunnerWorkItem = new RemoteTaskRunnerWorkItem( + task.getId(), + SettableFuture.create(), + null + ); + pendingTaskPayloads.put(task.getId(), task); + pendingTasks.put(task.getId(), taskRunnerWorkItem); runPendingTasks(); + return taskRunnerWorkItem; } /** @@ -439,11 +446,14 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer public Void call() throws Exception { try { - // make a copy of the pending tasks because assignTask may delete tasks from pending and move them + // make a copy of the pending tasks because tryAssignTask may delete tasks from pending and move them // into running status List copy = Lists.newArrayList(pendingTasks.values()); - for (RemoteTaskRunnerWorkItem taskWrapper : copy) { - assignTask(taskWrapper); + for (RemoteTaskRunnerWorkItem taskRunnerWorkItem : copy) { + String taskId = taskRunnerWorkItem.getTaskId(); + if (tryAssignTask(pendingTaskPayloads.get(taskId), taskRunnerWorkItem)) { + pendingTaskPayloads.remove(taskId); + } } } catch (Exception e) { @@ -457,21 +467,30 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer } /** - * Removes a task from the running queue and clears out the ZK status path of the task. + * Removes a task from the complete queue and clears out the ZK status path of the task. 
* * @param workerId - the worker that was previously running the task * @param taskId - the task to cleanup */ private void cleanup(final String workerId, final String taskId) { - log.info("Cleaning up [%s]", taskId); - runningTasks.remove(taskId); - final String statusPath = JOINER.join(zkPaths.getIndexerStatusPath(), workerId, taskId); - try { - cf.delete().guaranteed().forPath(statusPath); + if (!started) { + return; } - catch (Exception e) { - log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath); + if (completeTasks.remove(taskId) == null) { + log.makeAlert("WTF?! Asked to cleanup nonexistent task") + .addData("workerId", workerId) + .addData("taskId", taskId) + .emit(); + } else { + log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId); + final String statusPath = JOINER.join(zkPaths.getIndexerStatusPath(), workerId, taskId); + try { + cf.delete().guaranteed().forPath(statusPath); + } + catch (Exception e) { + log.info("Tried to delete status path[%s] that didn't exist! Must've gone away already?", statusPath); + } } } @@ -481,26 +500,34 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer * needs to bootstrap after a restart. * * @param taskRunnerWorkItem - the task to assign + * @return true iff the task is now assigned */ - private void assignTask(RemoteTaskRunnerWorkItem taskRunnerWorkItem) + private boolean tryAssignTask(final Task task, final RemoteTaskRunnerWorkItem taskRunnerWorkItem) { try { - final String taskId = taskRunnerWorkItem.getTask().getId(); + Preconditions.checkNotNull(task, "task"); + Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem"); + Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id"); - if (runningTasks.containsKey(taskId) || findWorkerRunningTask(taskId) != null) { - log.info("Task[%s] already running.", taskId); + if (runningTasks.containsKey(task.getId()) || findWorkerRunningTask(task.getId()) != null) { + log.info("Task[%s] already running.", task.getId()); + return true; } else { // Nothing running this task, announce it in ZK for a worker to run it - ZkWorker zkWorker = findWorkerForTask(taskRunnerWorkItem.getTask()); + ZkWorker zkWorker = findWorkerForTask(task); if (zkWorker != null) { - announceTask(zkWorker, taskRunnerWorkItem); + announceTask(task, zkWorker, taskRunnerWorkItem); + return true; + } else { + return false; } } } catch (Exception e) { log.makeAlert(e, "Exception while trying to run task") - .addData("taskId", taskRunnerWorkItem.getTask().getId()) + .addData("taskId", taskRunnerWorkItem.getTaskId()) .emit(); + return false; } } @@ -511,9 +538,13 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer * @param theZkWorker The worker the task is assigned to * @param taskRunnerWorkItem The task to be assigned */ - private void announceTask(ZkWorker theZkWorker, RemoteTaskRunnerWorkItem taskRunnerWorkItem) throws Exception + private void announceTask( + final Task task, + final ZkWorker theZkWorker, + final RemoteTaskRunnerWorkItem taskRunnerWorkItem + ) throws Exception { - final Task task = taskRunnerWorkItem.getTask(); + Preconditions.checkArgument(task.getId().equals(taskRunnerWorkItem.getTaskId()), "task id != workItem id"); final Worker theWorker = theZkWorker.getWorker(); log.info("Coordinator asking Worker[%s] to add task[%s]", theWorker.getHost(), task.getId()); @@ -550,7 +581,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer Stopwatch 
timeoutStopwatch = new Stopwatch(); timeoutStopwatch.start(); synchronized (statusLock) { - while (!isWorkerRunningTask(theWorker, task)) { + while (!isWorkerRunningTask(theWorker, task.getId())) { final long waitMs = config.getTaskAssignmentTimeout().toStandardDuration().getMillis(); statusLock.wait(waitMs); long elapsed = timeoutStopwatch.elapsed(TimeUnit.MILLISECONDS); @@ -563,7 +594,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer config.getTaskAssignmentTimeout() ); - taskComplete(taskRunnerWorkItem, theZkWorker, task.getId(), TaskStatus.failure(task.getId())); + taskComplete(taskRunnerWorkItem, theZkWorker, TaskStatus.failure(task.getId())); break; } } @@ -575,15 +606,17 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer * the worker. Status changes indicate the creation or completion of a task. * The RemoteTaskRunner updates state according to these changes. * - * @param worker - contains metadata for a worker that has appeared in ZK + * @param worker contains metadata for a worker that has appeared in ZK + * @return future that will contain a fully initialized worker */ - private ZkWorker addWorker(final Worker worker, PathChildrenCache.StartMode startMode) + private ListenableFuture addWorker(final Worker worker) { log.info("Worker[%s] reportin' for duty!", worker.getHost()); try { final String workerStatusPath = JOINER.join(zkPaths.getIndexerStatusPath(), worker.getHost()); final PathChildrenCache statusCache = pathChildrenCacheFactory.make(cf, workerStatusPath); + final SettableFuture retVal = SettableFuture.create(); final ZkWorker zkWorker = new ZkWorker( worker, statusCache, @@ -597,8 +630,8 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer @Override public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception { - String taskId; - RemoteTaskRunnerWorkItem taskRunnerWorkItem; + final String taskId; + final RemoteTaskRunnerWorkItem taskRunnerWorkItem; synchronized (statusLock) { try { switch (event.getType()) { @@ -617,19 +650,27 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer ); // Synchronizing state with ZK - statusLock.notify(); + statusLock.notifyAll(); - taskRunnerWorkItem = runningTasks.get(taskId); - if (taskRunnerWorkItem == null) { + final RemoteTaskRunnerWorkItem tmp; + if ((tmp = runningTasks.get(taskId)) != null) { + taskRunnerWorkItem = tmp; + } else { log.warn( - "WTF?! 
Worker[%s] announcing a status for a task I didn't know about: %s", + "Worker[%s] announced a status for a task I didn't know about, adding to runningTasks: %s", zkWorker.getWorker().getHost(), taskId ); + taskRunnerWorkItem = new RemoteTaskRunnerWorkItem( + taskId, + SettableFuture.create(), + zkWorker.getWorker() + ); + runningTasks.put(taskId, taskRunnerWorkItem); } if (taskStatus.isComplete()) { - taskComplete(taskRunnerWorkItem, zkWorker, taskId, taskStatus); + taskComplete(taskRunnerWorkItem, zkWorker, taskStatus); runPendingTasks(); } break; @@ -638,11 +679,26 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer taskRunnerWorkItem = runningTasks.remove(taskId); if (taskRunnerWorkItem != null) { log.info("Task[%s] just disappeared!", taskId); - taskRunnerWorkItem.setResult(TaskStatus.failure(taskRunnerWorkItem.getTask().getId())); + taskRunnerWorkItem.setResult(TaskStatus.failure(taskRunnerWorkItem.getTaskId())); } else { log.info("Task[%s] went bye bye.", taskId); } break; + case INITIALIZED: + if (zkWorkers.putIfAbsent(worker.getHost(), zkWorker) == null) { + retVal.set(zkWorker); + } else { + final String message = String.format( + "WTF?! Tried to add already-existing worker[%s]", + worker.getHost() + ); + log.makeAlert(message) + .addData("workerHost", worker.getHost()) + .addData("workerIp", worker.getIp()) + .emit(); + retVal.setException(new IllegalStateException(message)); + } + runPendingTasks(); } } catch (Exception e) { @@ -655,13 +711,8 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer } } ); - - zkWorker.start(startMode); - zkWorkers.put(worker.getHost(), zkWorker); - - runPendingTasks(); - - return zkWorker; + zkWorker.start(); + return retVal; } catch (Exception e) { throw Throwables.propagate(e); @@ -707,7 +758,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer } log.info("Failing task[%s]", assignedTask); - taskRunnerWorkItem.setResult(TaskStatus.failure(taskRunnerWorkItem.getTask().getId())); + taskRunnerWorkItem.setResult(TaskStatus.failure(taskRunnerWorkItem.getTaskId())); } else { log.warn("RemoteTaskRunner has no knowledge of task[%s]", assignedTask); } @@ -763,19 +814,27 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer private void taskComplete( RemoteTaskRunnerWorkItem taskRunnerWorkItem, ZkWorker zkWorker, - String taskId, TaskStatus taskStatus ) { - if (taskRunnerWorkItem != null) { - final ListenableFuture result = taskRunnerWorkItem.getResult(); - if (result != null) { - ((SettableFuture) result).set(taskStatus); - } - } - + Preconditions.checkNotNull(taskRunnerWorkItem, "taskRunnerWorkItem"); + Preconditions.checkNotNull(zkWorker, "zkWorker"); + Preconditions.checkNotNull(taskStatus, "taskStatus"); + log.info( + "Worker[%s] completed task[%s] with status[%s]", + zkWorker.getWorker().getHost(), + taskStatus.getId(), + taskStatus.getStatusCode() + ); // Worker is done with this task zkWorker.setLastCompletedTaskTime(new DateTime()); - cleanup(zkWorker.getWorker().getHost(), taskId); + // Move from running -> complete + completeTasks.put(taskStatus.getId(), taskRunnerWorkItem); + runningTasks.remove(taskStatus.getId()); + // Notify interested parties + final ListenableFuture result = taskRunnerWorkItem.getResult(); + if (result != null) { + ((SettableFuture) result).set(taskStatus); + } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunnerWorkItem.java 
b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunnerWorkItem.java index 1c1dc7a17a9..76d373a049a 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunnerWorkItem.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunnerWorkItem.java @@ -21,7 +21,6 @@ package io.druid.indexing.overlord; import com.google.common.util.concurrent.SettableFuture; import io.druid.indexing.common.TaskStatus; -import io.druid.indexing.common.task.Task; import io.druid.indexing.worker.Worker; import org.joda.time.DateTime; @@ -33,25 +32,25 @@ public class RemoteTaskRunnerWorkItem extends TaskRunnerWorkItem private final Worker worker; public RemoteTaskRunnerWorkItem( - Task task, + String taskId, SettableFuture result, Worker worker ) { - super(task, result); + super(taskId, result); this.result = result; this.worker = worker; } public RemoteTaskRunnerWorkItem( - Task task, + String taskId, SettableFuture result, DateTime createdTime, DateTime queueInsertionTime, Worker worker ) { - super(task, result, createdTime, queueInsertionTime); + super(taskId, result, createdTime, queueInsertionTime); this.result = result; this.worker = worker; } @@ -69,11 +68,11 @@ public class RemoteTaskRunnerWorkItem extends TaskRunnerWorkItem @Override public RemoteTaskRunnerWorkItem withQueueInsertionTime(DateTime time) { - return new RemoteTaskRunnerWorkItem(getTask(), result, getCreatedTime(), time, worker); + return new RemoteTaskRunnerWorkItem(getTaskId(), result, getCreatedTime(), time, worker); } public RemoteTaskRunnerWorkItem withWorker(Worker theWorker) { - return new RemoteTaskRunnerWorkItem(getTask(), result, getCreatedTime(), getQueueInsertionTime(), theWorker); + return new RemoteTaskRunnerWorkItem(getTaskId(), result, getCreatedTime(), getQueueInsertionTime(), theWorker); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskLockbox.java index 3dc024530b3..7b3ffb09087 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskLockbox.java @@ -19,17 +19,20 @@ package io.druid.indexing.overlord; +import com.google.api.client.repackaged.com.google.common.base.Preconditions; import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.base.Optional; import com.google.common.base.Predicate; +import com.google.common.collect.ComparisonChain; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.inject.Inject; -import com.metamx.common.IAE; +import com.metamx.common.Pair; import com.metamx.common.guava.Comparators; import com.metamx.common.guava.FunctionalIterable; import com.metamx.emitter.EmittingLogger; @@ -73,8 +76,91 @@ public class TaskLockbox } /** - * Locks a task without removing it from the queue. Blocks until the lock is acquired. Throws an exception - * if the lock cannot be acquired. + * Wipe out our current in-memory state and resync it from our bundled {@link io.druid.indexing.overlord.TaskStorage}. + */ + public void syncFromStorage() + { + giant.lock(); + + try { + // Load stuff from taskStorage first. If this fails, we don't want to lose all our locks. 
+ final List> storedLocks = Lists.newArrayList(); + for (final Task task : taskStorage.getActiveTasks()) { + for (final TaskLock taskLock : taskStorage.getLocks(task.getId())) { + storedLocks.add(Pair.of(task, taskLock)); + } + } + // Sort locks by version, so we add them back in the order they were acquired. + final Ordering> byVersionOrdering = new Ordering>() + { + @Override + public int compare(Pair left, Pair right) + { + // The second compare shouldn't be necessary, but, whatever. + return ComparisonChain.start() + .compare(left.rhs.getVersion(), right.rhs.getVersion()) + .compare(left.lhs.getId(), right.lhs.getId()) + .result(); + } + }; + running.clear(); + // Bookkeeping for a log message at the end + final Set uniqueTaskIds = Sets.newHashSet(); + int taskLockCount = 0; + for (final Pair taskAndLock : byVersionOrdering.sortedCopy(storedLocks)) { + final Task task = taskAndLock.lhs; + final TaskLock savedTaskLock = taskAndLock.rhs; + if (savedTaskLock.getInterval().toDurationMillis() <= 0) { + // "Impossible", but you never know what crazy stuff can be restored from storage. + log.warn("WTF?! Got lock with empty interval for task: %s", task.getId()); + continue; + } + uniqueTaskIds.add(task.getId()); + final Optional acquiredTaskLock = tryLock( + task, + savedTaskLock.getInterval(), + Optional.of(savedTaskLock.getVersion()) + ); + if (acquiredTaskLock.isPresent() && savedTaskLock.getVersion().equals(acquiredTaskLock.get().getVersion())) { + taskLockCount ++; + log.info( + "Reacquired lock on interval[%s] version[%s] for task: %s", + savedTaskLock.getInterval(), + savedTaskLock.getVersion(), + task.getId() + ); + } else if (acquiredTaskLock.isPresent()) { + taskLockCount ++; + log.info( + "Could not reacquire lock on interval[%s] version[%s] (got version[%s] instead) for task: %s", + savedTaskLock.getInterval(), + savedTaskLock.getVersion(), + acquiredTaskLock.get().getVersion(), + task.getId() + ); + } else { + log.info( + "Could not reacquire lock on interval[%s] version[%s] for task: %s", + savedTaskLock.getInterval(), + savedTaskLock.getVersion(), + task.getId() + ); + } + } + log.info( + "Synced %,d locks for %,d tasks from storage (%,d locks ignored).", + taskLockCount, + uniqueTaskIds.size(), + storedLocks.size() - taskLockCount + ); + } finally { + giant.unlock(); + } + } + + /** + * Acquires a lock on behalf of a task. Blocks until the lock is acquired. Throws an exception if the lock + * cannot be acquired. */ public TaskLock lock(final Task task, final Interval interval) throws InterruptedException { @@ -97,7 +183,8 @@ public class TaskLockbox * Attempt to lock a task, without removing it from the queue. Equivalent to the long form of {@code tryLock} * with no preferred version. * - * @param task task to attempt to lock + * @param task task that wants a lock + * @param interval interval to lock * * @return lock version if lock was acquired, absent otherwise */ @@ -113,22 +200,18 @@ public class TaskLockbox * is only mostly guaranteed, however; we assume clock monotonicity and we assume that callers specifying * {@code preferredVersion} are doing the right thing. 
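To make the reacquisition order above concrete: stored (task, lock) pairs are replayed sorted by the lock's version string, with the task id as a tiebreaker, so locks come back in roughly the order they were first granted. Here is a self-contained sketch of that comparator using Guava's `Ordering` and `ComparisonChain`; the `StoredLock` type is a simplified stand-in, not the real `TaskLock` API.

```java
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Ordering;

import java.util.Arrays;
import java.util.List;

public class LockReplayOrderingSketch
{
  // Simplified stand-in for Pair<Task, TaskLock>: just the two fields the comparator needs.
  static class StoredLock
  {
    final String taskId;
    final String version; // lock versions are time-based strings, so lexical order roughly tracks acquisition order

    StoredLock(String taskId, String version)
    {
      this.taskId = taskId;
      this.version = version;
    }
  }

  public static void main(String[] args)
  {
    final Ordering<StoredLock> byVersion = new Ordering<StoredLock>()
    {
      @Override
      public int compare(StoredLock left, StoredLock right)
      {
        return ComparisonChain.start()
                              .compare(left.version, right.version)
                              .compare(left.taskId, right.taskId) // tiebreaker, as in the patch
                              .result();
      }
    };

    final List<StoredLock> stored = Arrays.asList(
        new StoredLock("task_b", "2013-11-20T01:00:00.000Z"),
        new StoredLock("task_a", "2013-11-20T00:00:00.000Z")
    );

    for (StoredLock lock : byVersion.sortedCopy(stored)) {
      System.out.println(lock.taskId + " -> " + lock.version); // replayed in acquisition order
    }
  }
}
```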
* - * @param task task to attempt to lock + * @param task task that wants a lock + * @param interval interval to lock * @param preferredVersion use this version string if one has not yet been assigned * * @return lock version if lock was acquired, absent otherwise */ - public Optional tryLock(final Task task, final Interval interval, final Optional preferredVersion) + private Optional tryLock(final Task task, final Interval interval, final Optional preferredVersion) { giant.lock(); try { - - if(task.getImplicitLockInterval().isPresent() && !task.getImplicitLockInterval().get().equals(interval)) { - // Task may only lock its fixed interval, if present - throw new IAE("Task must lock its fixed interval: %s", task.getId()); - } - + Preconditions.checkArgument(interval.toDurationMillis() > 0, "interval empty"); final String dataSource = task.getDataSource(); final List foundPosses = findLockPossesForInterval(dataSource, interval); final TaskLockPosse posseToUse; @@ -184,9 +267,10 @@ public class TaskLockbox if (posseToUse.getTaskIds().add(task.getId())) { log.info("Added task[%s] to TaskLock[%s]", task.getId(), posseToUse.getTaskLock().getGroupId()); - // Best effort to update task storage facility + // Update task storage facility. If it fails, revoke the lock. try { taskStorage.addLock(task.getId(), posseToUse.getTaskLock()); + return Optional.of(posseToUse.getTaskLock()); } catch(Exception e) { log.makeAlert("Failed to persist lock in storage") .addData("task", task.getId()) @@ -194,12 +278,13 @@ public class TaskLockbox .addData("interval", posseToUse.getTaskLock().getInterval()) .addData("version", posseToUse.getTaskLock().getVersion()) .emit(); + unlock(task, interval); + return Optional.absent(); } } else { log.info("Task[%s] already present in TaskLock[%s]", task.getId(), posseToUse.getTaskLock().getGroupId()); + return Optional.of(posseToUse.getTaskLock()); } - - return Optional.of(posseToUse.getTaskLock()); } finally { giant.unlock(); @@ -271,7 +356,7 @@ public class TaskLockbox // Wake up blocking-lock waiters lockReleaseCondition.signalAll(); - // Best effort to remove lock from storage + // Remove lock from storage. If it cannot be removed, just ignore the failure. try { taskStorage.removeLock(task.getId(), taskLock); } catch(Exception e) { @@ -315,20 +400,6 @@ public class TaskLockbox } } - /** - * Removes all locks from this lockbox. - */ - public void clear() - { - giant.lock(); - - try { - running.clear(); - } finally { - giant.unlock(); - } - } - /** * Return the currently-active lock posses for some task. 
* @@ -341,17 +412,12 @@ public class TaskLockbox try { final Iterable searchSpace; - if (task.getImplicitLockInterval().isPresent()) { - // Narrow down search using findLockPossesForInterval - searchSpace = findLockPossesForInterval(task.getDataSource(), task.getImplicitLockInterval().get()); + // Scan through all locks for this datasource + final NavigableMap dsRunning = running.get(task.getDataSource()); + if(dsRunning == null) { + searchSpace = ImmutableList.of(); } else { - // Scan through all locks for this datasource - final NavigableMap dsRunning = running.get(task.getDataSource()); - if(dsRunning == null) { - searchSpace = ImmutableList.of(); - } else { - searchSpace = dsRunning.values(); - } + searchSpace = dsRunning.values(); } return ImmutableList.copyOf( diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskMaster.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskMaster.java index 5d5cba4f200..96183d5ae64 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskMaster.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskMaster.java @@ -34,7 +34,7 @@ import io.druid.guice.annotations.Self; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.task.Task; -import io.druid.indexing.overlord.exec.TaskConsumer; +import io.druid.indexing.overlord.config.TaskQueueConfig; import io.druid.indexing.overlord.scaling.ResourceManagementScheduler; import io.druid.indexing.overlord.scaling.ResourceManagementSchedulerFactory; import io.druid.server.DruidNode; @@ -56,20 +56,22 @@ public class TaskMaster private final LeaderSelector leaderSelector; private final ReentrantLock giant = new ReentrantLock(); private final Condition mayBeStopped = giant.newCondition(); - private final TaskQueue taskQueue; private final TaskActionClientFactory taskActionClientFactory; - private final AtomicReference leaderLifecycleRef = new AtomicReference(null); + private final AtomicReference leaderLifecycleRef = new AtomicReference<>(null); private volatile boolean leading = false; private volatile TaskRunner taskRunner; + private volatile TaskQueue taskQueue; private volatile ResourceManagementScheduler resourceManagementScheduler; private static final EmittingLogger log = new EmittingLogger(TaskMaster.class); @Inject public TaskMaster( - final TaskQueue taskQueue, + final TaskQueueConfig taskQueueConfig, + final TaskLockbox taskLockbox, + final TaskStorage taskStorage, final TaskActionClientFactory taskActionClientFactory, @Self final DruidNode node, final ZkPathsConfig zkPaths, @@ -80,118 +82,99 @@ public class TaskMaster final ServiceEmitter emitter ) { - this.taskQueue = taskQueue; this.taskActionClientFactory = taskActionClientFactory; - this.leaderSelector = new LeaderSelector( - curator, zkPaths.getIndexerLeaderLatchPath(), new LeaderSelectorListener() - { - @Override - public void takeLeadership(CuratorFramework client) throws Exception - { - giant.lock(); + curator, + zkPaths.getIndexerLeaderLatchPath(), + new LeaderSelectorListener() + { + @Override + public void takeLeadership(CuratorFramework client) throws Exception + { + giant.lock(); - try { - log.info("By the power of Grayskull, I have the power!"); + try { + // Make sure the previous leadership cycle is really, really over. 
+ stopLeading(); - taskRunner = runnerFactory.build(); - final TaskConsumer taskConsumer = new TaskConsumer( - taskQueue, - taskRunner, - taskActionClientFactory, - emitter - ); + // I AM THE MASTER OF THE UNIVERSE. + log.info("By the power of Grayskull, I have the power!"); + taskLockbox.syncFromStorage(); + taskRunner = runnerFactory.build(); + taskQueue = new TaskQueue( + taskQueueConfig, + taskStorage, + taskRunner, + taskActionClientFactory, + taskLockbox, + emitter + ); - // Bootstrap task queue and task lockbox (load state stuff from the database) - taskQueue.bootstrap(); - - // Sensible order to start stuff: - final Lifecycle leaderLifecycle = new Lifecycle(); - if (leaderLifecycleRef.getAndSet(leaderLifecycle) != null) { - log.makeAlert("TaskMaster set a new Lifecycle without the old one being cleared! Race condition") - .emit(); - } - - leaderLifecycle.addManagedInstance(taskRunner); - leaderLifecycle.addHandler( - new Lifecycle.Handler() - { - @Override - public void start() throws Exception - { - taskRunner.bootstrap(taskQueue.snapshot()); - } - - @Override - public void stop() - { + // Sensible order to start stuff: + final Lifecycle leaderLifecycle = new Lifecycle(); + if (leaderLifecycleRef.getAndSet(leaderLifecycle) != null) { + log.makeAlert("TaskMaster set a new Lifecycle without the old one being cleared! Race condition") + .emit(); + } + leaderLifecycle.addManagedInstance(taskRunner); + if (taskRunner instanceof RemoteTaskRunner) { + final ScheduledExecutorFactory executorFactory = ScheduledExecutors.createFactory(leaderLifecycle); + resourceManagementScheduler = managementSchedulerFactory.build( + (RemoteTaskRunner) taskRunner, + executorFactory + ); + leaderLifecycle.addManagedInstance(resourceManagementScheduler); + } + leaderLifecycle.addManagedInstance(taskQueue); + leaderLifecycle.addHandler( + new Lifecycle.Handler() + { + @Override + public void start() throws Exception + { + serviceAnnouncer.announce(node); + } + @Override + public void stop() + { + serviceAnnouncer.unannounce(node); + } + } + ); + try { + leaderLifecycle.start(); + leading = true; + while (leading && !Thread.currentThread().isInterrupted()) { + mayBeStopped.await(); } } - ); - leaderLifecycle.addManagedInstance(taskQueue); - - leaderLifecycle.addHandler( - new Lifecycle.Handler() - { - @Override - public void start() throws Exception - { - serviceAnnouncer.announce(node); - } - - @Override - public void stop() - { - serviceAnnouncer.unannounce(node); - } + catch (InterruptedException e) { + // Suppress so we can bow out gracefully } - ); - leaderLifecycle.addManagedInstance(taskConsumer); - - if (taskRunner instanceof RemoteTaskRunner) { - final ScheduledExecutorFactory executorFactory = ScheduledExecutors.createFactory(leaderLifecycle); - resourceManagementScheduler = managementSchedulerFactory.build( - (RemoteTaskRunner) taskRunner, - executorFactory - ); - leaderLifecycle.addManagedInstance(resourceManagementScheduler); - } - - try { - leaderLifecycle.start(); - leading = true; - - while (leading && !Thread.currentThread().isInterrupted()) { - mayBeStopped.await(); + finally { + log.info("Bowing out!"); + stopLeading(); + } + } + catch (Exception e) { + log.makeAlert(e, "Failed to lead").emit(); + throw Throwables.propagate(e); + } + finally { + giant.unlock(); } } - catch (InterruptedException e) { - // Suppress so we can bow out gracefully - } - finally { - log.info("Bowing out!"); - stopLeading(); - } - } - catch (Exception e) { - log.makeAlert(e, "Failed to lead").emit(); - throw 
Throwables.propagate(e); - } - finally { - giant.unlock(); - } - } - @Override - public void stateChanged(CuratorFramework client, ConnectionState newState) - { - if (newState == ConnectionState.LOST || newState == ConnectionState.SUSPENDED) { - // disconnected from zk. assume leadership is gone - stopLeading(); + @Override + public void stateChanged(CuratorFramework client, ConnectionState newState) + { + if (newState == ConnectionState.LOST || newState == ConnectionState.SUSPENDED) { + // disconnected from zk. assume leadership is gone + stopLeading(); + } + } } - } - } ); leaderSelector.setId(node.getHost()); diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskQueue.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskQueue.java index 580dfd02a0d..693a504542c 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskQueue.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskQueue.java @@ -19,172 +19,102 @@ package io.druid.indexing.overlord; +import com.google.api.client.util.Maps; +import com.google.common.base.Function; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; -import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Ordering; +import com.google.common.collect.Sets; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.inject.Inject; +import com.metamx.common.ISE; +import com.metamx.common.concurrent.ScheduledExecutors; import com.metamx.common.lifecycle.LifecycleStart; import com.metamx.common.lifecycle.LifecycleStop; import com.metamx.emitter.EmittingLogger; -import io.druid.indexing.common.TaskLock; +import com.metamx.emitter.service.ServiceEmitter; +import com.metamx.emitter.service.ServiceMetricEvent; import io.druid.indexing.common.TaskStatus; +import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.task.Task; +import io.druid.indexing.overlord.config.TaskQueueConfig; +import org.joda.time.DateTime; +import org.joda.time.Duration; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; /** - * Interface between task producers and task consumers. + * Interface between task producers and the task runner. *

- * The queue accepts tasks from producers using {@link #add} and delivers tasks to consumers using either
- * {@link #take} or {@link #poll}. Ordering is mostly-FIFO, with deviations when the natural next task would conflict
- * with a currently-running task. In that case, tasks are skipped until a runnable one is found.
+ * This object accepts tasks from producers using {@link #add} and manages delivery of these tasks to a
+ * {@link TaskRunner}. Tasks will run in a mostly-FIFO order, with deviations when the natural next task is not ready
+ * in time (based on its {@link Task#isReady} method).
  *
- * To manage locking, the queue keeps track of currently-running tasks as {@link io.druid.indexing.common.TaskLock} objects. The idea is that
- * only one TaskLock can be running on a particular dataSource + interval, and that TaskLock has a single version
- * string that all tasks in the group must use to publish segments. Tasks in the same TaskLock may run concurrently.
- *
- * For persistence, the queue saves new tasks from {@link #add} and task status updates from {@link #notify} using a
- * {@link TaskStorage} obj
- *
- * To support leader election of our containing system, the queue can be stopped (in which case it will not accept - * any new tasks, or hand out any more tasks, until started again). + * For persistence, we save all new tasks and task status changes using a {@link TaskStorage} object. */ public class TaskQueue { - private final List queue = Lists.newLinkedList(); + private final List tasks = Lists.newArrayList(); + private final Map> taskFutures = Maps.newHashMap(); + + private final TaskQueueConfig config; private final TaskStorage taskStorage; + private final TaskRunner taskRunner; + private final TaskActionClientFactory taskActionClientFactory; private final TaskLockbox taskLockbox; + private final ServiceEmitter emitter; + private final ReentrantLock giant = new ReentrantLock(); - private final Condition workMayBeAvailable = giant.newCondition(); + private final Condition managementMayBeNecessary = giant.newCondition(); + private final ExecutorService managerExec = Executors.newSingleThreadExecutor( + new ThreadFactoryBuilder() + .setDaemon(false) + .setNameFormat("TaskQueue-Manager").build() + ); + private final ScheduledExecutorService storageSyncExec = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setDaemon(false) + .setNameFormat("TaskQueue-StorageSync").build() + ); private volatile boolean active = false; private static final EmittingLogger log = new EmittingLogger(TaskQueue.class); @Inject - public TaskQueue(TaskStorage taskStorage, TaskLockbox taskLockbox) + public TaskQueue( + TaskQueueConfig config, + TaskStorage taskStorage, + TaskRunner taskRunner, + TaskActionClientFactory taskActionClientFactory, + TaskLockbox taskLockbox, + ServiceEmitter emitter + ) { + this.config = Preconditions.checkNotNull(config, "config"); this.taskStorage = Preconditions.checkNotNull(taskStorage, "taskStorage"); + this.taskRunner = Preconditions.checkNotNull(taskRunner, "taskRunner"); + this.taskActionClientFactory = Preconditions.checkNotNull(taskActionClientFactory, "taskActionClientFactory"); this.taskLockbox = Preconditions.checkNotNull(taskLockbox, "taskLockbox"); + this.emitter = Preconditions.checkNotNull(emitter, "emitter"); } /** - * Bootstraps this task queue and associated task lockbox. Clears the lockbox before running. Should be called - * while the queue is stopped. It is not a good idea to start the queue if this method fails. - */ - public void bootstrap() - { - // NOTE: Bootstraps can resurrect bogus stuff caused by leader races or whatevs. - - // We may want to periodically fixup the database to refer to what we think is happening, to prevent - // this from occurring and also so that bogus stuff is detected by clients in a timely manner. 
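For orientation, the rewritten queue drives everything from a dedicated, named, non-daemon single-threaded executor (plus a similar scheduled executor for storage syncs) instead of the old consumer thread. A minimal sketch of that executor setup with Guava's `ThreadFactoryBuilder` follows; the loop body is a placeholder, not the real `manage()` logic.

```java
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ManagerExecutorSketch
{
  public static void main(String[] args)
  {
    final ExecutorService managerExec = Executors.newSingleThreadExecutor(
        new ThreadFactoryBuilder()
            .setDaemon(false)                    // keep the JVM alive while tasks are being managed
            .setNameFormat("TaskQueue-Manager")  // named thread, as in the patch
            .build()
    );

    managerExec.submit(
        new Runnable()
        {
          @Override
          public void run()
          {
            // Placeholder for the management loop; the real code loops until interrupted,
            // alerting and sleeping for a restart delay when the loop throws.
            System.out.println("managing on " + Thread.currentThread().getName());
          }
        }
    );

    managerExec.shutdown();
  }
}
```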
- - giant.lock(); - - try { - Preconditions.checkState(!active, "queue must be stopped"); - - log.info("Bootstrapping queue (and associated lockbox)"); - - queue.clear(); - taskLockbox.clear(); - - // Get all running tasks and their locks - final Multimap tasksByLock = ArrayListMultimap.create(); - - for (final Task task : taskStorage.getRunningTasks()) { - try { - final List taskLocks = taskStorage.getLocks(task.getId()); - - queue.add(task); - - for (final TaskLock taskLock : taskLocks) { - tasksByLock.put(taskLock, task); - } - } - catch (Exception e) { - log.makeAlert("Failed to bootstrap task").addData("task", task.getId()).emit(); - throw Throwables.propagate(e); - } - } - - // Sort locks by version - final Ordering> byVersionOrdering = new Ordering>() - { - @Override - public int compare(Map.Entry left, Map.Entry right) - { - return left.getKey().getVersion().compareTo(right.getKey().getVersion()); - } - }; - - // Acquire as many locks as possible, in version order - for(final Map.Entry taskAndLock : byVersionOrdering.sortedCopy(tasksByLock.entries())) { - final Task task = taskAndLock.getValue(); - final TaskLock savedTaskLock = taskAndLock.getKey(); - - final Optional acquiredTaskLock = taskLockbox.tryLock( - task, - savedTaskLock.getInterval(), - Optional.of(savedTaskLock.getVersion()) - ); - - if(acquiredTaskLock.isPresent() && savedTaskLock.getVersion().equals(acquiredTaskLock.get().getVersion())) { - log.info( - "Reacquired lock on interval[%s] version[%s] for task: %s", - savedTaskLock.getInterval(), - savedTaskLock.getVersion(), - task.getId() - ); - } else if(acquiredTaskLock.isPresent()) { - log.info( - "Could not reacquire lock on interval[%s] version[%s] (got version[%s] instead) for task: %s", - savedTaskLock.getInterval(), - savedTaskLock.getVersion(), - acquiredTaskLock.get().getVersion(), - task.getId() - ); - } else { - log.info( - "Could not reacquire lock on interval[%s] version[%s] for task: %s", - savedTaskLock.getInterval(), - savedTaskLock.getVersion(), - task.getId() - ); - } - } - - log.info("Bootstrapped %,d tasks with %,d locks. Ready to go!", queue.size(), tasksByLock.keySet().size()); - } finally { - giant.unlock(); - } - } - - /** - * Returns an immutable snapshot of the current status of this queue. - */ - public List snapshot() - { - giant.lock(); - - try { - return ImmutableList.copyOf(queue); - } finally { - giant.unlock(); - } - } - - /** - * Starts this task queue. Allows {@link #add(Task)} to accept new tasks. This should not be called on - * an already-started queue. + * Starts this task queue. Allows {@link #add(Task)} to accept new tasks. 
*/ @LifecycleStart public void start() @@ -193,9 +123,63 @@ public class TaskQueue try { Preconditions.checkState(!active, "queue must be stopped"); - active = true; - workMayBeAvailable.signalAll(); + syncFromStorage(); + managerExec.submit( + new Runnable() + { + @Override + public void run() + { + while (true) { + try { + manage(); + break; + } + catch (InterruptedException e) { + log.info("Interrupted, exiting!"); + break; + } + catch (Exception e) { + final long restartDelay = config.getRestartDelay().getMillis(); + log.makeAlert(e, "Failed to manage").addData("restartDelay", restartDelay).emit(); + try { + Thread.sleep(restartDelay); + } + catch (InterruptedException e2) { + log.info("Interrupted, exiting!"); + break; + } + } + } + } + } + ); + ScheduledExecutors.scheduleAtFixedRate( + storageSyncExec, + config.getStorageSyncRate(), + new Callable() + { + @Override + public ScheduledExecutors.Signal call() + { + try { + syncFromStorage(); + } + catch (Exception e) { + if (active) { + log.makeAlert(e, "Failed to sync with storage").emit(); + } + } + if (active) { + return ScheduledExecutors.Signal.REPEAT; + } else { + return ScheduledExecutors.Signal.STOP; + } + } + } + ); + managementMayBeNecessary.signalAll(); } finally { giant.unlock(); @@ -203,8 +187,7 @@ public class TaskQueue } /** - * Shuts down the queue, for now. This may safely be called on an already-stopped queue. The queue may be restarted - * if desired. + * Shuts down the queue. */ @LifecycleStop public void stop() @@ -212,16 +195,100 @@ public class TaskQueue giant.lock(); try { - log.info("Naptime! Shutting down until we are started again."); - queue.clear(); - taskLockbox.clear(); + tasks.clear(); + taskFutures.clear(); active = false; + managerExec.shutdownNow(); + storageSyncExec.shutdownNow(); + managementMayBeNecessary.signalAll(); } finally { giant.unlock(); } } + /** + * Main task runner management loop. Meant to run forever, or, at least until we're stopped. + */ + private void manage() throws InterruptedException + { + log.info("Beginning management in %s.", config.getStartDelay()); + Thread.sleep(config.getStartDelay().getMillis()); + + while (active) { + giant.lock(); + + try { + // Task futures available from the taskRunner + final Map> runnerTaskFutures = Maps.newHashMap(); + for (final TaskRunnerWorkItem workItem : taskRunner.getKnownTasks()) { + runnerTaskFutures.put(workItem.getTaskId(), workItem.getResult()); + } + // Attain futures for all active tasks (assuming they are ready to run). + // Copy tasks list, as notifyStatus may modify it. + for (final Task task : ImmutableList.copyOf(tasks)) { + if (!taskFutures.containsKey(task.getId())) { + final ListenableFuture runnerTaskFuture; + if (runnerTaskFutures.containsKey(task.getId())) { + runnerTaskFuture = runnerTaskFutures.get(task.getId()); + } else { + // Task should be running, so run it. 
+ final boolean taskIsReady; + try { + taskIsReady = task.isReady(taskActionClientFactory.create(task)); + } + catch (Exception e) { + log.warn(e, "Exception thrown during isReady for task: %s", task.getId()); + notifyStatus(task, TaskStatus.failure(task.getId())); + continue; + } + if (taskIsReady) { + log.info("Asking taskRunner to run: %s", task.getId()); + runnerTaskFuture = taskRunner.run(task); + } else { + continue; + } + } + taskFutures.put(task.getId(), attachCallbacks(task, runnerTaskFuture)); + } + } + // Kill tasks that shouldn't be running + final Set tasksToKill = Sets.difference( + runnerTaskFutures.keySet(), + ImmutableSet.copyOf( + Lists.transform( + tasks, + new Function() + { + @Override + public String apply(Task task) + { + return task.getId(); + } + } + ) + ) + ); + if (!tasksToKill.isEmpty()) { + log.info("Asking taskRunner to clean up %,d tasks.", tasksToKill.size()); + for (final String taskId : tasksToKill) { + try { + taskRunner.shutdown(taskId); + } catch (Exception e) { + log.warn(e, "TaskRunner failed to clean up task: %s", taskId); + } + } + } + // awaitNanos because management may become necessary without this condition signalling, + // due to e.g. tasks becoming ready when other folks mess with the TaskLockbox. + managementMayBeNecessary.awaitNanos(60000000000L /* 60 seconds */); + } + finally { + giant.unlock(); + } + } + } + /** * Adds some work to the queue and the underlying task storage facility with a generic "running" status. * @@ -236,26 +303,20 @@ public class TaskQueue try { Preconditions.checkState(active, "Queue is not active!"); Preconditions.checkNotNull(task, "task"); + Preconditions.checkState(tasks.size() < config.getMaxSize(), "Too many tasks (max = %,d)", config.getMaxSize()); // If this throws with any sort of exception, including TaskExistsException, we don't want to // insert the task into our queue. try { taskStorage.insert(task, TaskStatus.running(task.getId())); - } catch (TaskExistsException e) { + } + catch (TaskExistsException e) { log.warn("Attempt to add task twice: %s", task.getId()); throw Throwables.propagate(e); } - queue.add(task); - workMayBeAvailable.signalAll(); - - // Attempt to add this task to a running task group. Silently continue if this is not possible. - // The main reason this is here is so when subtasks are added, they end up in the same task group - // as their parent whenever possible. - if(task.getImplicitLockInterval().isPresent()) { - taskLockbox.tryLock(task, task.getImplicitLockInterval().get()); - } - + tasks.add(task); + managementMayBeNecessary.signalAll(); return true; } finally { @@ -264,62 +325,22 @@ public class TaskQueue } /** - * Locks and returns next doable work from the queue. Blocks if there is no doable work. - * - * @return runnable task + * Shuts down a task if it has not yet finished. + * @param taskId task to kill */ - public Task take() throws InterruptedException + public void shutdown(final String taskId) { giant.lock(); try { - Task task; - - log.info("Waiting for work..."); - - while ((task = poll()) == null) { - // awaitNanos because work may become available without this condition signalling, - // due to other folks messing with the taskLockbox - workMayBeAvailable.awaitNanos(1000000000L /* 1 second */); - } - - return task; - } - finally { - giant.unlock(); - } - } - - /** - * Locks and removes next doable work from the queue. Returns null if there is no doable work. 
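The kill step in the management loop above is just a set difference: task ids the runner knows about minus task ids still in the queue. A small sketch with Guava's `Sets.difference` over plain id strings (the ids are made up; the patch derives them from `getKnownTasks()` and the queue's task list):

```java
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

import java.util.Set;

public class TasksToKillSketch
{
  public static void main(String[] args)
  {
    // Ids the runner currently knows about (e.g. from getKnownTasks()).
    final Set<String> runnerTaskIds = ImmutableSet.of("index_a", "index_b", "index_c");

    // Ids the queue still considers active.
    final Set<String> queuedTaskIds = ImmutableSet.of("index_a", "index_c");

    // Anything the runner knows about but the queue does not should be shut down.
    final Set<String> tasksToKill = Sets.difference(runnerTaskIds, queuedTaskIds);

    for (String taskId : tasksToKill) {
      System.out.println("would call taskRunner.shutdown(" + taskId + ")"); // prints index_b
    }
  }
}
```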
- * - * @return runnable task or null - */ - public Task poll() - { - giant.lock(); - - try { - for (final Task task : queue) { - if(task.getImplicitLockInterval().isPresent()) { - // If this task has a fixed interval, attempt to lock it right now. - final Optional maybeLock = taskLockbox.tryLock(task, task.getImplicitLockInterval().get()); - if(maybeLock.isPresent()) { - log.info("Task claimed with fixed interval lock: %s", task.getId()); - queue.remove(task); - return task; - } - } else { - // No fixed interval. Let's just run this and see what happens. - log.info("Task claimed with no fixed interval lock: %s", task.getId()); - queue.remove(task); - return task; + Preconditions.checkNotNull(taskId, "taskId"); + for (final Task task : tasks) { + if (task.getId().equals(taskId)) { + notifyStatus(task, TaskStatus.failure(taskId)); + break; } } - - return null; - } - finally { + } finally { giant.unlock(); } } @@ -329,14 +350,14 @@ public class TaskQueue * the task storage facility. If the status is a completed status, the task will be unlocked and no further * updates will be accepted. * - * @param task task to update + * @param task task to update * @param taskStatus new task status * * @throws NullPointerException if task or status is null * @throws IllegalArgumentException if the task ID does not match the status ID * @throws IllegalStateException if this queue is currently shut down */ - public void notify(final Task task, final TaskStatus taskStatus) + private void notifyStatus(final Task task, final TaskStatus taskStatus) { giant.lock(); @@ -350,38 +371,156 @@ public class TaskQueue task.getId(), taskStatus.getId() ); - - // Save status to DB - boolean didPersistStatus = false; + // Inform taskRunner that this task can be shut down try { - final Optional previousStatus = taskStorage.getStatus(task.getId()); - if (!previousStatus.isPresent() || !previousStatus.get().isRunnable()) { - log.makeAlert("Ignoring notification for dead task").addData("task", task.getId()).emit(); - return; - } else { - taskStorage.setStatus(taskStatus); - didPersistStatus = true; - } - } catch(Exception e) { - log.makeAlert(e, "Failed to persist status for task") - .addData("task", task.getId()) - .addData("statusCode", taskStatus.getStatusCode()) - .emit(); + taskRunner.shutdown(task.getId()); + } catch (Exception e) { + log.warn(e, "TaskRunner failed to cleanup task after completion: %s", task.getId()); } - - if(taskStatus.isComplete()) { - if(didPersistStatus) { - log.info("Task done: %s", task); - taskLockbox.unlock(task); - } else { - log.warn("Status could not be persisted! 
Reinserting task: %s", task.getId()); - queue.add(task); + // Remove from running tasks + int removed = 0; + for (int i = tasks.size() - 1 ; i >= 0 ; i--) { + if (tasks.get(i).getId().equals(task.getId())) { + removed ++; + tasks.remove(i); + break; + } + } + if (removed == 0) { + log.warn("Unknown task completed: %s", task.getId()); + } else if (removed > 1) { + log.makeAlert("Removed multiple copies of task").addData("count", removed).addData("task", task.getId()).emit(); + } + // Remove from futures list + taskFutures.remove(task.getId()); + if (removed > 0) { + // If we thought this task should be running, save status to DB + try { + final Optional previousStatus = taskStorage.getStatus(task.getId()); + if (!previousStatus.isPresent() || !previousStatus.get().isRunnable()) { + log.makeAlert("Ignoring notification for already-complete task").addData("task", task.getId()).emit(); + } else { + taskStorage.setStatus(taskStatus); + taskLockbox.unlock(task); + log.info("Task done: %s", task); + managementMayBeNecessary.signalAll(); + } + } + catch (Exception e) { + log.makeAlert(e, "Failed to persist status for task") + .addData("task", task.getId()) + .addData("statusCode", taskStatus.getStatusCode()) + .emit(); } - workMayBeAvailable.signalAll(); } } finally { giant.unlock(); } } + + /** + * Attach success and failure handlers to a task status future, such that when it completes, we perform the + * appropriate updates. + * + * @param statusFuture a task status future + * + * @return the same future, for convenience + */ + private ListenableFuture attachCallbacks(final Task task, final ListenableFuture statusFuture) + { + final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder() + .setUser2(task.getDataSource()) + .setUser4(task.getType()); + Futures.addCallback( + statusFuture, + new FutureCallback() + { + @Override + public void onSuccess(final TaskStatus status) + { + log.info("Received %s status for task: %s", status.getStatusCode(), status.getId()); + handleStatus(status); + } + + @Override + public void onFailure(final Throwable t) + { + log.makeAlert(t, "Failed to run task") + .addData("task", task.getId()) + .addData("type", task.getType()) + .addData("dataSource", task.getDataSource()) + .emit(); + handleStatus(TaskStatus.failure(task.getId())); + } + + private void handleStatus(final TaskStatus status) + { + try { + // If we're not supposed to be running anymore, don't do anything. Somewhat racey if the flag gets set + // after we check and before we commit the database transaction, but better than nothing. + if (!active) { + log.info("Abandoning task due to shutdown: %s", task.getId()); + return; + } + + notifyStatus(task, status); + + // Emit event and log, if the task is done + if (status.isComplete()) { + metricBuilder.setUser3(status.getStatusCode().toString()); + emitter.emit(metricBuilder.build("indexer/time/run/millis", status.getDuration())); + + log.info( + "Task %s: %s (%d run duration)", + status.getStatusCode(), + task, + status.getDuration() + ); + } + } + catch (Exception e) { + log.makeAlert(e, "Failed to handle task status") + .addData("task", task.getId()) + .addData("statusCode", status.getStatusCode()) + .emit(); + } + } + } + ); + return statusFuture; + } + + /** + * Resync the contents of this task queue with our storage facility. Useful to make sure our in-memory state + * corresponds to the storage facility even if the latter is manually modified. 
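One more editorial sketch, this time of the callback wiring: the queue learns about completion by attaching a `FutureCallback` to the status future the runner hands back. A simplified version using a `SettableFuture` and a plain string in place of the real `TaskStatus`:

```java
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.SettableFuture;

public class StatusCallbackSketch
{
  public static void main(String[] args)
  {
    // Stand-in for the ListenableFuture<TaskStatus> returned by TaskRunner.run(task).
    final SettableFuture<String> statusFuture = SettableFuture.create();

    Futures.addCallback(
        statusFuture,
        new FutureCallback<String>()
        {
          @Override
          public void onSuccess(String status)
          {
            // The real handler persists the status, unlocks the task, and emits run-time metrics.
            System.out.println("task finished with status: " + status);
          }

          @Override
          public void onFailure(Throwable t)
          {
            // The real handler alerts and records the task as failed.
            System.out.println("task failed: " + t.getMessage());
          }
        }
    );

    statusFuture.set("SUCCESS"); // triggers onSuccess
  }
}
```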
+ */ + private void syncFromStorage() + { + giant.lock(); + + try { + if (active) { + final List newTasks = taskStorage.getActiveTasks(); + log.info( + "Synced %,d tasks from storage (%,d tasks added, %,d tasks removed).", + newTasks.size(), + Sets.difference(Sets.newHashSet(newTasks), Sets.newHashSet(tasks)).size(), + Sets.difference(Sets.newHashSet(tasks), Sets.newHashSet(newTasks)).size() + ); + tasks.clear(); + tasks.addAll(newTasks); + managementMayBeNecessary.signalAll(); + } else { + log.info("Not active. Skipping storage sync."); + } + } + catch (Exception e) { + log.warn(e, "Failed to sync tasks from storage!"); + throw Throwables.propagate(e); + } + finally { + giant.unlock(); + } + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunner.java index 6509c975cdf..0b4b5e3ff89 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunner.java @@ -24,34 +24,24 @@ import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.task.Task; import java.util.Collection; -import java.util.List; /** - * Interface for handing off tasks. Used by a {@link io.druid.indexing.overlord.exec.TaskConsumer} to - * run tasks that have been locked. + * Interface for handing off tasks. Managed by a {@link io.druid.indexing.overlord.TaskQueue}. */ public interface TaskRunner { - /** - * Provide a new task runner with a list of tasks that may already be running. Will be called once shortly - * after instantiation and before any calls to {@link #run}. Bootstrapping should not be construed as a command - * to run the tasks; they will be passed to {@link #run} one-by-one when this is desired. Some bootstrapped tasks - * may not actually be running (for example, if they are currently held back due to not having a lock). - * - * @param tasks the tasks - */ - public void bootstrap(List tasks); - /** * Run a task. The returned status should be some kind of completed status. * * @param task task to run + * * @return task status, eventually */ public ListenableFuture run(Task task); /** - * Best-effort task shutdown. May or may not do anything. + * Inform the task runner it can clean up any resources associated with a task. This implies shutdown of any + * currently-running tasks. 
*/ public void shutdown(String taskid); @@ -59,5 +49,7 @@ public interface TaskRunner public Collection getPendingTasks(); + public Collection getKnownTasks(); + public Collection getWorkers(); } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunnerWorkItem.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunnerWorkItem.java index 4d4cac6ef70..2963c875257 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunnerWorkItem.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskRunnerWorkItem.java @@ -19,11 +19,11 @@ package io.druid.indexing.overlord; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ComparisonChain; import com.google.common.util.concurrent.ListenableFuture; import io.druid.indexing.common.TaskStatus; -import io.druid.indexing.common.task.Task; import org.joda.time.DateTime; import org.joda.time.DateTimeComparator; @@ -32,38 +32,39 @@ import org.joda.time.DateTimeComparator; */ public class TaskRunnerWorkItem implements Comparable { - private final Task task; + private final String taskId; private final ListenableFuture result; private final DateTime createdTime; private final DateTime queueInsertionTime; public TaskRunnerWorkItem( - Task task, + String taskId, ListenableFuture result ) { - this(task, result, new DateTime(), new DateTime()); + this(taskId, result, new DateTime(), new DateTime()); } public TaskRunnerWorkItem( - Task task, + String taskId, ListenableFuture result, DateTime createdTime, DateTime queueInsertionTime ) { - this.task = task; + this.taskId = taskId; this.result = result; this.createdTime = createdTime; this.queueInsertionTime = queueInsertionTime; } @JsonProperty - public Task getTask() + public String getTaskId() { - return task; + return taskId; } + @JsonIgnore public ListenableFuture getResult() { return result; @@ -83,7 +84,7 @@ public class TaskRunnerWorkItem implements Comparable public TaskRunnerWorkItem withQueueInsertionTime(DateTime time) { - return new TaskRunnerWorkItem(task, result, createdTime, time); + return new TaskRunnerWorkItem(taskId, result, createdTime, time); } @Override @@ -91,7 +92,7 @@ public class TaskRunnerWorkItem implements Comparable { return ComparisonChain.start() .compare(createdTime, taskRunnerWorkItem.getCreatedTime(), DateTimeComparator.getInstance()) - .compare(task.getId(), taskRunnerWorkItem.getTask().getId()) + .compare(taskId, taskRunnerWorkItem.getTaskId()) .result(); } @@ -99,9 +100,10 @@ public class TaskRunnerWorkItem implements Comparable public String toString() { return "TaskRunnerWorkItem{" + - "task=" + task + + "taskId='" + taskId + '\'' + ", result=" + result + ", createdTime=" + createdTime + + ", queueInsertionTime=" + queueInsertionTime + '}'; } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorage.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorage.java index b74dc0d9c1a..fb289459256 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorage.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorage.java @@ -77,9 +77,17 @@ public interface TaskStorage public List getAuditLogs(String taskid); /** - * Returns a list of currently-running tasks as stored in the storage facility, in no particular order. + * Returns a list of currently running or pending tasks as stored in the storage facility. 
No particular order + * is guaranteed, but implementations are encouraged to return tasks in ascending order of creation. */ - public List getRunningTasks(); + public List getActiveTasks(); + + /** + * Returns a list of recently finished task statuses as stored in the storage facility. No particular order + * is guaranteed, but implementations are encouraged to return tasks in descending order of creation. No particular + * standard of "recent" is guaranteed, and in fact, this method is permitted to simply return nothing. + */ + public List getRecentlyFinishedTaskStatuses(); /** * Returns a list of locks for a particular task. diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorageQueryAdapter.java b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorageQueryAdapter.java index db03ab67ff7..67ea11dcf33 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorageQueryAdapter.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/TaskStorageQueryAdapter.java @@ -21,21 +21,17 @@ package io.druid.indexing.overlord; import com.google.common.base.Function; import com.google.common.base.Optional; -import com.google.common.base.Predicate; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.inject.Inject; -import com.metamx.common.guava.FunctionalIterable; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.actions.SegmentInsertAction; -import io.druid.indexing.common.actions.SpawnTasksAction; import io.druid.indexing.common.actions.TaskAction; import io.druid.indexing.common.task.Task; import io.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.List; -import java.util.Map; import java.util.Set; /** @@ -51,132 +47,41 @@ public class TaskStorageQueryAdapter this.storage = storage; } + public List getActiveTasks() + { + return storage.getActiveTasks(); + } + + public List getRecentlyFinishedTaskStatuses() + { + return storage.getRecentlyFinishedTaskStatuses(); + } + + public Optional getTask(final String taskid) + { + return storage.getTask(taskid); + } + public Optional getStatus(final String taskid) { return storage.getStatus(taskid); } /** - * Returns all recursive task statuses for a particular task, staying within the same task group. Includes that - * task, plus any tasks it spawned, and so on. Does not include spawned tasks that ended up in a different task - * group. Does not include this task's parents or siblings. 
- */ - public Map> getSameGroupChildStatuses(final String taskid) - { - final Optional taskOptional = storage.getTask(taskid); - final Optional statusOptional = storage.getStatus(taskid); - final ImmutableMap.Builder> resultBuilder = ImmutableMap.builder(); - - resultBuilder.put(taskid, statusOptional); - - final Iterable nextTasks = FunctionalIterable - .create(storage.getAuditLogs(taskid)).filter( - new Predicate() - { - @Override - public boolean apply(TaskAction taskAction) - { - return taskAction instanceof SpawnTasksAction; - } - } - ).transformCat( - new Function>() - { - @Override - public Iterable apply(TaskAction taskAction) - { - return ((SpawnTasksAction) taskAction).getNewTasks(); - } - } - ); - - if(taskOptional.isPresent() && statusOptional.isPresent()) { - for(final Task nextTask : nextTasks) { - if(nextTask.getGroupId().equals(taskOptional.get().getGroupId())) { - resultBuilder.putAll(getSameGroupChildStatuses(nextTask.getId())); - } - } - } - - return resultBuilder.build(); - } - - /** - * Like {@link #getSameGroupChildStatuses}, but flattens the recursive statuses into a single, merged status. - */ - public Optional getSameGroupMergedStatus(final String taskid) - { - final Map> statuses = getSameGroupChildStatuses(taskid); - - int nSuccesses = 0; - int nFailures = 0; - int nTotal = 0; - int nPresent = 0; - - for(final Optional statusOption : statuses.values()) { - nTotal ++; - - if(statusOption.isPresent()) { - nPresent ++; - - final TaskStatus status = statusOption.get(); - - if(status.isSuccess()) { - nSuccesses ++; - } else if(status.isFailure()) { - nFailures ++; - } - } - } - - final Optional status; - - if(nPresent == 0) { - status = Optional.absent(); - } else if(nSuccesses == nTotal) { - status = Optional.of(TaskStatus.success(taskid)); - } else if(nFailures > 0) { - status = Optional.of(TaskStatus.failure(taskid)); - } else { - status = Optional.of(TaskStatus.running(taskid)); - } - - return status; - } - - /** - * Returns all segments created by descendants for a particular task that stayed within the same task group. Includes - * that task, plus any tasks it spawned, and so on. Does not include spawned tasks that ended up in a different task - * group. Does not include this task's parents or siblings. + * Returns all segments created by this task. * * This method is useful when you want to figure out all of the things a single task spawned. It does pose issues * with the result set perhaps growing boundlessly and we do not do anything to protect against that. Use at your * own risk and know that at some point, we might adjust this to actually enforce some sort of limits. 
*/ - public Set getSameGroupNewSegments(final String taskid) + public Set getInsertedSegments(final String taskid) { - final Optional taskOptional = storage.getTask(taskid); final Set segments = Sets.newHashSet(); - final List nextTasks = Lists.newArrayList(); - - for(final TaskAction action : storage.getAuditLogs(taskid)) { - if(action instanceof SpawnTasksAction) { - nextTasks.addAll(((SpawnTasksAction) action).getNewTasks()); - } - - if(action instanceof SegmentInsertAction) { + for (final TaskAction action : storage.getAuditLogs(taskid)) { + if (action instanceof SegmentInsertAction) { segments.addAll(((SegmentInsertAction) action).getSegments()); } } - - if(taskOptional.isPresent()) { - for(final Task nextTask : nextTasks) { - if(nextTask.getGroupId().equals(taskOptional.get().getGroupId())) { - segments.addAll(getSameGroupNewSegments(nextTask.getId())); - } - } - } - return segments; } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java index 78e4b9e30ec..2cc94ac7400 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java @@ -19,7 +19,7 @@ package io.druid.indexing.overlord; -import com.google.common.base.Function; +import com.google.api.client.repackaged.com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -46,7 +46,6 @@ import org.joda.time.Interval; import java.io.File; import java.util.Collection; -import java.util.List; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListSet; @@ -58,7 +57,7 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker { private final TaskToolboxFactory toolboxFactory; private final ListeningExecutorService exec; - private final Set runningItems = new ConcurrentSkipListSet(); + private final Set runningItems = new ConcurrentSkipListSet<>(); private static final EmittingLogger log = new EmittingLogger(ThreadPoolTaskRunner.class); @@ -67,7 +66,7 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker TaskToolboxFactory toolboxFactory ) { - this.toolboxFactory = toolboxFactory; + this.toolboxFactory = Preconditions.checkNotNull(toolboxFactory, "toolboxFactory"); this.exec = MoreExecutors.listeningDecorator(Execs.singleThreaded("task-runner-%d")); } @@ -77,19 +76,12 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker exec.shutdownNow(); } - @Override - public void bootstrap(List tasks) - { - // do nothing - } - @Override public ListenableFuture run(final Task task) { final TaskToolbox toolbox = toolboxFactory.build(task); - final ListenableFuture statusFuture = exec.submit(new ExecutorServiceTaskRunnerCallable(task, toolbox)); - - final TaskRunnerWorkItem taskRunnerWorkItem = new TaskRunnerWorkItem(task, statusFuture); + final ListenableFuture statusFuture = exec.submit(new ThreadPoolTaskRunnerCallable(task, toolbox)); + final ThreadPoolTaskRunnerWorkItem taskRunnerWorkItem = new ThreadPoolTaskRunnerWorkItem(task, statusFuture); runningItems.add(taskRunnerWorkItem); Futures.addCallback( statusFuture, new FutureCallback() @@ -115,7 +107,7 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker public void shutdown(final String taskid) 
{ for (final TaskRunnerWorkItem runningItem : runningItems) { - if (runningItem.getTask().getId().equals(taskid)) { + if (runningItem.getTaskId().equals(taskid)) { runningItem.getResult().cancel(true); } } @@ -124,7 +116,7 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker @Override public Collection getRunningTasks() { - return ImmutableList.copyOf(runningItems); + return ImmutableList.copyOf(runningItems); } @Override @@ -133,6 +125,12 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker return ImmutableList.of(); } + @Override + public Collection getKnownTasks() + { + return ImmutableList.copyOf(runningItems); + } + @Override public Collection getWorkers() { @@ -155,18 +153,8 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker { QueryRunner queryRunner = null; - final List runningTasks = Lists.transform( - ImmutableList.copyOf(getRunningTasks()), new Function() - { - @Override - public Task apply(TaskRunnerWorkItem o) - { - return o.getTask(); - } - } - ); - - for (final Task task : runningTasks) { + for (final ThreadPoolTaskRunnerWorkItem taskRunnerWorkItem : ImmutableList.copyOf(runningItems)) { + final Task task = taskRunnerWorkItem.getTask(); if (task.getDataSource().equals(query.getDataSource())) { final QueryRunner taskQueryRunner = task.getQueryRunner(query); @@ -185,12 +173,31 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker return queryRunner == null ? new NoopQueryRunner() : queryRunner; } - private static class ExecutorServiceTaskRunnerCallable implements Callable + private static class ThreadPoolTaskRunnerWorkItem extends TaskRunnerWorkItem + { + private final Task task; + + private ThreadPoolTaskRunnerWorkItem( + Task task, + ListenableFuture result + ) + { + super(task.getId(), result); + this.task = task; + } + + public Task getTask() + { + return task; + } + } + + private static class ThreadPoolTaskRunnerCallable implements Callable { private final Task task; private final TaskToolbox toolbox; - public ExecutorServiceTaskRunnerCallable(Task task, TaskToolbox toolbox) + public ThreadPoolTaskRunnerCallable(Task task, TaskToolbox toolbox) { this.task = task; this.toolbox = toolbox; @@ -242,10 +249,5 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker throw Throwables.propagate(e); } } - - public TaskRunnerWorkItem getTaskRunnerWorkItem() - { - return new TaskRunnerWorkItem(task, null); - } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ZkWorker.java b/indexing-service/src/main/java/io/druid/indexing/overlord/ZkWorker.java index edb0c3df685..335b5fa583d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ZkWorker.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ZkWorker.java @@ -71,9 +71,9 @@ public class ZkWorker implements Closeable }; } - public void start(PathChildrenCache.StartMode startMode) throws Exception + public void start() throws Exception { - statusCache.start(startMode); + statusCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); } public void addListener(PathChildrenCacheListener listener) diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/config/ForkingTaskRunnerConfig.java b/indexing-service/src/main/java/io/druid/indexing/overlord/config/ForkingTaskRunnerConfig.java index e2f30e4235b..de864d544f4 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/config/ForkingTaskRunnerConfig.java +++ 
b/indexing-service/src/main/java/io/druid/indexing/overlord/config/ForkingTaskRunnerConfig.java @@ -62,7 +62,8 @@ public class ForkingTaskRunnerConfig "druid", "io.druid", "user.timezone", - "file.encoding" + "file.encoding", + "java.io.tmpdir" ); public String getTaskDir() diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/config/TaskQueueConfig.java b/indexing-service/src/main/java/io/druid/indexing/overlord/config/TaskQueueConfig.java new file mode 100644 index 00000000000..ac46f2cc60f --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/config/TaskQueueConfig.java @@ -0,0 +1,79 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.indexing.overlord.config; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.joda.time.Duration; +import org.joda.time.Period; + +public class TaskQueueConfig +{ + @JsonProperty + private int maxSize; + + @JsonProperty + private Duration startDelay; + + @JsonProperty + private Duration restartDelay; + + @JsonProperty + private Duration storageSyncRate; + + @JsonCreator + public TaskQueueConfig( + @JsonProperty("maxSize") final Integer maxSize, + @JsonProperty("startDelay") final Period startDelay, + @JsonProperty("restartDelay") final Period restartDelay, + @JsonProperty("storageSyncRate") final Period storageSyncRate + ) + { + this.maxSize = maxSize == null ? Integer.MAX_VALUE : maxSize; + this.startDelay = defaultDuration(startDelay, "PT1M"); + this.restartDelay = defaultDuration(restartDelay, "PT30S"); + this.storageSyncRate = defaultDuration(storageSyncRate, "PT1M"); + } + + public int getMaxSize() + { + return maxSize; + } + + public Duration getStartDelay() + { + return startDelay; + } + + public Duration getRestartDelay() + { + return restartDelay; + } + + public Duration getStorageSyncRate() + { + return storageSyncRate; + } + + private static Duration defaultDuration(final Period period, final String theDefault) + { + return (period == null ? new Period(theDefault) : period).toStandardDuration(); + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/exec/TaskConsumer.java b/indexing-service/src/main/java/io/druid/indexing/overlord/exec/TaskConsumer.java deleted file mode 100644 index d75cad14f08..00000000000 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/exec/TaskConsumer.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012, 2013 Metamarkets Group Inc. 
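For readers skimming the new TaskQueueConfig above: omitted fields fall back to defaults (unbounded maxSize, PT1M startDelay, PT30S restartDelay, PT1M storageSyncRate). Below is a minimal sketch of that defaulting behavior, constructing the config directly rather than through Jackson; the example class itself is hypothetical.

```
// Sketch only: demonstrates the defaulting in the TaskQueueConfig added above.
// Passing null for a field selects the default declared in its @JsonCreator.
import io.druid.indexing.overlord.config.TaskQueueConfig;
import org.joda.time.Period;

public class TaskQueueConfigDefaultsExample
{
  public static void main(String[] args)
  {
    final TaskQueueConfig config = new TaskQueueConfig(null, new Period("PT2M"), null, null);

    System.out.println(config.getMaxSize());         // Integer.MAX_VALUE, since maxSize was null
    System.out.println(config.getStartDelay());      // two minutes, converted to a standard Duration
    System.out.println(config.getRestartDelay());    // default of PT30S
    System.out.println(config.getStorageSyncRate()); // default of PT1M
  }
}
```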
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package io.druid.indexing.overlord.exec; - -import com.google.common.base.Throwables; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.metamx.common.lifecycle.LifecycleStart; -import com.metamx.common.lifecycle.LifecycleStop; -import com.metamx.emitter.EmittingLogger; -import com.metamx.emitter.service.ServiceEmitter; -import com.metamx.emitter.service.ServiceMetricEvent; -import io.druid.indexing.common.TaskStatus; -import io.druid.indexing.common.actions.TaskActionClientFactory; -import io.druid.indexing.common.task.Task; -import io.druid.indexing.overlord.TaskQueue; -import io.druid.indexing.overlord.TaskRunner; - -public class TaskConsumer implements Runnable -{ - private final TaskQueue queue; - private final TaskRunner runner; - private final TaskActionClientFactory taskActionClientFactory; - private final ServiceEmitter emitter; - private final Thread thready; - - private volatile boolean shutdown = false; - - private static final EmittingLogger log = new EmittingLogger(TaskConsumer.class); - - public TaskConsumer( - TaskQueue queue, - TaskRunner runner, - TaskActionClientFactory taskActionClientFactory, - ServiceEmitter emitter - ) - { - this.queue = queue; - this.runner = runner; - this.taskActionClientFactory = taskActionClientFactory; - this.emitter = emitter; - this.thready = new Thread(this); - } - - @LifecycleStart - public void start() - { - thready.start(); - } - - @LifecycleStop - public void stop() - { - shutdown = true; - thready.interrupt(); - } - - @Override - public void run() - { - - try { - while (!Thread.currentThread().isInterrupted()) { - - final Task task; - - try { - task = queue.take(); - } - catch (InterruptedException e) { - log.info("Interrupted while waiting for new work"); - Thread.currentThread().interrupt(); - break; - } - - try { - handoff(task); - } - catch (Exception e) { - log.makeAlert(e, "Failed to hand off task") - .addData("task", task.getId()) - .addData("type", task.getType()) - .addData("dataSource", task.getDataSource()) - .addData("interval", task.getImplicitLockInterval()) - .emit(); - - // Retry would be nice, but only after we have a way to throttle and limit them. Just fail for now. 
- if (!shutdown) { - queue.notify(task, TaskStatus.failure(task.getId())); - } - } - } - } - catch (Exception e) { - // exit thread - log.error(e, "Uncaught exception while consuming tasks"); - throw Throwables.propagate(e); - } - } - - private void handoff(final Task task) throws Exception - { - final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder() - .setUser2(task.getDataSource()) - .setUser4(task.getType()) - .setUser5(task.getImplicitLockInterval().toString()); - - // Run preflight checks - TaskStatus preflightStatus; - try { - preflightStatus = task.preflight(taskActionClientFactory.create(task)); - log.info("Preflight done for task: %s", task.getId()); - } - catch (Exception e) { - preflightStatus = TaskStatus.failure(task.getId()); - log.error(e, "Exception thrown during preflight for task: %s", task.getId()); - } - - if (!preflightStatus.isRunnable()) { - log.info("Task finished during preflight: %s", task.getId()); - queue.notify(task, preflightStatus); - return; - } - - // Hand off work to TaskRunner, with a callback - final ListenableFuture status = runner.run(task); - - Futures.addCallback( - status, new FutureCallback() - { - @Override - public void onSuccess(final TaskStatus status) - { - log.info("Received %s status for task: %s", status.getStatusCode(), task); - handleStatus(status); - } - - @Override - public void onFailure(Throwable t) - { - log.makeAlert(t, "Failed to run task") - .addData("task", task.getId()) - .addData("type", task.getType()) - .addData("dataSource", task.getDataSource()) - .addData("interval", task.getImplicitLockInterval()) - .emit(); - - handleStatus(TaskStatus.failure(task.getId())); - } - - private void handleStatus(TaskStatus status) - { - try { - // If we're not supposed to be running anymore, don't do anything. Somewhat racey if the flag gets set after - // we check and before we commit the database transaction, but better than nothing. 
- if (shutdown) { - log.info("Abandoning task due to shutdown: %s", task.getId()); - return; - } - - queue.notify(task, status); - - // Emit event and log, if the task is done - if (status.isComplete()) { - metricBuilder.setUser3(status.getStatusCode().toString()); - emitter.emit(metricBuilder.build("indexer/time/run/millis", status.getDuration())); - - log.info( - "Task %s: %s (%d run duration)", - status.getStatusCode(), - task, - status.getDuration() - ); - } - } - catch (Exception e) { - log.makeAlert(e, "Failed to handle task status") - .addData("task", task.getId()) - .addData("statusCode", status.getStatusCode()) - .emit(); - } - } - } - ); - } -} diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java index ef195b6f6cc..f161cb3c278 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java @@ -19,15 +19,20 @@ package io.druid.indexing.overlord.http; +import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.base.Function; import com.google.common.base.Optional; -import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.common.io.InputSupplier; +import com.google.common.util.concurrent.SettableFuture; import com.google.inject.Inject; import com.metamx.common.logger.Logger; import io.druid.common.config.JacksonConfigManager; +import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.actions.TaskActionHolder; import io.druid.indexing.common.task.Task; @@ -40,6 +45,7 @@ import io.druid.indexing.overlord.scaling.ResourceManagementScheduler; import io.druid.indexing.overlord.setup.WorkerSetupData; import io.druid.tasklogs.TaskLogStreamer; import io.druid.timeline.DataSegment; +import org.joda.time.DateTime; import javax.ws.rs.Consumes; import javax.ws.rs.DefaultValue; @@ -52,6 +58,8 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.Response; import java.io.IOException; import java.io.InputStream; +import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; @@ -63,27 +71,6 @@ public class OverlordResource { private static final Logger log = new Logger(OverlordResource.class); - private static Function> simplifyTaskFn = - new Function>() - { - @Override - public Map apply(TaskRunnerWorkItem input) - { - return new ImmutableMap.Builder() - .put("id", input.getTask().getId()) - .put("dataSource", input.getTask().getDataSource()) - .put("interval", - !input.getTask().getImplicitLockInterval().isPresent() - ? "" - : input.getTask().getImplicitLockInterval().get() - ) - .put("nodeType", input.getTask().getNodeType() == null ? 
"" : input.getTask().getNodeType()) - .put("createdTime", input.getCreatedTime()) - .put("queueInsertionTime", input.getQueueInsertionTime()) - .build(); - } - }; - private final TaskMaster taskMaster; private final TaskStorageQueryAdapter taskStorageQueryAdapter; private final TaskLogStreamer taskLogStreamer; @@ -146,12 +133,20 @@ public class OverlordResource ); } + @GET + @Path("/task/{taskid}") + @Produces("application/json") + public Response getTaskPayload(@PathParam("taskid") String taskid) + { + return optionalTaskResponse(taskid, "payload", taskStorageQueryAdapter.getTask(taskid)); + } + @GET @Path("/task/{taskid}/status") @Produces("application/json") public Response getTaskStatus(@PathParam("taskid") String taskid) { - return optionalTaskResponse(taskid, "status", taskStorageQueryAdapter.getSameGroupMergedStatus(taskid)); + return optionalTaskResponse(taskid, "status", taskStorageQueryAdapter.getStatus(taskid)); } @GET @@ -159,7 +154,7 @@ public class OverlordResource @Produces("application/json") public Response getTaskSegments(@PathParam("taskid") String taskid) { - final Set segments = taskStorageQueryAdapter.getSameGroupNewSegments(taskid); + final Set segments = taskStorageQueryAdapter.getInsertedSegments(taskid); return Response.ok().entity(segments).build(); } @@ -169,13 +164,13 @@ public class OverlordResource public Response doShutdown(@PathParam("taskid") final String taskid) { return asLeaderWith( - taskMaster.getTaskRunner(), - new Function() + taskMaster.getTaskQueue(), + new Function() { @Override - public Response apply(TaskRunner taskRunner) + public Response apply(TaskQueue taskQueue) { - taskRunner.shutdown(taskid); + taskQueue.shutdown(taskid); return Response.ok(ImmutableMap.of("task", taskid)).build(); } } @@ -225,7 +220,7 @@ public class OverlordResource final Map retMap; // It would be great to verify that this worker is actually supposed to be running the task before - // actually doing the task. Some ideas for how that could be done would be using some sort of attempt_id + // actually doing the action. Some ideas for how that could be done would be using some sort of attempt_id // or token that gets passed around. 
try { @@ -245,39 +240,64 @@ public class OverlordResource } @GET - @Path("/pendingTasks") + @Path("/waitingTasks") @Produces("application/json") - public Response getPendingTasks( - @QueryParam("full") String full - ) + public Response getWaitingTasks() { - if (full != null) { - return asLeaderWith( - taskMaster.getTaskRunner(), - new Function() - { - @Override - public Response apply(TaskRunner taskRunner) - { - return Response.ok(taskRunner.getPendingTasks()).build(); - } - } - ); - } - - return asLeaderWith( - taskMaster.getTaskRunner(), - new Function() + return workItemsResponse( + new Function>() { @Override - public Response apply(TaskRunner taskRunner) + public Collection apply(TaskRunner taskRunner) { - return Response.ok( - Collections2.transform( - taskRunner.getPendingTasks(), - simplifyTaskFn + // A bit roundabout, but works as a way of figuring out what tasks haven't been handed + // off to the runner yet: + final List activeTasks = taskStorageQueryAdapter.getActiveTasks(); + final Set runnersKnownTasks = Sets.newHashSet( + Iterables.transform( + taskRunner.getKnownTasks(), + new Function() + { + @Override + public String apply(final TaskRunnerWorkItem workItem) + { + return workItem.getTaskId(); + } + } ) - ).build(); + ); + final List waitingTasks = Lists.newArrayList(); + for (final Task task : activeTasks) { + if (!runnersKnownTasks.contains(task.getId())) { + waitingTasks.add( + // Would be nice to include the real created date, but the TaskStorage API doesn't yet allow it. + new TaskRunnerWorkItem( + task.getId(), + SettableFuture.create(), + new DateTime(0), + new DateTime(0) + ) + ); + } + } + return waitingTasks; + } + } + ); + } + + @GET + @Path("/pendingTasks") + @Produces("application/json") + public Response getPendingTasks() + { + return workItemsResponse( + new Function>() + { + @Override + public Collection apply(TaskRunner taskRunner) + { + return taskRunner.getPendingTasks(); } } ); @@ -286,42 +306,45 @@ public class OverlordResource @GET @Path("/runningTasks") @Produces("application/json") - public Response getRunningTasks( - @QueryParam("full") String full - ) + public Response getRunningTasks() { - if (full != null) { - return asLeaderWith( - taskMaster.getTaskRunner(), - new Function() - { - @Override - public Response apply(TaskRunner taskRunner) - { - return Response.ok(taskRunner.getRunningTasks()).build(); - } - } - ); - } - - return asLeaderWith( - taskMaster.getTaskRunner(), - new Function() + return workItemsResponse( + new Function>() { @Override - public Response apply(TaskRunner taskRunner) + public Collection apply(TaskRunner taskRunner) { - return Response.ok( - Collections2.transform( - taskRunner.getRunningTasks(), - simplifyTaskFn - ) - ).build(); + return taskRunner.getRunningTasks(); } } ); } + @GET + @Path("/completeTasks") + @Produces("application/json") + public Response getCompleteTasks() + { + final List completeTasks = Lists.transform( + taskStorageQueryAdapter.getRecentlyFinishedTaskStatuses(), + new Function() + { + @Override + public TaskResponseObject apply(TaskStatus taskStatus) + { + // Would be nice to include the real created date, but the TaskStorage API doesn't yet allow it. 
+ return new TaskResponseObject( + taskStatus.getId(), + new DateTime(0), + new DateTime(0), + Optional.of(taskStatus) + ); + } + } + ); + return Response.ok(completeTasks).build(); + } + @GET @Path("/workers") @Produces("application/json") @@ -345,17 +368,13 @@ public class OverlordResource @Produces("application/json") public Response getScalingState() { - return asLeaderWith( - taskMaster.getResourceManagementScheduler(), - new Function() - { - @Override - public Response apply(ResourceManagementScheduler resourceManagementScheduler) - { - return Response.ok(resourceManagementScheduler.getStats()).build(); - } - } - ); + // Don't use asLeaderWith, since we want to return 200 instead of 503 when missing an autoscaler. + final Optional rms = taskMaster.getResourceManagementScheduler(); + if (rms.isPresent()) { + return Response.ok(rms.get().getStats()).build(); + } else { + return Response.ok().build(); + } } @GET @@ -380,7 +399,39 @@ public class OverlordResource } } - public Response optionalTaskResponse(String taskid, String objectType, Optional x) + private Response workItemsResponse(final Function> fn) + { + return asLeaderWith( + taskMaster.getTaskRunner(), + new Function() + { + @Override + public Response apply(TaskRunner taskRunner) + { + return Response.ok( + Lists.transform( + Lists.newArrayList(fn.apply(taskRunner)), + new Function() + { + @Override + public TaskResponseObject apply(TaskRunnerWorkItem workItem) + { + return new TaskResponseObject( + workItem.getTaskId(), + workItem.getCreatedTime(), + workItem.getQueueInsertionTime(), + Optional.absent() + ); + } + } + ) + ).build(); + } + } + ); + } + + private Response optionalTaskResponse(String taskid, String objectType, Optional x) { final Map results = Maps.newHashMap(); results.put("task", taskid); @@ -392,7 +443,7 @@ public class OverlordResource } } - public Response asLeaderWith(Optional x, Function f) + private Response asLeaderWith(Optional x, Function f) { if (x.isPresent()) { return f.apply(x.get()); @@ -401,4 +452,62 @@ public class OverlordResource return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); } } + + private static class TaskResponseObject + { + private final String id; + private final DateTime createdTime; + private final DateTime queueInsertionTime; + private final Optional status; + + private TaskResponseObject( + String id, + DateTime createdTime, + DateTime queueInsertionTime, + Optional status + ) + { + this.id = id; + this.createdTime = createdTime; + this.queueInsertionTime = queueInsertionTime; + this.status = status; + } + + public String getId() + { + return id; + } + + public DateTime getCreatedTime() + { + return createdTime; + } + + public DateTime getQueueInsertionTime() + { + return queueInsertionTime; + } + + public Optional getStatus() + { + return status; + } + + @JsonValue + public Map toJson() + { + final Map data = Maps.newLinkedHashMap(); + data.put("id", id); + if (createdTime.getMillis() > 0) { + data.put("createdTime", createdTime); + } + if (queueInsertionTime.getMillis() > 0) { + data.put("queueInsertionTime", queueInsertionTime); + } + if (status.isPresent()) { + data.put("statusCode", status.get().getStatusCode().toString()); + } + return data; + } + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/AutoScalingData.java b/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/AutoScalingData.java index 7a0ab258310..34c45d66e71 100644 --- 
a/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/AutoScalingData.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/AutoScalingData.java @@ -28,12 +28,10 @@ import java.util.List; public class AutoScalingData { private final List nodeIds; - private final List nodes; - public AutoScalingData(List nodeIds, List nodes) + public AutoScalingData(List nodeIds) { this.nodeIds = nodeIds; - this.nodes = nodes; } @JsonProperty @@ -42,17 +40,11 @@ public class AutoScalingData return nodeIds; } - public List getNodes() - { - return nodes; - } - @Override public String toString() { return "AutoScalingData{" + "nodeIds=" + nodeIds + - ", nodes=" + nodes + '}'; } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategy.java b/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategy.java index 57c2d875ac5..b59f3d1e74e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategy.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategy.java @@ -125,8 +125,7 @@ public class EC2AutoScalingStrategy implements AutoScalingStrategy return input.getInstanceId(); } } - ), - result.getReservation().getInstances() + ) ); } catch (Exception e) { @@ -140,7 +139,7 @@ public class EC2AutoScalingStrategy implements AutoScalingStrategy public AutoScalingData terminate(List ips) { if (ips.isEmpty()) { - return new AutoScalingData(Lists.newArrayList(), Lists.newArrayList()); + return new AutoScalingData(Lists.newArrayList()); } DescribeInstancesResult result = amazonEC2Client.describeInstances( @@ -184,8 +183,7 @@ public class EC2AutoScalingStrategy implements AutoScalingStrategy return String.format("%s:%s", input, config.getWorkerPort()); } } - ), - instances + ) ); } catch (Exception e) { diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategy.java b/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategy.java index 50d612bd908..10e084b3c9e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategy.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategy.java @@ -20,13 +20,16 @@ package io.druid.indexing.overlord.scaling; import com.google.common.base.Function; +import com.google.common.base.Joiner; import com.google.common.base.Predicate; import com.google.common.base.Supplier; +import com.google.common.collect.Collections2; +import com.google.common.collect.FluentIterable; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.inject.Inject; -import com.metamx.common.guava.FunctionalIterable; +import com.metamx.common.ISE; import com.metamx.emitter.EmittingLogger; import io.druid.indexing.overlord.RemoteTaskRunnerWorkItem; import io.druid.indexing.overlord.TaskRunnerWorkItem; @@ -38,7 +41,6 @@ import org.joda.time.Duration; import java.util.Collection; import java.util.List; import java.util.Set; -import java.util.concurrent.ConcurrentSkipListSet; /** */ @@ -48,211 +50,194 @@ public class SimpleResourceManagementStrategy implements ResourceManagementStrat private final AutoScalingStrategy autoScalingStrategy; private final SimpleResourceManagementConfig config; - private final Supplier workerSetupdDataRef; + private final Supplier 
workerSetupDataRef; private final ScalingStats scalingStats; - private final ConcurrentSkipListSet currentlyProvisioning = new ConcurrentSkipListSet(); - private final ConcurrentSkipListSet currentlyTerminating = new ConcurrentSkipListSet(); + private final Object lock = new Object(); + private final Set currentlyProvisioning = Sets.newHashSet(); + private final Set currentlyTerminating = Sets.newHashSet(); - private volatile DateTime lastProvisionTime = new DateTime(); - private volatile DateTime lastTerminateTime = new DateTime(); + private int targetWorkerCount = -1; + private DateTime lastProvisionTime = new DateTime(); + private DateTime lastTerminateTime = new DateTime(); @Inject public SimpleResourceManagementStrategy( AutoScalingStrategy autoScalingStrategy, SimpleResourceManagementConfig config, - Supplier workerSetupdDataRef + Supplier workerSetupDataRef ) { this.autoScalingStrategy = autoScalingStrategy; this.config = config; - this.workerSetupdDataRef = workerSetupdDataRef; + this.workerSetupDataRef = workerSetupDataRef; this.scalingStats = new ScalingStats(config.getNumEventsToTrack()); } @Override public boolean doProvision(Collection pendingTasks, Collection zkWorkers) { - final WorkerSetupData workerSetupData = workerSetupdDataRef.get(); - - final String minVersion = workerSetupData.getMinVersion() == null - ? config.getWorkerVersion() - : workerSetupData.getMinVersion(); - int maxNumWorkers = workerSetupData.getMaxNumWorkers(); - - int currValidWorkers = 0; - for (ZkWorker zkWorker : zkWorkers) { - if (zkWorker.isValidVersion(minVersion)) { - currValidWorkers++; + synchronized (lock) { + boolean didProvision = false; + final WorkerSetupData workerSetupData = workerSetupDataRef.get(); + if (workerSetupData == null) { + log.warn("No workerSetupData available, cannot provision new workers."); + return false; } - } + final Predicate isValidWorker = createValidWorkerPredicate(config, workerSetupData); + final int currValidWorkers = Collections2.filter(zkWorkers, isValidWorker).size(); - if (currValidWorkers >= maxNumWorkers) { - log.debug( - "Cannot scale anymore. 
Num workers = %d, Max num workers = %d", - zkWorkers.size(), - workerSetupdDataRef.get().getMaxNumWorkers() - ); - return false; - } - - List workerNodeIds = autoScalingStrategy.ipToIdLookup( - Lists.newArrayList( - Iterables.transform( - zkWorkers, - new Function() - { - @Override - public String apply(ZkWorker input) + final List workerNodeIds = autoScalingStrategy.ipToIdLookup( + Lists.newArrayList( + Iterables.transform( + zkWorkers, + new Function() { - return input.getWorker().getIp(); + @Override + public String apply(ZkWorker input) + { + return input.getWorker().getIp(); + } } - } - ) - ) - ); + ) + ) + ); + currentlyProvisioning.removeAll(workerNodeIds); - currentlyProvisioning.removeAll(workerNodeIds); - boolean nothingProvisioning = currentlyProvisioning.isEmpty(); + updateTargetWorkerCount(workerSetupData, pendingTasks, zkWorkers); - if (nothingProvisioning) { - if (hasTaskPendingBeyondThreshold(pendingTasks)) { - AutoScalingData provisioned = autoScalingStrategy.provision(); - - if (provisioned != null) { - currentlyProvisioning.addAll(provisioned.getNodeIds()); + int want = targetWorkerCount - (currValidWorkers + currentlyProvisioning.size()); + while (want > 0) { + final AutoScalingData provisioned = autoScalingStrategy.provision(); + final List newNodes; + if (provisioned == null || (newNodes = provisioned.getNodeIds()).isEmpty()) { + break; + } else { + currentlyProvisioning.addAll(newNodes); lastProvisionTime = new DateTime(); scalingStats.addProvisionEvent(provisioned); - - return true; + want -= provisioned.getNodeIds().size(); + didProvision = true; } } - } else { - Duration durSinceLastProvision = new Duration(lastProvisionTime, new DateTime()); - log.info( - "%s still provisioning. Wait for all provisioned nodes to complete before requesting new worker. Current wait time: %s", - currentlyProvisioning, - durSinceLastProvision - ); + if (!currentlyProvisioning.isEmpty()) { + Duration durSinceLastProvision = new Duration(lastProvisionTime, new DateTime()); - if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) { - log.makeAlert("Worker node provisioning taking too long!") - .addData("millisSinceLastProvision", durSinceLastProvision.getMillis()) - .addData("provisioningCount", currentlyProvisioning.size()) - .emit(); + log.info("%s provisioning. 
Current wait time: %s", currentlyProvisioning, durSinceLastProvision); - List nodeIps = autoScalingStrategy.idToIpLookup(Lists.newArrayList(currentlyProvisioning)); - autoScalingStrategy.terminate(nodeIps); - currentlyProvisioning.clear(); + if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) { + log.makeAlert("Worker node provisioning taking too long!") + .addData("millisSinceLastProvision", durSinceLastProvision.getMillis()) + .addData("provisioningCount", currentlyProvisioning.size()) + .emit(); + + List nodeIps = autoScalingStrategy.idToIpLookup(Lists.newArrayList(currentlyProvisioning)); + autoScalingStrategy.terminate(nodeIps); + currentlyProvisioning.clear(); + } } - } - return false; + return didProvision; + } } @Override public boolean doTerminate(Collection pendingTasks, Collection zkWorkers) { - Set workerNodeIds = Sets.newHashSet( - autoScalingStrategy.ipToIdLookup( - Lists.newArrayList( - Iterables.transform( - zkWorkers, - new Function() - { - @Override - public String apply(ZkWorker input) + synchronized (lock) { + final WorkerSetupData workerSetupData = workerSetupDataRef.get(); + if (workerSetupData == null) { + log.warn("No workerSetupData available, cannot terminate workers."); + return false; + } + + boolean didTerminate = false; + final Set workerNodeIds = Sets.newHashSet( + autoScalingStrategy.ipToIdLookup( + Lists.newArrayList( + Iterables.transform( + zkWorkers, + new Function() { - return input.getWorker().getIp(); + @Override + public String apply(ZkWorker input) + { + return input.getWorker().getIp(); + } } - } - ) - ) - ) - ); - - Set stillExisting = Sets.newHashSet(); - for (String s : currentlyTerminating) { - if (workerNodeIds.contains(s)) { - stillExisting.add(s); - } - } - currentlyTerminating.clear(); - currentlyTerminating.addAll(stillExisting); - boolean nothingTerminating = currentlyTerminating.isEmpty(); - - if (nothingTerminating) { - final int minNumWorkers = workerSetupdDataRef.get().getMinNumWorkers(); - if (zkWorkers.size() <= minNumWorkers) { - log.info("Only [%d <= %d] nodes in the cluster, not terminating anything.", zkWorkers.size(), minNumWorkers); - return false; - } - - List thoseLazyWorkers = Lists.newArrayList( - FunctionalIterable - .create(zkWorkers) - .filter( - new Predicate() - { - @Override - public boolean apply(ZkWorker input) - { - return input.getRunningTasks().isEmpty() - && System.currentTimeMillis() - input.getLastCompletedTaskTime().getMillis() - >= config.getWorkerIdleTimeout().toStandardDuration().getMillis(); - } - } + ) ) - ); - - int maxPossibleNodesTerminated = zkWorkers.size() - minNumWorkers; - int numNodesToTerminate = Math.min(maxPossibleNodesTerminated, thoseLazyWorkers.size()); - if (numNodesToTerminate <= 0) { - log.info("Found no nodes to terminate."); - return false; - } - - AutoScalingData terminated = autoScalingStrategy.terminate( - Lists.transform( - thoseLazyWorkers.subList(0, numNodesToTerminate), - new Function() - { - @Override - public String apply(ZkWorker input) - { - return input.getWorker().getIp(); - } - } ) ); - if (terminated != null) { - currentlyTerminating.addAll(terminated.getNodeIds()); - lastTerminateTime = new DateTime(); - scalingStats.addTerminateEvent(terminated); - - return true; + final Set stillExisting = Sets.newHashSet(); + for (String s : currentlyTerminating) { + if (workerNodeIds.contains(s)) { + stillExisting.add(s); + } } - } else { - Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime()); + 
currentlyTerminating.clear(); + currentlyTerminating.addAll(stillExisting); - log.info( - "%s still terminating. Wait for all nodes to terminate before trying again.", - currentlyTerminating - ); + updateTargetWorkerCount(workerSetupData, pendingTasks, zkWorkers); - if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) { - log.makeAlert("Worker node termination taking too long!") - .addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis()) - .addData("terminatingCount", currentlyTerminating.size()) - .emit(); + final Predicate isLazyWorker = createLazyWorkerPredicate(config, workerSetupData); + if (currentlyTerminating.isEmpty()) { + final int excessWorkers = (zkWorkers.size() + currentlyProvisioning.size()) - targetWorkerCount; + if (excessWorkers > 0) { + final List laziestWorkerIps = + FluentIterable.from(zkWorkers) + .filter(isLazyWorker) + .limit(excessWorkers) + .transform( + new Function() + { + @Override + public String apply(ZkWorker zkWorker) + { + return zkWorker.getWorker().getIp(); + } + } + ) + .toList(); - currentlyTerminating.clear(); + if (laziestWorkerIps.isEmpty()) { + log.info("Wanted to terminate %,d workers, but couldn't find any lazy ones!", excessWorkers); + } else { + log.info( + "Terminating %,d workers (wanted %,d): %s", + laziestWorkerIps.size(), + excessWorkers, + Joiner.on(", ").join(laziestWorkerIps) + ); + + final AutoScalingData terminated = autoScalingStrategy.terminate(laziestWorkerIps); + if (terminated != null) { + currentlyTerminating.addAll(terminated.getNodeIds()); + lastTerminateTime = new DateTime(); + scalingStats.addTerminateEvent(terminated); + didTerminate = true; + } + } + } + } else { + Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime()); + + log.info("%s terminating. 
Current wait time: %s", currentlyTerminating, durSinceLastTerminate); + + if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration().toStandardDuration())) { + log.makeAlert("Worker node termination taking too long!") + .addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis()) + .addData("terminatingCount", currentlyTerminating.size()) + .emit(); + + currentlyTerminating.clear(); + } } + + return didTerminate; } - - return false; } @Override @@ -261,16 +246,128 @@ public class SimpleResourceManagementStrategy implements ResourceManagementStrat return scalingStats; } - private boolean hasTaskPendingBeyondThreshold(Collection pendingTasks) + private static Predicate createLazyWorkerPredicate( + final SimpleResourceManagementConfig config, + final WorkerSetupData workerSetupData + ) { - long now = System.currentTimeMillis(); - for (TaskRunnerWorkItem pendingTask : pendingTasks) { - final Duration durationSinceInsertion = new Duration(pendingTask.getQueueInsertionTime().getMillis(), now); - final Duration timeoutDuration = config.getPendingTaskTimeout().toStandardDuration(); - if (durationSinceInsertion.isEqual(timeoutDuration) || durationSinceInsertion.isLongerThan(timeoutDuration)) { - return true; + final Predicate isValidWorker = createValidWorkerPredicate(config, workerSetupData); + + return new Predicate() + { + @Override + public boolean apply(ZkWorker worker) + { + final boolean itHasBeenAWhile = System.currentTimeMillis() - worker.getLastCompletedTaskTime().getMillis() + >= config.getWorkerIdleTimeout().toStandardDuration().getMillis(); + return worker.getRunningTasks().isEmpty() && (itHasBeenAWhile || !isValidWorker.apply(worker)); + } + }; + } + + private static Predicate createValidWorkerPredicate( + final SimpleResourceManagementConfig config, + final WorkerSetupData workerSetupData + ) + { + return new Predicate() + { + @Override + public boolean apply(ZkWorker zkWorker) + { + final String minVersion = workerSetupData.getMinVersion() != null + ? workerSetupData.getMinVersion() + : config.getWorkerVersion(); + if (minVersion == null) { + throw new ISE("No minVersion found! 
It should be set in your runtime properties or configuration database."); + } + return zkWorker.isValidVersion(minVersion); + } + }; + } + + private void updateTargetWorkerCount( + final WorkerSetupData workerSetupData, + final Collection pendingTasks, + final Collection zkWorkers + ) + { + synchronized (lock) { + final Collection validWorkers = Collections2.filter( + zkWorkers, + createValidWorkerPredicate(config, workerSetupData) + ); + final Predicate isLazyWorker = createLazyWorkerPredicate(config, workerSetupData); + + if (targetWorkerCount < 0) { + // Initialize to size of current worker pool, subject to pool size limits + targetWorkerCount = Math.max( + Math.min( + zkWorkers.size(), + workerSetupData.getMaxNumWorkers() + ), + workerSetupData.getMinNumWorkers() + ); + log.info( + "Starting with a target of %,d workers (current = %,d, min = %,d, max = %,d).", + targetWorkerCount, + validWorkers.size(), + workerSetupData.getMinNumWorkers(), + workerSetupData.getMaxNumWorkers() + ); + } + + final boolean atSteadyState = currentlyProvisioning.isEmpty() + && currentlyTerminating.isEmpty() + && validWorkers.size() == targetWorkerCount; + final boolean shouldScaleUp = atSteadyState + && hasTaskPendingBeyondThreshold(pendingTasks) + && targetWorkerCount < workerSetupData.getMaxNumWorkers(); + final boolean shouldScaleDown = atSteadyState + && Iterables.any(validWorkers, isLazyWorker) + && targetWorkerCount > workerSetupData.getMinNumWorkers(); + if (shouldScaleUp) { + targetWorkerCount++; + log.info( + "I think we should scale up to %,d workers (current = %,d, min = %,d, max = %,d).", + targetWorkerCount, + validWorkers.size(), + workerSetupData.getMinNumWorkers(), + workerSetupData.getMaxNumWorkers() + ); + } else if (shouldScaleDown) { + targetWorkerCount--; + log.info( + "I think we should scale down to %,d workers (current = %,d, min = %,d, max = %,d).", + targetWorkerCount, + validWorkers.size(), + workerSetupData.getMinNumWorkers(), + workerSetupData.getMaxNumWorkers() + ); + } else { + log.info( + "Our target is %,d workers, and I'm okay with that (current = %,d, min = %,d, max = %,d).", + targetWorkerCount, + validWorkers.size(), + workerSetupData.getMinNumWorkers(), + workerSetupData.getMaxNumWorkers() + ); } } - return false; + } + + private boolean hasTaskPendingBeyondThreshold(Collection pendingTasks) + { + synchronized (lock) { + long now = System.currentTimeMillis(); + for (TaskRunnerWorkItem pendingTask : pendingTasks) { + final Duration durationSinceInsertion = new Duration(pendingTask.getQueueInsertionTime().getMillis(), now); + final Duration timeoutDuration = config.getPendingTaskTimeout().toStandardDuration(); + if (durationSinceInsertion.isEqual(timeoutDuration) || durationSinceInsertion.isLongerThan(timeoutDuration)) { + return true; + } + } + return false; + } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/worker/config/WorkerConfig.java b/indexing-service/src/main/java/io/druid/indexing/worker/config/WorkerConfig.java index d510df4c3ee..567dd62aa1f 100644 --- a/indexing-service/src/main/java/io/druid/indexing/worker/config/WorkerConfig.java +++ b/indexing-service/src/main/java/io/druid/indexing/worker/config/WorkerConfig.java @@ -38,7 +38,7 @@ public class WorkerConfig @JsonProperty @Min(1) - private int capacity = Runtime.getRuntime().availableProcessors() - 1; + private int capacity = Math.max(1, Runtime.getRuntime().availableProcessors() - 1); public String getIp() { diff --git 
a/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java b/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java index bc8879b6960..17d889b5143 100644 --- a/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java +++ b/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java @@ -20,16 +20,19 @@ package io.druid.indexing.worker.executor; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.api.client.repackaged.com.google.common.base.Preconditions; import com.google.common.base.Function; import com.google.common.base.Throwables; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.inject.Inject; +import com.metamx.common.ISE; import com.metamx.common.lifecycle.LifecycleStart; import com.metamx.common.lifecycle.LifecycleStop; import com.metamx.emitter.EmittingLogger; import io.druid.concurrent.Execs; import io.druid.indexing.common.TaskStatus; +import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.task.Task; import io.druid.indexing.overlord.TaskRunner; @@ -47,6 +50,7 @@ public class ExecutorLifecycle private static final EmittingLogger log = new EmittingLogger(ExecutorLifecycle.class); private final ExecutorLifecycleConfig config; + private final TaskActionClientFactory taskActionClientFactory; private final TaskRunner taskRunner; private final ObjectMapper jsonMapper; @@ -57,11 +61,13 @@ public class ExecutorLifecycle @Inject public ExecutorLifecycle( ExecutorLifecycleConfig config, + TaskActionClientFactory taskActionClientFactory, TaskRunner taskRunner, ObjectMapper jsonMapper ) { this.config = config; + this.taskActionClientFactory = taskActionClientFactory; this.taskRunner = taskRunner; this.jsonMapper = jsonMapper; } @@ -69,9 +75,9 @@ public class ExecutorLifecycle @LifecycleStart public void start() { - final File taskFile = config.getTaskFile(); - final File statusFile = config.getStatusFile(); - final InputStream parentStream = config.getParentStream(); + final File taskFile = Preconditions.checkNotNull(config.getTaskFile(), "taskFile"); + final File statusFile = Preconditions.checkNotNull(config.getStatusFile(), "statusFile"); + final InputStream parentStream = Preconditions.checkNotNull(config.getParentStream(), "parentStream"); final Task task; @@ -111,28 +117,41 @@ public class ExecutorLifecycle } ); - statusFuture = Futures.transform( - taskRunner.run(task), new Function() - { - @Override - public TaskStatus apply(TaskStatus taskStatus) - { - try { - log.info( - "Task completed with status: %s", - jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(taskStatus) - ); - - statusFile.getParentFile().mkdirs(); - jsonMapper.writeValue(statusFile, taskStatus); - - return taskStatus; - } - catch (Exception e) { - throw Throwables.propagate(e); - } + // Won't hurt in remote mode, and is required for setting up locks in local mode: + try { + if (!task.isReady(taskActionClientFactory.create(task))) { + throw new ISE("Task is not ready to run yet!", task.getId()); } + } catch (Exception e) { + throw new ISE(e, "Failed to run isReady", task.getId()); } + + statusFuture = Futures.transform( + taskRunner.run(task), + new Function() + { + @Override + public TaskStatus apply(TaskStatus taskStatus) + { + try { + log.info( + "Task completed with status: %s", + 
jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(taskStatus) + ); + + final File statusFileParent = statusFile.getParentFile(); + if (statusFileParent != null) { + statusFileParent.mkdirs(); + } + jsonMapper.writeValue(statusFile, taskStatus); + + return taskStatus; + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + } ); } diff --git a/indexing-service/src/main/resources/indexer_static/console.html b/indexing-service/src/main/resources/indexer_static/console.html index 6223eee6a1f..e8221aa287e 100644 --- a/indexing-service/src/main/resources/indexer_static/console.html +++ b/indexing-service/src/main/resources/indexer_static/console.html @@ -43,16 +43,24 @@

[console.html hunk body omitted: the HTML markup did not survive extraction. The recoverable changes to the indexer console page are: the "Pending Tasks" heading becomes "Pending Tasks - Tasks waiting to be assigned to a worker"; new "Waiting Tasks - Tasks waiting on locks" and "Complete Tasks - Tasks recently completed" sections are added, each with its own "Loading ... this may take a few minutes" placeholder; "Workers" is renamed to "Remote Workers"; and the "Event Log" section is replaced by "Autoscaling Activity" ("Loading Autoscaling Activities... this may take a few minutes").]
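The new "Waiting Tasks" and "Complete Tasks" panels are fed by the /waitingTasks and /completeTasks endpoints added above. The waiting list is essentially the set of active tasks the runner does not yet know about; here is a stripped-down restatement of that set arithmetic, with plain strings standing in for Task and TaskRunnerWorkItem.

```
// Not the overlord code itself: a simplified restatement of the waitingTasks
// computation from OverlordResource above, using strings instead of task objects.
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

import java.util.List;
import java.util.Set;

public class WaitingTasksSketch
{
  public static void main(String[] args)
  {
    final List<String> activeTaskIds = ImmutableList.of("task-a", "task-b", "task-c");
    final Set<String> runnersKnownTaskIds = ImmutableSet.of("task-a");

    // Anything active but not yet known to the runner is still waiting to be handed off.
    final Set<String> waiting = Sets.difference(ImmutableSet.copyOf(activeTaskIds), runnersKnownTaskIds);

    System.out.println(waiting); // [task-b, task-c]
  }
}
```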
diff --git a/indexing-service/src/main/resources/indexer_static/js/console-0.0.1.js b/indexing-service/src/main/resources/indexer_static/js/console-0.0.1.js index e3ce86c85c9..adaa1fba83f 100644 --- a/indexing-service/src/main/resources/indexer_static/js/console-0.0.1.js +++ b/indexing-service/src/main/resources/indexer_static/js/console-0.0.1.js @@ -3,14 +3,39 @@ var oTable = []; $(document).ready(function() { + var augment = function(data) { + for (i = 0 ; i < data.length ; i++) { + var taskId = encodeURIComponent(data[i].id) + data[i].more = + '
payload' + + 'status' + + 'log (all)' + + 'log (last 8kb)' + } + } + $.get('/druid/indexer/v1/runningTasks', function(data) { $('.running_loading').hide(); - buildTable(data, $('#runningTable'), ["segments"]); + augment(data); + buildTable(data, $('#runningTable')); }); $.get('/druid/indexer/v1/pendingTasks', function(data) { $('.pending_loading').hide(); - buildTable(data, $('#pendingTable'), ["segments"]); + augment(data); + buildTable(data, $('#pendingTable')); + }); + + $.get('/druid/indexer/v1/waitingTasks', function(data) { + $('.waiting_loading').hide(); + augment(data); + buildTable(data, $('#waitingTable')); + }); + + $.get('/druid/indexer/v1/completeTasks', function(data) { + $('.complete_loading').hide(); + augment(data); + buildTable(data, $('#completeTable')); }); $.get('/druid/indexer/v1/workers', function(data) { @@ -22,4 +47,4 @@ $(document).ready(function() { $('.events_loading').hide(); buildTable(data, $('#eventTable')); }); -}); \ No newline at end of file +}); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/TestMergeTask.java b/indexing-service/src/test/java/io/druid/indexing/common/TestMergeTask.java index 906e6e6c1e6..05f3118d3c9 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/TestMergeTask.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/TestMergeTask.java @@ -44,7 +44,7 @@ public class TestMergeTask extends MergeTask Lists.newArrayList( new DataSegment( "dummyDs", - new Interval(new DateTime(), new DateTime()), + new Interval(new DateTime(), new DateTime().plus(1)), new DateTime().toString(), null, null, diff --git a/indexing-service/src/test/java/io/druid/indexing/common/TestRealtimeTask.java b/indexing-service/src/test/java/io/druid/indexing/common/TestRealtimeTask.java index cc69067d23c..178cae10513 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/TestRealtimeTask.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/TestRealtimeTask.java @@ -32,7 +32,7 @@ import io.druid.timeline.partition.NoneShardSpec; /** */ @JsonTypeName("test_realtime") -public class TestRealtimeTask extends RealtimeIndexTask implements TestTask +public class TestRealtimeTask extends RealtimeIndexTask { private final TaskStatus status; @@ -64,13 +64,6 @@ public class TestRealtimeTask extends RealtimeIndexTask implements TestTask return "test_realtime"; } - @Override - @JsonProperty - public TaskStatus getStatus() - { - return status; - } - @Override public TaskStatus run(TaskToolbox toolbox) throws Exception { diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java index 577d9f4e0a6..7e8dd86f5a0 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java @@ -67,7 +67,7 @@ public class MergeTaskBaseTest @Test public void testInterval() { - Assert.assertEquals(new Interval("2012-01-03/2012-01-07"), testMergeTaskBase.getImplicitLockInterval().get()); + Assert.assertEquals(new Interval("2012-01-03/2012-01-07"), testMergeTaskBase.getInterval()); } @Test diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java index 36946e196d7..77ae0af4b52 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java +++ 
b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java @@ -19,14 +19,15 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.metamx.common.Granularity; import io.druid.data.input.impl.JSONDataSpec; import io.druid.data.input.impl.TimestampSpec; import io.druid.granularity.QueryGranularity; +import io.druid.guice.FirehoseModule; import io.druid.indexer.HadoopDruidIndexerSchema; import io.druid.indexer.granularity.UniformGranularitySpec; import io.druid.indexer.rollup.DataRollupSpec; @@ -36,6 +37,7 @@ import io.druid.query.aggregation.CountAggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregatorFactory; import io.druid.segment.IndexGranularity; import io.druid.segment.realtime.Schema; +import io.druid.segment.realtime.firehose.LocalFirehoseFactory; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; import junit.framework.Assert; @@ -43,12 +45,14 @@ import org.joda.time.Interval; import org.joda.time.Period; import org.junit.Test; +import java.io.File; + public class TaskSerdeTest { @Test public void testIndexTaskSerde() throws Exception { - final Task task = new IndexTask( + final IndexTask task = new IndexTask( null, "foo", new UniformGranularitySpec(Granularity.DAY, ImmutableList.of(new Interval("2010-01-01/P2D"))), @@ -56,62 +60,34 @@ public class TaskSerdeTest new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")}, QueryGranularity.NONE, 10000, - null, + new LocalFirehoseFactory(new File("lol"), "rofl", null), -1 ); final ObjectMapper jsonMapper = new DefaultObjectMapper(); + for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) { + jsonMapper.registerModule(jacksonModule); + } final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P2D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P2D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - } - - @Test - public void testIndexGeneratorTaskSerde() throws Exception - { - final Task task = new IndexGeneratorTask( - null, - "foo", - new Interval("2010-01-01/P1D"), - null, - new Schema( - "foo", - null, - new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")}, - QueryGranularity.NONE, - new NoneShardSpec() - ), - -1 - ); - - final ObjectMapper jsonMapper = new DefaultObjectMapper(); - final String json = jsonMapper.writeValueAsString(task); - - Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); - - Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); - - 
Assert.assertEquals(task.getId(), task2.getId()); - Assert.assertEquals(task.getGroupId(), task2.getGroupId()); - Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); + Assert.assertTrue(task.getFirehoseFactory() instanceof LocalFirehoseFactory); + Assert.assertTrue(task2.getFirehoseFactory() instanceof LocalFirehoseFactory); } @Test public void testMergeTaskSerde() throws Exception { - final Task task = new MergeTask( + final MergeTask task = new MergeTask( null, "foo", ImmutableList.of( @@ -126,26 +102,26 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final MergeTask task2 = (MergeTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - Assert.assertEquals(((MergeTask) task).getSegments(), ((MergeTask) task2).getSegments()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); + Assert.assertEquals(task.getSegments(), task2.getSegments()); Assert.assertEquals( - ((MergeTask) task).getAggregators().get(0).getName(), - ((MergeTask) task2).getAggregators().get(0).getName() + task.getAggregators().get(0).getName(), + task2.getAggregators().get(0).getName() ); } @Test public void testKillTaskSerde() throws Exception { - final Task task = new KillTask( + final KillTask task = new KillTask( null, "foo", new Interval("2010-01-01/P1D") @@ -155,21 +131,21 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final KillTask task2 = (KillTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); } @Test public void testVersionConverterTaskSerde() throws Exception { - final Task task = VersionConverterTask.create( + final VersionConverterTask task = VersionConverterTask.create( DataSegment.builder().dataSource("foo").interval(new Interval("2010-01-01/P1D")).version("1234").build() ); @@ -177,22 +153,22 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + 
final VersionConverterTask task2 = (VersionConverterTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - Assert.assertEquals(((VersionConverterTask) task).getSegment(), ((VersionConverterTask) task).getSegment()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); + Assert.assertEquals(task.getSegment(), task.getSegment()); } @Test public void testVersionConverterSubTaskSerde() throws Exception { - final Task task = new VersionConverterTask.SubTask( + final VersionConverterTask.SubTask task = new VersionConverterTask.SubTask( "myGroupId", DataSegment.builder().dataSource("foo").interval(new Interval("2010-01-01/P1D")).version("1234").build() ); @@ -201,26 +177,21 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final VersionConverterTask.SubTask task2 = (VersionConverterTask.SubTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); Assert.assertEquals("myGroupId", task.getGroupId()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - Assert.assertEquals( - ((VersionConverterTask.SubTask) task).getSegment(), - ((VersionConverterTask.SubTask) task).getSegment() - ); + Assert.assertEquals(task.getSegment(), task2.getSegment()); } @Test public void testRealtimeIndexTaskSerde() throws Exception { - final Task task = new RealtimeIndexTask( + final RealtimeIndexTask task = new RealtimeIndexTask( null, new TaskResource("rofl", 2), new Schema("foo", null, new AggregatorFactory[0], QueryGranularity.NONE, new NoneShardSpec()), @@ -235,32 +206,27 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final RealtimeIndexTask task2 = (RealtimeIndexTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.absent(), task.getImplicitLockInterval()); Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity()); Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup()); - Assert.assertEquals(new Period("PT10M"), ((RealtimeIndexTask) task).getWindowPeriod()); - Assert.assertEquals(IndexGranularity.HOUR, ((RealtimeIndexTask) task).getSegmentGranularity()); + Assert.assertEquals(new Period("PT10M"), task.getWindowPeriod()); + Assert.assertEquals(IndexGranularity.HOUR, task.getSegmentGranularity()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), 
task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity()); Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup()); - Assert.assertEquals(((RealtimeIndexTask) task).getWindowPeriod(), ((RealtimeIndexTask) task2).getWindowPeriod()); - Assert.assertEquals( - ((RealtimeIndexTask) task).getSegmentGranularity(), - ((RealtimeIndexTask) task2).getSegmentGranularity() - ); + Assert.assertEquals(task.getWindowPeriod(), task2.getWindowPeriod()); + Assert.assertEquals(task.getSegmentGranularity(), task2.getSegmentGranularity()); } @Test public void testDeleteTaskSerde() throws Exception { - final Task task = new DeleteTask( + final DeleteTask task = new DeleteTask( null, "foo", new Interval("2010-01-01/P1D") @@ -270,46 +236,44 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final DeleteTask task2 = (DeleteTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - Assert.assertEquals(task.getImplicitLockInterval().get(), task2.getImplicitLockInterval().get()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); } @Test public void testDeleteTaskFromJson() throws Exception { final ObjectMapper jsonMapper = new DefaultObjectMapper(); - final Task task = jsonMapper.readValue( + final DeleteTask task = (DeleteTask) jsonMapper.readValue( "{\"type\":\"delete\",\"dataSource\":\"foo\",\"interval\":\"2010-01-01/P1D\"}", Task.class ); final String json = jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final DeleteTask task2 = (DeleteTask) jsonMapper.readValue(json, Task.class); Assert.assertNotNull(task.getId()); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - Assert.assertEquals(task.getImplicitLockInterval().get(), task2.getImplicitLockInterval().get()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); } @Test public void testAppendTaskSerde() throws Exception { - final Task task = new AppendTask( + final AppendTask task = new AppendTask( null, "foo", ImmutableList.of( @@ -321,17 +285,67 @@ public class TaskSerdeTest final String json = 
jsonMapper.writeValueAsString(task); Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change - final Task task2 = jsonMapper.readValue(json, Task.class); + final AppendTask task2 = (AppendTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); - Assert.assertEquals(task.getImplicitLockInterval().get(), task2.getImplicitLockInterval().get()); - Assert.assertEquals(((AppendTask) task).getSegments(), ((AppendTask) task2).getSegments()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); + Assert.assertEquals(task.getSegments(), task2.getSegments()); + } + + @Test + public void testArchiveTaskSerde() throws Exception + { + final ArchiveTask task = new ArchiveTask( + null, + "foo", + new Interval("2010-01-01/P1D") + ); + + final ObjectMapper jsonMapper = new DefaultObjectMapper(); + final String json = jsonMapper.writeValueAsString(task); + + Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change + final ArchiveTask task2 = (ArchiveTask) jsonMapper.readValue(json, Task.class); + + Assert.assertEquals("foo", task.getDataSource()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); + + Assert.assertEquals(task.getId(), task2.getId()); + Assert.assertEquals(task.getGroupId(), task2.getGroupId()); + Assert.assertEquals(task.getDataSource(), task2.getDataSource()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); + } + + @Test + public void testMoveTaskSerde() throws Exception + { + final MoveTask task = new MoveTask( + null, + "foo", + new Interval("2010-01-01/P1D"), + ImmutableMap.of("bucket", "hey", "baseKey", "what") + ); + + final ObjectMapper jsonMapper = new DefaultObjectMapper(); + final String json = jsonMapper.writeValueAsString(task); + + Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change + final MoveTask task2 = (MoveTask) jsonMapper.readValue(json, Task.class); + + Assert.assertEquals("foo", task.getDataSource()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); + Assert.assertEquals(ImmutableMap.of("bucket", "hey", "baseKey", "what"), task.getTargetLoadSpec()); + + Assert.assertEquals(task.getId(), task2.getId()); + Assert.assertEquals(task.getGroupId(), task2.getGroupId()); + Assert.assertEquals(task.getDataSource(), task2.getDataSource()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); + Assert.assertEquals(task.getTargetLoadSpec(), task2.getTargetLoadSpec()); } @Test @@ -364,14 +378,14 @@ public class TaskSerdeTest final ObjectMapper jsonMapper = new DefaultObjectMapper(); final String json = jsonMapper.writeValueAsString(task); - final Task task2 = jsonMapper.readValue(json, Task.class); + final HadoopIndexTask task2 = (HadoopIndexTask) jsonMapper.readValue(json, Task.class); Assert.assertEquals("foo", task.getDataSource()); - Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval()); 
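The task serde tests above all exercise the same round trip: build a task, serialize it with a `DefaultObjectMapper`, read it back through the `Task` supertype, and compare the typed accessors (`getInterval()` now replacing the old `getImplicitLockInterval()`). The one new piece of wiring is that the mapper must have the firehose Jackson modules registered before an `IndexTask` carrying a `LocalFirehoseFactory` will deserialize. Below is a minimal sketch of that round trip, assembled only from constructors, calls, and import paths that appear in this patch (`KillTask` is used because its full constructor is visible here); it is an illustration, not part of the change itself.

```
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.guice.FirehoseModule;
import io.druid.indexing.common.task.KillTask;
import io.druid.indexing.common.task.Task;
import io.druid.jackson.DefaultObjectMapper;
import org.joda.time.Interval;

public class TaskRoundTripSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper jsonMapper = new DefaultObjectMapper();
    // Required when the task embeds a firehose (e.g. IndexTask + LocalFirehoseFactory);
    // harmless for task types that do not.
    for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
      jsonMapper.registerModule(jacksonModule);
    }

    final KillTask task = new KillTask(null, "foo", new Interval("2010-01-01/P1D"));
    final String json = jsonMapper.writeValueAsString(task);

    // Deserialize through the Task supertype, as the tests do, then narrow to the concrete type.
    final KillTask task2 = (KillTask) jsonMapper.readValue(json, Task.class);
    System.out.println(task2.getId() + " " + task2.getDataSource() + " " + task2.getInterval());
  }
}
```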
Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); + Assert.assertEquals(task.getInterval(), task2.getInterval()); } } diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/RealtimeishTask.java b/indexing-service/src/test/java/io/druid/indexing/overlord/RealtimeishTask.java index b6f93f04704..e4ec6d54aa1 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/RealtimeishTask.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/RealtimeishTask.java @@ -28,6 +28,7 @@ import io.druid.indexing.common.actions.LockAcquireAction; import io.druid.indexing.common.actions.LockListAction; import io.druid.indexing.common.actions.LockReleaseAction; import io.druid.indexing.common.actions.SegmentInsertAction; +import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.task.AbstractTask; import io.druid.indexing.common.task.TaskResource; import io.druid.timeline.DataSegment; @@ -42,12 +43,12 @@ public class RealtimeishTask extends AbstractTask { public RealtimeishTask() { - super("rt1", "rt", new TaskResource("rt1", 1), "foo", null); + super("rt1", "rt", new TaskResource("rt1", 1), "foo"); } - public RealtimeishTask(String id, String groupId, TaskResource taskResource, String dataSource, Interval interval) + public RealtimeishTask(String id, String groupId, TaskResource taskResource, String dataSource) { - super(id, groupId, taskResource, dataSource, interval); + super(id, groupId, taskResource, dataSource); } @Override @@ -56,6 +57,12 @@ public class RealtimeishTask extends AbstractTask return "realtime_test"; } + @Override + public boolean isReady(TaskActionClient taskActionClient) throws Exception + { + return true; + } + @Override public TaskStatus run(TaskToolbox toolbox) throws Exception { diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java index f368e69758b..fcf9715fe62 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java @@ -23,8 +23,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.api.client.repackaged.com.google.common.base.Throwables; import com.google.common.base.Function; import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.util.concurrent.ListenableFuture; import com.metamx.emitter.EmittingLogger; @@ -55,7 +55,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import java.util.Arrays; import java.util.Set; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicReference; @@ -219,7 +218,7 @@ public class RemoteTaskRunnerTest ) ); - Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTask().getId().equals("rt2")); + Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTaskId().equals("rt2")); } @Test @@ -266,7 +265,7 @@ public class RemoteTaskRunnerTest ) ); - 
Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTask().getId().equals("rt2")); + Assert.assertTrue(remoteTaskRunner.getPendingTasks().iterator().next().getTaskId().equals("rt2")); } @Test @@ -280,7 +279,7 @@ public class RemoteTaskRunnerTest Assert.assertTrue(workerRunningTask(task.getId())); - Assert.assertTrue(remoteTaskRunner.getRunningTasks().iterator().next().getTask().getId().equals("task")); + Assert.assertTrue(remoteTaskRunner.getRunningTasks().iterator().next().getTaskId().equals("task")); cf.delete().forPath(joiner.join(statusPath, task.getId())); @@ -303,18 +302,13 @@ public class RemoteTaskRunnerTest doSetup(); - Set existingTasks = Sets.newHashSet(); + final Set existingTasks = Sets.newHashSet(); for (ZkWorker zkWorker : remoteTaskRunner.getWorkers()) { existingTasks.addAll(zkWorker.getRunningTasks().keySet()); } + Assert.assertEquals("existingTasks", ImmutableSet.of("first", "second"), existingTasks); - Assert.assertTrue(existingTasks.size() == 2); - Assert.assertTrue(existingTasks.contains("first")); - Assert.assertTrue(existingTasks.contains("second")); - - remoteTaskRunner.bootstrap(Arrays.asList(TestMergeTask.createDummyTask("second"))); - - Set runningTasks = Sets.newHashSet( + final Set runningTasks = Sets.newHashSet( Iterables.transform( remoteTaskRunner.getRunningTasks(), new Function() @@ -322,15 +316,12 @@ public class RemoteTaskRunnerTest @Override public String apply(RemoteTaskRunnerWorkItem input) { - return input.getTask().getId(); + return input.getTaskId(); } } ) ); - - Assert.assertTrue(runningTasks.size() == 1); - Assert.assertTrue(runningTasks.contains("second")); - Assert.assertFalse(runningTasks.contains("first")); + Assert.assertEquals("runningTasks", ImmutableSet.of("first", "second"), runningTasks); } @Test @@ -343,8 +334,6 @@ public class RemoteTaskRunnerTest doSetup(); - remoteTaskRunner.bootstrap(Arrays.asList(task)); - ListenableFuture future = remoteTaskRunner.run(task); TaskStatus status = future.get(); @@ -356,7 +345,6 @@ public class RemoteTaskRunnerTest public void testWorkerRemoved() throws Exception { doSetup(); - remoteTaskRunner.bootstrap(Lists.newArrayList()); Future future = remoteTaskRunner.run(task); Assert.assertTrue(taskAnnounced(task.getId())); diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java index 0b3a4e9ed97..85637d75c51 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java @@ -19,6 +19,7 @@ package io.druid.indexing.overlord; +import com.google.api.client.repackaged.com.google.common.base.Preconditions; import com.google.common.base.Optional; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; @@ -53,16 +54,19 @@ import io.druid.indexing.common.actions.SegmentInsertAction; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.actions.TaskActionToolbox; import io.druid.indexing.common.config.TaskConfig; -import io.druid.indexing.common.task.AbstractTask; +import io.druid.indexing.common.config.TaskStorageConfig; +import io.druid.indexing.common.task.AbstractFixedIntervalTask; import io.druid.indexing.common.task.IndexTask; import io.druid.indexing.common.task.KillTask; import io.druid.indexing.common.task.Task; import io.druid.indexing.common.task.TaskResource; -import 
io.druid.indexing.overlord.exec.TaskConsumer; +import io.druid.indexing.overlord.config.TaskQueueConfig; import io.druid.jackson.DefaultObjectMapper; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregatorFactory; +import io.druid.segment.loading.DataSegmentArchiver; import io.druid.segment.loading.DataSegmentKiller; +import io.druid.segment.loading.DataSegmentMover; import io.druid.segment.loading.DataSegmentPuller; import io.druid.segment.loading.DataSegmentPusher; import io.druid.segment.loading.LocalDataSegmentPuller; @@ -84,6 +88,7 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; public class TaskLifecycleTest @@ -96,7 +101,6 @@ public class TaskLifecycleTest private MockIndexerDBCoordinator mdc = null; private TaskActionClientFactory tac = null; private TaskToolboxFactory tb = null; - private TaskConsumer tc = null; TaskStorageQueryAdapter tsqa = null; private static final Ordering byIntervalOrdering = new Ordering() @@ -109,18 +113,26 @@ public class TaskLifecycleTest }; @Before - public void setUp() + public void setUp() throws Exception { - EmittingLogger.registerEmitter(EasyMock.createMock(ServiceEmitter.class)); + final ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class); + EmittingLogger.registerEmitter(emitter); tmp = Files.createTempDir(); - ts = new HeapMemoryTaskStorage(); + final TaskQueueConfig tqc = new DefaultObjectMapper().readValue( + "{\"startDelay\":\"PT0S\", \"restartDelay\":\"PT1S\"}", + TaskQueueConfig.class + ); + ts = new HeapMemoryTaskStorage( + new TaskStorageConfig() + { + } + ); + tsqa = new TaskStorageQueryAdapter(ts); tl = new TaskLockbox(ts); - tq = new TaskQueue(ts, tl); mdc = newMockMDC(); - tac = new LocalTaskActionClientFactory(ts, new TaskActionToolbox(tq, tl, mdc, newMockEmitter())); - + tac = new LocalTaskActionClientFactory(ts, new TaskActionToolbox(tl, mdc, newMockEmitter())); tb = new TaskToolboxFactory( new TaskConfig(tmp.toString(), null, null, 50000), tac, @@ -147,6 +159,22 @@ public class TaskLifecycleTest } }, + new DataSegmentMover() + { + @Override + public DataSegment move(DataSegment dataSegment, Map targetLoadSpec) throws SegmentLoadingException + { + return dataSegment; + } + }, + new DataSegmentArchiver() + { + @Override + public DataSegment archive(DataSegment segment) throws SegmentLoadingException + { + return segment; + } + }, null, // segment announcer null, // new segment server view null, // query runner factory conglomerate corporation unionized collective @@ -171,14 +199,9 @@ public class TaskLifecycleTest ), new DefaultObjectMapper() ); - tr = new ThreadPoolTaskRunner(tb); - - tc = new TaskConsumer(tq, tr, tac, newMockEmitter()); - tsqa = new TaskStorageQueryAdapter(ts); - + tq = new TaskQueue(tqc, ts, tr, tac, tl, emitter); tq.start(); - tc.start(); } @After @@ -190,7 +213,6 @@ public class TaskLifecycleTest catch (Exception e) { // suppress } - tc.stop(); tq.stop(); } @@ -216,13 +238,13 @@ public class TaskLifecycleTest -1 ); - final Optional preRunTaskStatus = tsqa.getSameGroupMergedStatus(indexTask.getId()); + final Optional preRunTaskStatus = tsqa.getStatus(indexTask.getId()); Assert.assertTrue("pre run task status not present", !preRunTaskStatus.isPresent()); final TaskStatus mergedStatus = runTask(indexTask); final TaskStatus status = ts.getStatus(indexTask.getId()).get(); final List publishedSegments = byIntervalOrdering.sortedCopy(mdc.getPublished()); - 
final List loggedSegments = byIntervalOrdering.sortedCopy(tsqa.getSameGroupNewSegments(indexTask.getId())); + final List loggedSegments = byIntervalOrdering.sortedCopy(tsqa.getInsertedSegments(indexTask.getId())); Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, status.getStatusCode()); Assert.assertEquals("merged statusCode", TaskStatus.Status.SUCCESS, mergedStatus.getStatusCode()); @@ -264,11 +286,9 @@ public class TaskLifecycleTest -1 ); - final TaskStatus mergedStatus = runTask(indexTask); - final TaskStatus status = ts.getStatus(indexTask.getId()).get(); + final TaskStatus status = runTask(indexTask); - Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, status.getStatusCode()); - Assert.assertEquals("merged statusCode", TaskStatus.Status.FAILED, mergedStatus.getStatusCode()); + Assert.assertEquals("statusCode", TaskStatus.Status.FAILED, status.getStatusCode()); Assert.assertEquals("num segments published", 0, mdc.getPublished().size()); Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size()); } @@ -297,10 +317,44 @@ public class TaskLifecycleTest Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size()); } + @Test + public void testNoopTask() throws Exception + { + final Task noopTask = new DefaultObjectMapper().readValue( + "{\"type\":\"noop\", \"runTime\":\"100\"}\"", + Task.class + ); + final TaskStatus status = runTask(noopTask); + + Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, status.getStatusCode()); + Assert.assertEquals("num segments published", 0, mdc.getPublished().size()); + Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size()); + } + + @Test + public void testNeverReadyTask() throws Exception + { + final Task neverReadyTask = new DefaultObjectMapper().readValue( + "{\"type\":\"noop\", \"isReadyResult\":\"exception\"}\"", + Task.class + ); + final TaskStatus status = runTask(neverReadyTask); + + Assert.assertEquals("statusCode", TaskStatus.Status.FAILED, status.getStatusCode()); + Assert.assertEquals("num segments published", 0, mdc.getPublished().size()); + Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size()); + } + @Test public void testSimple() throws Exception { - final Task task = new AbstractTask("id1", "id1", new TaskResource("id1", 1), "ds", new Interval("2012-01-01/P1D")) + final Task task = new AbstractFixedIntervalTask( + "id1", + "id1", + new TaskResource("id1", 1), + "ds", + new Interval("2012-01-01/P1D") + ) { @Override public String getType() @@ -337,7 +391,7 @@ public class TaskLifecycleTest @Test public void testBadInterval() throws Exception { - final Task task = new AbstractTask("id1", "id1", "ds", new Interval("2012-01-01/P1D")) + final Task task = new AbstractFixedIntervalTask("id1", "id1", "ds", new Interval("2012-01-01/P1D")) { @Override public String getType() @@ -371,7 +425,7 @@ public class TaskLifecycleTest @Test public void testBadVersion() throws Exception { - final Task task = new AbstractTask("id1", "id1", "ds", new Interval("2012-01-01/P1D")) + final Task task = new AbstractFixedIntervalTask("id1", "id1", "ds", new Interval("2012-01-01/P1D")) { @Override public String getType() @@ -402,28 +456,41 @@ public class TaskLifecycleTest Assert.assertEquals("segments nuked", 0, mdc.getNuked().size()); } - private TaskStatus runTask(Task task) + private TaskStatus runTask(final Task task) throws Exception { + final Task dummyTask = new DefaultObjectMapper().readValue( + "{\"type\":\"noop\", \"isReadyResult\":\"exception\"}\"", + Task.class + ); final long 
startTime = System.currentTimeMillis(); + Preconditions.checkArgument(!task.getId().equals(dummyTask.getId())); + + tq.add(dummyTask); tq.add(task); - TaskStatus status; + TaskStatus retVal = null; - try { - while ((status = tsqa.getSameGroupMergedStatus(task.getId()).get()).isRunnable()) { - if (System.currentTimeMillis() > startTime + 10 * 1000) { - throw new ISE("Where did the task go?!: %s", task.getId()); + for (final String taskId : ImmutableList.of(dummyTask.getId(), task.getId())) { + try { + TaskStatus status; + while ((status = tsqa.getStatus(taskId).get()).isRunnable()) { + if (System.currentTimeMillis() > startTime + 10 * 1000) { + throw new ISE("Where did the task go?!: %s", task.getId()); + } + + Thread.sleep(100); } - - Thread.sleep(100); + if (taskId.equals(task.getId())) { + retVal = status; + } + } + catch (Exception e) { + throw Throwables.propagate(e); } } - catch (Exception e) { - throw Throwables.propagate(e); - } - return status; + return retVal; } private static class MockIndexerDBCoordinator extends IndexerDBCoordinator diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskQueueTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskQueueTest.java deleted file mode 100644 index 2b1d0e560c1..00000000000 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskQueueTest.java +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012, 2013 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ - -package io.druid.indexing.overlord; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import io.druid.indexing.common.SegmentLoaderFactory; -import io.druid.indexing.common.TaskLock; -import io.druid.indexing.common.TaskStatus; -import io.druid.indexing.common.TaskToolbox; -import io.druid.indexing.common.TaskToolboxFactory; -import io.druid.indexing.common.actions.LocalTaskActionClientFactory; -import io.druid.indexing.common.actions.SpawnTasksAction; -import io.druid.indexing.common.actions.TaskActionToolbox; -import io.druid.indexing.common.config.TaskConfig; -import io.druid.indexing.common.task.AbstractTask; -import io.druid.indexing.common.task.Task; -import io.druid.segment.loading.DataSegmentPuller; -import io.druid.segment.loading.LocalDataSegmentPuller; -import io.druid.segment.loading.OmniSegmentLoader; -import io.druid.segment.loading.SegmentLoaderConfig; -import io.druid.segment.loading.StorageLocationConfig; -import org.joda.time.Interval; -import org.junit.Assert; -import org.junit.Test; - -import java.util.List; -import java.util.Set; - -public class TaskQueueTest -{ - @Test - public void testEmptyQueue() throws Exception - { - final TaskStorage ts = new HeapMemoryTaskStorage(); - final TaskLockbox tl = new TaskLockbox(ts); - final TaskQueue tq = newTaskQueue(ts, tl); - - // get task status for nonexistent task - Assert.assertFalse("getStatus", ts.getStatus("foo").isPresent()); - - // poll on empty queue - Assert.assertNull("poll", tq.poll()); - } - - public static TaskQueue newTaskQueue(TaskStorage storage, TaskLockbox lockbox) - { - final TaskQueue tq = new TaskQueue(storage, lockbox); - tq.bootstrap(); - tq.start(); - return tq; - } - - @Test - public void testAddRemove() throws Exception - { - final TaskStorage ts = new HeapMemoryTaskStorage(); - final TaskLockbox tl = new TaskLockbox(ts); - final TaskQueue tq = newTaskQueue(ts, tl); - - final Task[] tasks = { - newTask("T0", "G0", "bar", new Interval("2011/P1Y")), - newTask("T1", "G1", "bar", new Interval("2011-03-01/P1D")), - newTask("T2", "G2", "foo", new Interval("2011-03-01/P1D")), - newTask("T3", "G3", "foo", new Interval("2011/P1Y")), - newTask("T4", "G4", "foo", new Interval("2012-01-02/P1D")), - newTask("T5", "G5", "foo", new Interval("2012-02-01/PT1H")) - }; - - Throwable thrown; - - for (Task task : tasks) { - tq.add(task); - } - - // get task status for in-progress task - Assert.assertEquals( - "T2 status (before finishing)", - TaskStatus.Status.RUNNING, - ts.getStatus(tasks[2].getId()).get().getStatusCode() - ); - - // Can't add tasks with the same id - thrown = null; - try { - tq.add(newTask("T5", "G5", "baz", new Interval("2013-02-01/PT1H"))); - } - catch (TaskExistsException e) { - thrown = e; - } - - Assert.assertNotNull("Exception on duplicate task id", thrown); - - // take max number of tasks - final List taken = Lists.newArrayList(); - while (true) { - final Task task = tq.poll(); - if (task != null) { - taken.add(task); - } else { - break; - } - } - - // check them - Assert.assertEquals( - "Taken tasks (round 1)", - Lists.newArrayList( - tasks[0], tasks[2], tasks[4], tasks[5] - ), - taken - ); - - // mark one done - tq.notify(tasks[2], tasks[2].run(null)); - - // get its status back - Assert.assertEquals( - "T2 status (after finishing)", - TaskStatus.Status.SUCCESS, - 
ts.getStatus(tasks[2].getId()).get().getStatusCode() - ); - - // We should be able to get one more task now - taken.clear(); - while (true) { - final Task task = tq.poll(); - if (task != null) { - taken.add(task); - } else { - break; - } - } - - // check it - Assert.assertEquals( - "Taken tasks (round 2)", - Lists.newArrayList( - tasks[3] - ), - taken - ); - - // there should be no more tasks to get - Assert.assertNull("poll queue with no tasks available", tq.poll()); - } - - @Test - public void testContinues() throws Exception - { - final TaskStorage ts = new HeapMemoryTaskStorage(); - final TaskLockbox tl = new TaskLockbox(ts); - final TaskQueue tq = newTaskQueue(ts, tl); - final TaskToolboxFactory tb = new TaskToolboxFactory( - new TaskConfig(null, null, null, null), - new LocalTaskActionClientFactory(ts, new TaskActionToolbox(tq, tl, null, null)), - null, - null, - null, - null, - null, - null, - null, - null, - new SegmentLoaderFactory( - new OmniSegmentLoader( - ImmutableMap.of( - "local", - new LocalDataSegmentPuller() - ), - null, - new SegmentLoaderConfig() - { - @Override - public List getLocations() - { - return Lists.newArrayList(); - } - } - ) - ), - null - ); - - final Task t0 = newTask("T0", "G0", "bar", new Interval("2011/P1Y")); - final Task t1 = newContinuedTask("T1", "G1", "bar", new Interval("2013/P1Y"), Lists.newArrayList(t0)); - tq.add(t1); - - Assert.assertTrue("T0 isPresent (#1)", !ts.getStatus("T0").isPresent()); - Assert.assertTrue("T1 isPresent (#1)", ts.getStatus("T1").isPresent()); - Assert.assertTrue("T1 isRunnable (#1)", ts.getStatus("T1").get().isRunnable()); - Assert.assertTrue("T1 isComplete (#1)", !ts.getStatus("T1").get().isComplete()); - - // should be able to get t1 out - Assert.assertEquals("poll #1", "T1", tq.poll().getId()); - Assert.assertNull("poll #2", tq.poll()); - - // report T1 done. Should cause T0 to be created - tq.notify(t1, t1.run(tb.build(t1))); - - Assert.assertTrue("T0 isPresent (#2)", ts.getStatus("T0").isPresent()); - Assert.assertTrue("T0 isRunnable (#2)", ts.getStatus("T0").get().isRunnable()); - Assert.assertTrue("T0 isComplete (#2)", !ts.getStatus("T0").get().isComplete()); - Assert.assertTrue("T1 isPresent (#2)", ts.getStatus("T1").isPresent()); - Assert.assertTrue("T1 isRunnable (#2)", !ts.getStatus("T1").get().isRunnable()); - Assert.assertTrue("T1 isComplete (#2)", ts.getStatus("T1").get().isComplete()); - - // should be able to get t0 out - Assert.assertEquals("poll #3", "T0", tq.poll().getId()); - Assert.assertNull("poll #4", tq.poll()); - - // report T0 done. 
Should cause T0, T1 to be marked complete - tq.notify(t0, t0.run(tb.build(t0))); - - Assert.assertTrue("T0 isPresent (#3)", ts.getStatus("T0").isPresent()); - Assert.assertTrue("T0 isRunnable (#3)", !ts.getStatus("T0").get().isRunnable()); - Assert.assertTrue("T0 isComplete (#3)", ts.getStatus("T0").get().isComplete()); - Assert.assertTrue("T1 isPresent (#3)", ts.getStatus("T1").isPresent()); - Assert.assertTrue("T1 isRunnable (#3)", !ts.getStatus("T1").get().isRunnable()); - Assert.assertTrue("T1 isComplete (#3)", ts.getStatus("T1").get().isComplete()); - - // should be no more events available for polling - Assert.assertNull("poll #5", tq.poll()); - } - - @Test - public void testConcurrency() throws Exception - { - final TaskStorage ts = new HeapMemoryTaskStorage(); - final TaskLockbox tl = new TaskLockbox(ts); - final TaskQueue tq = newTaskQueue(ts, tl); - final TaskToolboxFactory tb = new TaskToolboxFactory( - new TaskConfig(null, null, null, null), - new LocalTaskActionClientFactory(ts, new TaskActionToolbox(tq, tl, null, null)), - null, - null, - null, - null, - null, - null, - null, - null, - new SegmentLoaderFactory( - new OmniSegmentLoader( - ImmutableMap.of( - "local", - new LocalDataSegmentPuller() - ), - null, - new SegmentLoaderConfig() - { - @Override - public List getLocations() - { - return Lists.newArrayList(); - } - } - ) - ), - null - ); - - // Imagine a larger task that splits itself up into pieces - final Task t1 = newTask("T1", "G0", "bar", new Interval("2011-01-01/P1D")); - final Task t2 = newTask("T2", "G1", "bar", new Interval("2011-01-02/P1D")); // Task group different from original - final Task t3 = newTask("T3", "G0", "bar", new Interval("2011-01-03/P1D")); - final Task t4 = newTask("T4", "G0", "bar", new Interval("2011-01-02/P5D")); // Interval wider than original - final Task t0 = newContinuedTask( - "T0", - "G0", - "bar", - new Interval("2011-01-01/P3D"), - ImmutableList.of(t1, t2, t3, t4) - ); - - tq.add(t0); - - final Task wt0 = tq.poll(); - final TaskLock wt0Lock = Iterables.getOnlyElement(tl.findLocksForTask(wt0)); - Assert.assertEquals("wt0 task id", "T0", wt0.getId()); - Assert.assertNull("null poll #1", tq.poll()); - - // Sleep a bit to avoid false test passes - Thread.sleep(5); - - // Finish t0 - tq.notify(t0, t0.run(tb.build(t0))); - - // take max number of tasks - final Set taken = Sets.newHashSet(); - while (true) { - - // Sleep a bit to avoid false test passes - Thread.sleep(5); - - final Task task = tq.poll(); - - if (task != null) { - final TaskLock taskLock = Iterables.getOnlyElement(tl.findLocksForTask(task)); - Assert.assertEquals( - String.format("%s version", task.getId()), - wt0Lock.getVersion(), - taskLock.getVersion() - ); - taken.add(task.getId()); - } else { - break; - } - - } - - Assert.assertEquals("taken", Sets.newHashSet("T1", "T3"), taken); - - // Finish t1 - tq.notify(t1, t1.run(null)); - Assert.assertNull("null poll #2", tq.poll()); - - // Finish t3 - tq.notify(t3, t3.run(tb.build(t3))); - - // We should be able to get t2 now - final Task wt2 = tq.poll(); - final TaskLock wt2Lock = Iterables.getOnlyElement(tl.findLocksForTask(wt2)); - Assert.assertEquals("wt2 task id", "T2", wt2.getId()); - Assert.assertEquals("wt2 group id", "G1", wt2.getGroupId()); - Assert.assertNotSame("wt2 version", wt0Lock.getVersion(), wt2Lock.getVersion()); - Assert.assertNull("null poll #3", tq.poll()); - - // Finish t2 - tq.notify(t2, t2.run(tb.build(t2))); - - // We should be able to get t4 - // And it should be in group G0, but that group should have 
a different version than last time - // (Since the previous transaction named "G0" has ended and transaction names are not necessarily tied to - // one version if they end and are re-started) - final Task wt4 = tq.poll(); - final TaskLock wt4Lock = Iterables.getOnlyElement(tl.findLocksForTask(wt4)); - Assert.assertEquals("wt4 task id", "T4", wt4.getId()); - Assert.assertEquals("wt4 group id", "G0", wt4.getGroupId()); - Assert.assertNotSame("wt4 version", wt0Lock.getVersion(), wt4Lock.getVersion()); - Assert.assertNotSame("wt4 version", wt2Lock.getVersion(), wt4Lock.getVersion()); - - // Kind of done testing at this point, but let's finish t4 anyway - tq.notify(t4, t4.run(tb.build(t4))); - Assert.assertNull("null poll #4", tq.poll()); - } - - @Test - public void testBootstrap() throws Exception - { - final TaskStorage storage = new HeapMemoryTaskStorage(); - final TaskLockbox lockbox = new TaskLockbox(storage); - - storage.insert(newTask("T1", "G1", "bar", new Interval("2011-01-01/P1D")), TaskStatus.running("T1")); - storage.insert(newTask("T2", "G2", "bar", new Interval("2011-02-01/P1D")), TaskStatus.running("T2")); - storage.addLock("T1", new TaskLock("G1", "bar", new Interval("2011-01-01/P1D"), "1234")); - - final TaskQueue tq = newTaskQueue(storage, lockbox); - - final Task vt1 = tq.poll(); - final TaskLock vt1Lock = Iterables.getOnlyElement(lockbox.findLocksForTask(vt1)); - Assert.assertEquals("vt1 id", "T1", vt1.getId()); - Assert.assertEquals("vt1 version", "1234", vt1Lock.getVersion()); - - tq.notify(vt1, TaskStatus.success("T1")); - - // re-bootstrap - tq.stop(); - storage.setStatus(TaskStatus.failure("T2")); - tq.bootstrap(); - tq.start(); - - Assert.assertNull("null poll", tq.poll()); - } - - private static Task newTask(final String id, final String groupId, final String dataSource, final Interval interval) - { - return new AbstractTask(id, groupId, dataSource, interval) - { - @Override - public TaskStatus run(TaskToolbox toolbox) throws Exception - { - return TaskStatus.success(id); - } - - @Override - public String getType() - { - return "null"; - } - }; - } - - private static Task newContinuedTask( - final String id, - final String groupId, - final String dataSource, - final Interval interval, - final List nextTasks - ) - { - return new AbstractTask(id, groupId, dataSource, interval) - { - @Override - public String getType() - { - return "null"; - } - - @Override - public TaskStatus run(TaskToolbox toolbox) throws Exception - { - toolbox.getTaskActionClient().submit(new SpawnTasksAction(nextTasks)); - return TaskStatus.success(id); - } - }; - } -} diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategyTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategyTest.java index d0b5edb5ca1..e6cd52c80ac 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategyTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/EC2AutoScalingStrategyTest.java @@ -126,13 +126,11 @@ public class EC2AutoScalingStrategyTest AutoScalingData created = strategy.provision(); Assert.assertEquals(created.getNodeIds().size(), 1); - Assert.assertEquals(created.getNodes().size(), 1); Assert.assertEquals("theInstance", created.getNodeIds().get(0)); AutoScalingData deleted = strategy.terminate(Arrays.asList("dummyIP")); Assert.assertEquals(deleted.getNodeIds().size(), 1); - Assert.assertEquals(deleted.getNodes().size(), 1); 
Assert.assertEquals(String.format("%s:8080", IP), deleted.getNodeIds().get(0)); } } diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategyTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategyTest.java index 02ac9a21778..6ffc6ae6222 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategyTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/scaling/SimpleResourceManagementStrategyTest.java @@ -19,6 +19,7 @@ package io.druid.indexing.overlord.scaling; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -28,6 +29,7 @@ import com.metamx.emitter.service.ServiceEventBuilder; import io.druid.common.guava.DSuppliers; import io.druid.indexing.common.TestMergeTask; import io.druid.indexing.common.TaskStatus; +import io.druid.indexing.common.task.NoopTask; import io.druid.indexing.common.task.Task; import io.druid.indexing.overlord.RemoteTaskRunnerWorkItem; import io.druid.indexing.overlord.ZkWorker; @@ -63,7 +65,7 @@ public class SimpleResourceManagementStrategyTest public void setUp() throws Exception { autoScalingStrategy = EasyMock.createMock(AutoScalingStrategy.class); - workerSetupData = new AtomicReference( + workerSetupData = new AtomicReference<>( new WorkerSetupData( "0", 0, 2, null, null, null ) @@ -105,13 +107,13 @@ public class SimpleResourceManagementStrategyTest EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) .andReturn(Lists.newArrayList()); EasyMock.expect(autoScalingStrategy.provision()).andReturn( - new AutoScalingData(Lists.newArrayList(), Lists.newArrayList()) + new AutoScalingData(Lists.newArrayList("aNode")) ); EasyMock.replay(autoScalingStrategy); boolean provisionedSomething = simpleResourceManagementStrategy.doProvision( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(testTask) @@ -133,13 +135,13 @@ public class SimpleResourceManagementStrategyTest EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) .andReturn(Lists.newArrayList()).times(2); EasyMock.expect(autoScalingStrategy.provision()).andReturn( - new AutoScalingData(Lists.newArrayList("fake"), Lists.newArrayList("faker")) + new AutoScalingData(Lists.newArrayList("fake")) ); EasyMock.replay(autoScalingStrategy); boolean provisionedSomething = simpleResourceManagementStrategy.doProvision( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(testTask) @@ -155,7 +157,7 @@ public class SimpleResourceManagementStrategyTest provisionedSomething = simpleResourceManagementStrategy.doProvision( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(testTask) @@ -190,13 +192,13 @@ public class SimpleResourceManagementStrategyTest EasyMock.expect(autoScalingStrategy.terminate(EasyMock.>anyObject())) .andReturn(null); 
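Two API changes drive most of the churn in the surrounding scaling tests: `AutoScalingData` now carries a single list of node IDs (the parallel list of provisioned node objects is gone), and `RemoteTaskRunnerWorkItem` is built from a task ID rather than a whole `Task`. The following is a small illustrative sketch of what callers now see, limited to the constructors and EasyMock calls used in these hunks; package paths follow the test files' locations in this patch, and the surrounding `SimpleResourceManagementStrategy` wiring is intentionally left out.

```
import com.google.common.collect.Lists;
import io.druid.indexing.overlord.RemoteTaskRunnerWorkItem;
import io.druid.indexing.overlord.scaling.AutoScalingData;
import io.druid.indexing.overlord.scaling.AutoScalingStrategy;
import org.easymock.EasyMock;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

public class AutoScalingDataSketchTest
{
  @Test
  public void testProvisionReturnsNodeIdsOnly() throws Exception
  {
    // AutoScalingData is now just a list of node ids; there is no second "nodes" list to assert on.
    final AutoScalingStrategy strategy = EasyMock.createMock(AutoScalingStrategy.class);
    EasyMock.expect(strategy.provision()).andReturn(new AutoScalingData(Lists.newArrayList("aNode")));
    EasyMock.replay(strategy);

    final AutoScalingData provisioned = strategy.provision();
    Assert.assertEquals(Lists.newArrayList("aNode"), provisioned.getNodeIds());
    EasyMock.verify(strategy);

    // Pending work is likewise identified by task id alone now.
    final List<RemoteTaskRunnerWorkItem> pending = Arrays.asList(
        new RemoteTaskRunnerWorkItem("some-task-id", null, null).withQueueInsertionTime(new DateTime())
    );
    Assert.assertEquals("some-task-id", pending.get(0).getTaskId());
  }
}
```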
EasyMock.expect(autoScalingStrategy.provision()).andReturn( - new AutoScalingData(Lists.newArrayList("fake"), Lists.newArrayList("faker")) + new AutoScalingData(Lists.newArrayList("fake")) ); EasyMock.replay(autoScalingStrategy); boolean provisionedSomething = simpleResourceManagementStrategy.doProvision( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(testTask) @@ -214,7 +216,7 @@ public class SimpleResourceManagementStrategyTest provisionedSomething = simpleResourceManagementStrategy.doProvision( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(testTask) @@ -242,13 +244,13 @@ public class SimpleResourceManagementStrategyTest EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) .andReturn(Lists.newArrayList()); EasyMock.expect(autoScalingStrategy.terminate(EasyMock.>anyObject())).andReturn( - new AutoScalingData(Lists.newArrayList(), Lists.newArrayList()) + new AutoScalingData(Lists.newArrayList()) ); EasyMock.replay(autoScalingStrategy); boolean terminatedSomething = simpleResourceManagementStrategy.doTerminate( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(null) @@ -272,13 +274,13 @@ public class SimpleResourceManagementStrategyTest EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) .andReturn(Lists.newArrayList("ip")).times(2); EasyMock.expect(autoScalingStrategy.terminate(EasyMock.>anyObject())).andReturn( - new AutoScalingData(Lists.newArrayList("ip"), Lists.newArrayList("ip")) + new AutoScalingData(Lists.newArrayList("ip")) ); EasyMock.replay(autoScalingStrategy); boolean terminatedSomething = simpleResourceManagementStrategy.doTerminate( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(null) @@ -293,7 +295,7 @@ public class SimpleResourceManagementStrategyTest terminatedSomething = simpleResourceManagementStrategy.doTerminate( Arrays.asList( - new RemoteTaskRunnerWorkItem(testTask, null, null).withQueueInsertionTime(new DateTime()) + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) ), Arrays.asList( new TestZkWorker(null) @@ -309,15 +311,174 @@ public class SimpleResourceManagementStrategyTest EasyMock.verify(autoScalingStrategy); } + @Test + public void testNoActionNeeded() throws Exception + { + EasyMock.reset(autoScalingStrategy); + EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) + .andReturn(Lists.newArrayList("ip")); + EasyMock.replay(autoScalingStrategy); + + boolean terminatedSomething = simpleResourceManagementStrategy.doTerminate( + Arrays.asList( + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) + ), + Arrays.asList( + new TestZkWorker(NoopTask.create()), + new TestZkWorker(NoopTask.create()) + ) + ); + + Assert.assertFalse(terminatedSomething); + 
EasyMock.verify(autoScalingStrategy); + + EasyMock.reset(autoScalingStrategy); + EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) + .andReturn(Lists.newArrayList("ip")); + EasyMock.replay(autoScalingStrategy); + + boolean provisionedSomething = simpleResourceManagementStrategy.doProvision( + Arrays.asList( + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) + ), + Arrays.asList( + new TestZkWorker(NoopTask.create()), + new TestZkWorker(NoopTask.create()) + ) + ); + + Assert.assertFalse(provisionedSomething); + EasyMock.verify(autoScalingStrategy); + } + + @Test + public void testMinVersionIncrease() throws Exception + { + // Don't terminate anything + EasyMock.reset(autoScalingStrategy); + EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) + .andReturn(Lists.newArrayList("ip")); + EasyMock.replay(autoScalingStrategy); + boolean terminatedSomething = simpleResourceManagementStrategy.doTerminate( + Arrays.asList(), + Arrays.asList( + new TestZkWorker(NoopTask.create(), "h1", "i1", "0"), + new TestZkWorker(NoopTask.create(), "h1", "i2", "0") + ) + ); + Assert.assertFalse(terminatedSomething); + EasyMock.verify(autoScalingStrategy); + + // Don't provision anything + EasyMock.reset(autoScalingStrategy); + EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) + .andReturn(Lists.newArrayList("ip")); + EasyMock.replay(autoScalingStrategy); + boolean provisionedSomething = simpleResourceManagementStrategy.doProvision( + Arrays.asList(), + Arrays.asList( + new TestZkWorker(NoopTask.create()), + new TestZkWorker(NoopTask.create()) + ) + ); + Assert.assertFalse(provisionedSomething); + EasyMock.verify(autoScalingStrategy); + + // Increase minVersion + workerSetupData.set(new WorkerSetupData("1", 0, 2, null, null, null)); + + // Provision two new workers + EasyMock.reset(autoScalingStrategy); + EasyMock.expect(autoScalingStrategy.ipToIdLookup(EasyMock.>anyObject())) + .andReturn(Lists.newArrayList("ip")); + EasyMock.expect(autoScalingStrategy.provision()).andReturn( + new AutoScalingData(Lists.newArrayList("h3")) + ); + EasyMock.expect(autoScalingStrategy.provision()).andReturn( + new AutoScalingData(Lists.newArrayList("h4")) + ); + EasyMock.replay(autoScalingStrategy); + provisionedSomething = simpleResourceManagementStrategy.doProvision( + Arrays.asList(), + Arrays.asList( + new TestZkWorker(NoopTask.create(), "h1", "i1", "0"), + new TestZkWorker(NoopTask.create(), "h2", "i2", "0") + ) + ); + Assert.assertTrue(provisionedSomething); + EasyMock.verify(autoScalingStrategy); + + // Terminate old workers + EasyMock.reset(autoScalingStrategy); + EasyMock.expect(autoScalingStrategy.ipToIdLookup(ImmutableList.of("i1", "i2", "i3", "i4"))).andReturn( + ImmutableList.of("h1", "h2", "h3", "h4") + ); + EasyMock.expect(autoScalingStrategy.terminate(ImmutableList.of("i1", "i2"))).andReturn( + new AutoScalingData(ImmutableList.of("h1", "h2")) + ); + EasyMock.replay(autoScalingStrategy); + terminatedSomething = simpleResourceManagementStrategy.doTerminate( + Arrays.asList(), + Arrays.asList( + new TestZkWorker(null, "h1", "i1", "0"), + new TestZkWorker(null, "h2", "i2", "0"), + new TestZkWorker(NoopTask.create(), "h3", "i3", "1"), + new TestZkWorker(NoopTask.create(), "h4", "i4", "1") + ) + ); + Assert.assertTrue(terminatedSomething); + EasyMock.verify(autoScalingStrategy); + } + + @Test + public void testNullWorkerSetupData() throws Exception + { + workerSetupData.set(null); + 
EasyMock.replay(autoScalingStrategy); + + boolean terminatedSomething = simpleResourceManagementStrategy.doTerminate( + Arrays.asList( + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) + ), + Arrays.asList( + new TestZkWorker(null) + ) + ); + + boolean provisionedSomething = simpleResourceManagementStrategy.doProvision( + Arrays.asList( + new RemoteTaskRunnerWorkItem(testTask.getId(), null, null).withQueueInsertionTime(new DateTime()) + ), + Arrays.asList( + new TestZkWorker(null) + ) + ); + + Assert.assertFalse(terminatedSomething); + Assert.assertFalse(provisionedSomething); + + EasyMock.verify(autoScalingStrategy); + } + private static class TestZkWorker extends ZkWorker { private final Task testTask; - private TestZkWorker( + public TestZkWorker( Task testTask ) { - super(new Worker("host", "ip", 3, "version"), null, new DefaultObjectMapper()); + this(testTask, "host", "ip", "0"); + } + + public TestZkWorker( + Task testTask, + String host, + String ip, + String version + ) + { + super(new Worker(host, ip, 3, version), null, new DefaultObjectMapper()); this.testTask = testTask; } diff --git a/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java index f80ca3cd8db..8d4bf32b870 100644 --- a/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java @@ -122,7 +122,7 @@ public class WorkerTaskMonitorTest new ThreadPoolTaskRunner( new TaskToolboxFactory( new TaskConfig(tmp.toString(), null, null, 0), - null, null, null, null, null, null, null, null, null, new SegmentLoaderFactory( + null, null, null, null, null, null, null, null, null, null, null, new SegmentLoaderFactory( new OmniSegmentLoader( ImmutableMap.of( "local", @@ -209,4 +209,4 @@ public class WorkerTaskMonitorTest Assert.assertEquals(task.getId(), taskAnnouncement.getTaskStatus().getId()); Assert.assertEquals(TaskStatus.Status.RUNNING, taskAnnouncement.getTaskStatus().getStatusCode()); } -} \ No newline at end of file +} diff --git a/kafka-eight/pom.xml b/kafka-eight/pom.xml index c92048c47b3..79d053ad355 100644 --- a/kafka-eight/pom.xml +++ b/kafka-eight/pom.xml @@ -28,12 +28,10 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT - - io.druid druid-api @@ -41,7 +39,7 @@ org.apache.kafka kafka_2.9.2 - 0.8.0-beta1 + 0.8.0 log4j @@ -54,61 +52,6 @@ - - - org.scala-lang - scala-library - 2.9.2 - - - net.sf.jopt-simple - jopt-simple - 3.2 - - - org.slf4j - slf4j-simple - 1.6.4 - - - org.scala-lang - scala-compiler - 2.9.2 - - - com.101tec - zkclient - 0.3 - - - org.xerial.snappy - snappy-java - 1.0.4.1 - - - com.yammer.metrics - metrics-core - 2.2.0 - - - com.yammer.metrics - metrics-annotation - 2.2.0 - - - org.easymock - easymock - 3.0 - test - - - org.scalatest - scalatest_2.9.2 - 1.8 - test - - - junit @@ -116,4 +59,20 @@ test + + + + + maven-jar-plugin + + + + true + true + + + + + + diff --git a/kafka-eight/src/main/java/io/druid/firehose/kafka/KafkaEightFirehoseFactory.java b/kafka-eight/src/main/java/io/druid/firehose/kafka/KafkaEightFirehoseFactory.java index c8bf876cc92..86c165c04d8 100644 --- a/kafka-eight/src/main/java/io/druid/firehose/kafka/KafkaEightFirehoseFactory.java +++ b/kafka-eight/src/main/java/io/druid/firehose/kafka/KafkaEightFirehoseFactory.java @@ -107,7 +107,15 @@ public class KafkaEightFirehoseFactory implements FirehoseFactory 
return null; } - return parser.parse(ByteBuffer.wrap(message)); + try { + return parser.parse(ByteBuffer.wrap(message)); + } + catch (Exception e) { + throw new FormattedException.Builder() + .withErrorCode(FormattedException.ErrorCode.UNPARSABLE_ROW) + .withMessage(String.format("Error parsing[%s], got [%s]", ByteBuffer.wrap(message), e.toString())) + .build(); + } } @Override diff --git a/kafka-seven/pom.xml b/kafka-seven/pom.xml index cd5ef152864..e2922e97e3c 100644 --- a/kafka-seven/pom.xml +++ b/kafka-seven/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -55,4 +55,19 @@ test + + + + maven-jar-plugin + + + + true + true + + + + + + diff --git a/kafka-seven/src/main/java/io/druid/firehose/kafka/KafkaSevenFirehoseFactory.java b/kafka-seven/src/main/java/io/druid/firehose/kafka/KafkaSevenFirehoseFactory.java index 8f200d1cdbc..c227b323877 100644 --- a/kafka-seven/src/main/java/io/druid/firehose/kafka/KafkaSevenFirehoseFactory.java +++ b/kafka-seven/src/main/java/io/druid/firehose/kafka/KafkaSevenFirehoseFactory.java @@ -120,7 +120,15 @@ public class KafkaSevenFirehoseFactory implements FirehoseFactory public InputRow parseMessage(Message message) throws FormattedException { - return parser.parse(message.payload()); + try { + return parser.parse(message.payload()); + } + catch (Exception e) { + throw new FormattedException.Builder() + .withErrorCode(FormattedException.ErrorCode.UNPARSABLE_ROW) + .withMessage(String.format("Error parsing[%s], got [%s]", message.payload(), e.toString())) + .build(); + } } @Override diff --git a/pom.xml b/pom.xml index e0dff689ffa..3d082a9cf93 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ io.druid druid pom - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT druid druid @@ -39,8 +39,9 @@ UTF-8 - 0.25.0 - 2.1.0-incubating + 0.25.1 + 2.3.0 + 0.1.7 @@ -65,14 +66,14 @@ io.druid druid-api - 0.1.3 + ${druid.api.version} com.metamx emitter - 0.2.6 + 0.2.7 com.metamx @@ -288,6 +289,16 @@ com.sun.jersey.contribs jersey-guice 1.17.1 + + + com.google.inject + guice + + + com.google.inject.extensions + guice-servlet + + com.sun.jersey @@ -408,7 +419,7 @@ junit junit - 4.8.1 + 4.11 test diff --git a/processing/pom.xml b/processing/pom.xml index 4c40c9f5a50..7b5cd091eb1 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -37,10 +37,6 @@ druid-common ${project.parent.version} - - com.metamx - java-util - com.metamx bytebuffer-collections @@ -49,8 +45,6 @@ com.metamx emitter - - com.ning compress-lzf @@ -63,34 +57,6 @@ it.uniroma3.mat extendedset - - com.google.guava - guava - - - com.google.inject - guice - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - javax.inject - javax.inject - - - joda-time - joda-time - - - log4j - log4j - org.slf4j slf4j-log4j12 @@ -107,6 +73,11 @@ com.ibm.icu icu4j + + org.mozilla + rhino + 1.7R4 + @@ -133,6 +104,14 @@ + + + + true + true + + + diff --git a/processing/src/main/java/io/druid/query/ChainedExecutionQueryRunner.java b/processing/src/main/java/io/druid/query/ChainedExecutionQueryRunner.java index 8710b627ece..316c8d8675e 100644 --- a/processing/src/main/java/io/druid/query/ChainedExecutionQueryRunner.java +++ b/processing/src/main/java/io/druid/query/ChainedExecutionQueryRunner.java @@ -84,6 +84,11 @@ public class ChainedExecutionQueryRunner implements QueryRunner { final int priority = Integer.parseInt(query.getContextValue("priority", "0")); + if (Iterables.isEmpty(queryables)) { 
+ log.warn("No queryables found."); + return Sequences.empty(); + } + return new BaseSequence>( new BaseSequence.IteratorMaker>() { diff --git a/processing/src/main/java/io/druid/query/Query.java b/processing/src/main/java/io/druid/query/Query.java index fdf84266d96..39da69c2b40 100644 --- a/processing/src/main/java/io/druid/query/Query.java +++ b/processing/src/main/java/io/druid/query/Query.java @@ -29,6 +29,7 @@ import io.druid.query.select.SelectQuery; import io.druid.query.spec.QuerySegmentSpec; import io.druid.query.timeboundary.TimeBoundaryQuery; import io.druid.query.timeseries.TimeseriesQuery; +import io.druid.query.topn.TopNQuery; import org.joda.time.Duration; import org.joda.time.Interval; @@ -42,7 +43,8 @@ import java.util.Map; @JsonSubTypes.Type(name = Query.TIME_BOUNDARY, value = TimeBoundaryQuery.class), @JsonSubTypes.Type(name = Query.GROUP_BY, value = GroupByQuery.class), @JsonSubTypes.Type(name = Query.SEGMENT_METADATA, value = SegmentMetadataQuery.class), - @JsonSubTypes.Type(name = Query.SELECT, value = SelectQuery.class) + @JsonSubTypes.Type(name = Query.SELECT, value = SelectQuery.class), + @JsonSubTypes.Type(name = Query.TOPN, value = TopNQuery.class) }) public interface Query { @@ -52,6 +54,7 @@ public interface Query public static final String GROUP_BY = "groupBy"; public static final String SEGMENT_METADATA = "segmentMetadata"; public static final String SELECT = "select"; + public static final String TOPN = "topN"; public String getDataSource(); diff --git a/processing/src/main/java/io/druid/query/aggregation/JavaScriptAggregatorFactory.java b/processing/src/main/java/io/druid/query/aggregation/JavaScriptAggregatorFactory.java index ff55c9cee7f..927ab89676f 100644 --- a/processing/src/main/java/io/druid/query/aggregation/JavaScriptAggregatorFactory.java +++ b/processing/src/main/java/io/druid/query/aggregation/JavaScriptAggregatorFactory.java @@ -265,9 +265,11 @@ public class JavaScriptAggregatorFactory implements AggregatorFactory final Object[] args = new Object[size + 1]; args[0] = current; - int i = 0; - while (i < size) { - args[i + 1] = selectorList[i++].get(); + for (int i = 0 ; i < size ; i++) { + final ObjectColumnSelector selector = selectorList[i]; + if (selector != null) { + args[i + 1] = selector.get(); + } } final Object res = fnAggregate.call(cx, scope, scope, args); diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java index 1878df5479e..f552ef4e9cd 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -51,7 +51,8 @@ public class JavascriptDimExtractionFn implements DimExtractionFn cx = contextFactory.enterContext(); } - return Context.toString(fn.call(cx, scope, scope, new String[]{input})); + final Object res = fn.call(cx, scope, scope, new String[]{input}); + return res != null ? Context.toString(res) : null; } }; } diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java new file mode 100644 index 00000000000..ea878736bbd --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java @@ -0,0 +1,174 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. 
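
The Query.java hunk above wires the new query type into Jackson's polymorphic deserialization by adding `@JsonSubTypes.Type(name = Query.TOPN, value = TopNQuery.class)` alongside the existing mappings. The sketch below shows that registration pattern in isolation; `DemoQuery`, `TimeseriesDemo`, `TopNDemo`, the `"queryType"` property name, and the example fields are hypothetical stand-ins, not Druid's actual classes.

```java
// Self-contained illustration of @JsonSubTypes-based query-type registration.
// All names here are hypothetical; only the mechanism mirrors the Query.java change above.
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;

public class QueryTypeRegistrationDemo
{
  @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "queryType")
  @JsonSubTypes({
      @JsonSubTypes.Type(name = "timeseries", value = TimeseriesDemo.class),
      @JsonSubTypes.Type(name = "topN", value = TopNDemo.class)  // analogous to the new Query.TOPN entry
  })
  public interface DemoQuery {}

  public static class TimeseriesDemo implements DemoQuery {}

  public static class TopNDemo implements DemoQuery
  {
    public String dimension;  // illustrative fields only
    public int threshold;
  }

  public static void main(String[] args) throws Exception
  {
    String json = "{\"queryType\": \"topN\", \"dimension\": \"page\", \"threshold\": 5}";
    DemoQuery query = new ObjectMapper().readValue(json, DemoQuery.class);
    System.out.println(query.getClass().getSimpleName());  // prints TopNDemo
  }
}
```

Once a name-to-class mapping like this exists, callers that deserialize against the base interface pick up the new query type without further changes.
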
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.collect.Lists; +import com.metamx.common.ISE; +import io.druid.collections.StupidPool; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + +/** + */ +public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm +{ + private final Capabilities capabilities; + private final TopNQuery query; + private final Comparator comparator; + private final StupidPool bufferPool; + + public AggregateTopNMetricFirstAlgorithm( + Capabilities capabilities, + TopNQuery query, + StupidPool bufferPool + ) + { + this.capabilities = capabilities; + this.query = query; + this.comparator = query.getTopNMetricSpec() + .getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs()); + this.bufferPool = bufferPool; + } + + + @Override + public TopNParams makeInitParams( + DimensionSelector dimSelector, Cursor cursor + ) + { + return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE); + } + + @Override + public TopNResultBuilder makeResultBuilder(TopNParams params) + { + return query.getTopNMetricSpec().getResultBuilder( + params.getCursor().getTime(), query.getDimensionSpec(), query.getThreshold(), comparator + ); + } + + @Override + public void run( + TopNParams params, TopNResultBuilder resultBuilder, int[] ints + ) + { + final TopNResultBuilder singleMetricResultBuilder = makeResultBuilder(params); + final String metric = ((NumericTopNMetricSpec) query.getTopNMetricSpec()).getMetric(); + + // Find either the aggregator or post aggregator to do the topN over + List condensedAggs = Lists.newArrayList(); + for (AggregatorFactory aggregatorSpec : query.getAggregatorSpecs()) { + if (aggregatorSpec.getName().equalsIgnoreCase(metric)) { + condensedAggs.add(aggregatorSpec); + break; + } + } + List condensedPostAggs = Lists.newArrayList(); + if (condensedAggs.isEmpty()) { + for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) { + if (postAggregator.getName().equalsIgnoreCase(metric)) { + condensedPostAggs.add(postAggregator); + + // Add all dependent metrics + for (AggregatorFactory aggregatorSpec : query.getAggregatorSpecs()) { + if (postAggregator.getDependentFields().contains(aggregatorSpec.getName())) { + condensedAggs.add(aggregatorSpec); + } + } + break; + } + } + } + if (condensedAggs.isEmpty() && condensedPostAggs.isEmpty()) { + throw new ISE("WTF! 
Can't find the metric to do topN over?"); + } + + // Run topN for only a single metric + TopNQuery singleMetricQuery = new TopNQueryBuilder().copy(query) + .aggregators(condensedAggs) + .postAggregators(condensedPostAggs) + .build(); + + PooledTopNAlgorithm singleMetricAlgo = new PooledTopNAlgorithm(capabilities, singleMetricQuery, bufferPool); + PooledTopNAlgorithm.PooledTopNParams singleMetricParam = null; + int[] dimValSelector = null; + try { + singleMetricParam = singleMetricAlgo.makeInitParams(params.getDimSelector(), params.getCursor()); + singleMetricAlgo.run( + singleMetricParam, + singleMetricResultBuilder, + null + ); + + // Get only the topN dimension values + dimValSelector = getDimValSelectorForTopNMetric(singleMetricParam, singleMetricResultBuilder); + } + finally { + if (singleMetricParam != null) { + singleMetricAlgo.cleanup(singleMetricParam); + } + } + + PooledTopNAlgorithm allMetricAlgo = new PooledTopNAlgorithm(capabilities, query, bufferPool); + PooledTopNAlgorithm.PooledTopNParams allMetricsParam = null; + try { + // Run topN for all metrics for top N dimension values + allMetricsParam = allMetricAlgo.makeInitParams(params.getDimSelector(), params.getCursor()); + allMetricAlgo.run( + allMetricsParam, + resultBuilder, + dimValSelector + ); + } + finally { + if (allMetricsParam != null) { + allMetricAlgo.cleanup(allMetricsParam); + } + } + } + + @Override + public void cleanup(TopNParams params) + { + } + + private int[] getDimValSelectorForTopNMetric(TopNParams params, TopNResultBuilder resultBuilder) + { + int[] dimValSelector = new int[params.getDimSelector().getValueCardinality()]; + Arrays.fill(dimValSelector, SKIP_POSITION_VALUE); + + Iterator dimValIter = resultBuilder.getTopNIterator(); + while (dimValIter.hasNext()) { + int dimValIndex = (Integer) dimValIter.next().getDimValIndex(); + dimValSelector[dimValIndex] = INIT_POSITION_VALUE; + } + + return dimValSelector; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java new file mode 100644 index 00000000000..47093cea8a5 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java @@ -0,0 +1,234 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.metamx.common.Pair; +import io.druid.query.aggregation.Aggregator; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.BufferAggregator; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; + +import java.util.Arrays; +import java.util.List; + +/** + */ +public abstract class BaseTopNAlgorithm + implements TopNAlgorithm +{ + protected static Aggregator[] makeAggregators(Cursor cursor, List aggregatorSpecs) + { + Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()]; + int aggregatorIndex = 0; + for (AggregatorFactory spec : aggregatorSpecs) { + aggregators[aggregatorIndex] = spec.factorize(cursor); + ++aggregatorIndex; + } + return aggregators; + } + + protected static BufferAggregator[] makeBufferAggregators(Cursor cursor, List aggregatorSpecs) + { + BufferAggregator[] aggregators = new BufferAggregator[aggregatorSpecs.size()]; + int aggregatorIndex = 0; + for (AggregatorFactory spec : aggregatorSpecs) { + aggregators[aggregatorIndex] = spec.factorizeBuffered(cursor); + ++aggregatorIndex; + } + return aggregators; + } + + private final Capabilities capabilities; + + protected BaseTopNAlgorithm(Capabilities capabilities) + { + this.capabilities = capabilities; + } + + @Override + public void run( + Parameters params, + TopNResultBuilder resultBuilder, + DimValSelector dimValSelector + ) + { + boolean hasDimValSelector = (dimValSelector != null); + + final int cardinality = params.getCardinality(); + int numProcessed = 0; + while (numProcessed < cardinality) { + final int numToProcess = Math.min(params.getNumValuesPerPass(), cardinality - numProcessed); + + params.getCursor().reset(); + + DimValSelector theDimValSelector; + if (!hasDimValSelector) { + theDimValSelector = makeDimValSelector(params, numProcessed, numToProcess); + } else { + theDimValSelector = updateDimValSelector(dimValSelector, numProcessed, numToProcess); + } + + DimValAggregateStore aggregatesStore = makeDimValAggregateStore(params); + + scanAndAggregate(params, theDimValSelector, aggregatesStore, numProcessed); + + updateResults(params, theDimValSelector, aggregatesStore, resultBuilder); + + closeAggregators(aggregatesStore); + + numProcessed += numToProcess; + } + } + + protected abstract DimValSelector makeDimValSelector(Parameters params, int numProcessed, int numToProcess); + + protected abstract DimValSelector updateDimValSelector( + DimValSelector dimValSelector, + int numProcessed, + int numToProcess + ); + + protected abstract DimValAggregateStore makeDimValAggregateStore(Parameters params); + + protected abstract void scanAndAggregate( + Parameters params, + DimValSelector dimValSelector, + DimValAggregateStore dimValAggregateStore, + int numProcessed + ); + + protected abstract void updateResults( + Parameters params, + DimValSelector dimValSelector, + DimValAggregateStore dimValAggregateStore, + TopNResultBuilder resultBuilder + ); + + protected abstract void closeAggregators( + DimValAggregateStore dimValAggregateStore + ); + + protected class AggregatorArrayProvider extends BaseArrayProvider + { + Aggregator[][] expansionAggs; + int cardinality; + + public AggregatorArrayProvider(DimensionSelector dimSelector, TopNQuery query, int cardinality) + { + super(dimSelector, query, capabilities); + + this.expansionAggs = new Aggregator[cardinality][]; + this.cardinality = cardinality; + } + + @Override + public Aggregator[][] build() + { + Pair startEnd = 
computeStartEnd(cardinality); + + Arrays.fill(expansionAggs, 0, startEnd.lhs, EMPTY_ARRAY); + Arrays.fill(expansionAggs, startEnd.lhs, startEnd.rhs, null); + Arrays.fill(expansionAggs, startEnd.rhs, expansionAggs.length, EMPTY_ARRAY); + + return expansionAggs; + } + } + + protected static abstract class BaseArrayProvider implements TopNMetricSpecBuilder + { + private volatile String previousStop; + private volatile boolean ignoreAfterThreshold; + private volatile int ignoreFirstN; + private volatile int keepOnlyN; + + private final DimensionSelector dimSelector; + private final TopNQuery query; + private final Capabilities capabilities; + + public BaseArrayProvider( + DimensionSelector dimSelector, + TopNQuery query, + Capabilities capabilities + ) + { + this.dimSelector = dimSelector; + this.query = query; + this.capabilities = capabilities; + + previousStop = null; + ignoreAfterThreshold = false; + ignoreFirstN = 0; + keepOnlyN = dimSelector.getValueCardinality(); + } + + @Override + public void skipTo(String previousStop) + { + if (capabilities.dimensionValuesSorted()) { + this.previousStop = previousStop; + } + } + + @Override + public void ignoreAfterThreshold() + { + ignoreAfterThreshold = true; + } + + @Override + public void ignoreFirstN(int n) + { + ignoreFirstN = n; + } + + @Override + public void keepOnlyN(int n) + { + keepOnlyN = n; + } + + protected Pair computeStartEnd(int cardinality) + { + int startIndex = ignoreFirstN; + + if (previousStop != null) { + int lookupId = dimSelector.lookupId(previousStop) + 1; + if (lookupId < 0) { + lookupId *= -1; + } + if (lookupId > ignoreFirstN + keepOnlyN) { + startIndex = ignoreFirstN + keepOnlyN; + } else { + startIndex = Math.max(lookupId, startIndex); + } + } + + int endIndex = Math.min(ignoreFirstN + keepOnlyN, cardinality); + + if (ignoreAfterThreshold && query.getDimensionsFilter() == null) { + endIndex = Math.min(endIndex, startIndex + query.getThreshold()); + } + + return Pair.of(startIndex, endIndex); + } + } +} diff --git a/processing/src/main/java/io/druid/query/topn/BySegmentTopNResultValue.java b/processing/src/main/java/io/druid/query/topn/BySegmentTopNResultValue.java new file mode 100644 index 00000000000..56849d6fd47 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/BySegmentTopNResultValue.java @@ -0,0 +1,90 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonValue; +import io.druid.query.BySegmentResultValue; +import io.druid.query.Result; + +import java.util.List; + +/** + */ +public class BySegmentTopNResultValue extends TopNResultValue implements BySegmentResultValue +{ + private final List> results; + private final String segmentId; + private final String intervalString; + + @JsonCreator + public BySegmentTopNResultValue( + @JsonProperty("results") List> results, + @JsonProperty("segment") String segmentId, + @JsonProperty("interval") String intervalString + ) + { + super(null); + + this.results = results; + this.segmentId = segmentId; + this.intervalString = intervalString; + } + + @Override + @JsonValue(false) + public List getValue() + { + throw new UnsupportedOperationException(); + } + + + @Override + @JsonProperty("results") + public List> getResults() + { + return results; + } + + @Override + @JsonProperty("segment") + public String getSegmentId() + { + return segmentId; + } + + @Override + @JsonProperty("interval") + public String getIntervalString() + { + return intervalString; + } + + @Override + public String toString() + { + return "BySegmentTopNResultValue{" + + "results=" + results + + ", segmentId='" + segmentId + '\'' + + ", intervalString='" + intervalString + '\'' + + '}'; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java new file mode 100644 index 00000000000..95ce5312e9d --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -0,0 +1,169 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.google.common.collect.Maps; +import io.druid.query.aggregation.Aggregator; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; +import io.druid.segment.data.IndexedInts; + +import java.util.Comparator; +import java.util.Map; + +/** + */ +public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm, TopNParams> +{ + private final TopNQuery query; + private final Comparator comparator; + + public DimExtractionTopNAlgorithm( + Capabilities capabilities, + TopNQuery query + ) + { + super(capabilities); + + this.query = query; + this.comparator = query.getTopNMetricSpec() + .getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs()); + } + + @Override + public TopNParams makeInitParams( + final DimensionSelector dimSelector, final Cursor cursor + ) + { + return new TopNParams(dimSelector, cursor, dimSelector.getValueCardinality(), Integer.MAX_VALUE); + } + + @Override + public TopNResultBuilder makeResultBuilder(TopNParams params) + { + return query.getTopNMetricSpec().getResultBuilder( + params.getCursor().getTime(), query.getDimensionSpec(), query.getThreshold(), comparator + ); + } + + @Override + protected Aggregator[][] makeDimValSelector(TopNParams params, int numProcessed, int numToProcess) + { + return query.getTopNMetricSpec().configureOptimizer( + new AggregatorArrayProvider(params.getDimSelector(), query, params.getCardinality()) + ).build(); + } + + @Override + protected Aggregator[][] updateDimValSelector(Aggregator[][] aggregators, int numProcessed, int numToProcess) + { + return aggregators; + } + + @Override + protected Map makeDimValAggregateStore(TopNParams params) + { + return Maps.newHashMap(); + } + + @Override + public void scanAndAggregate( + TopNParams params, + Aggregator[][] rowSelector, + Map aggregatesStore, + int numProcessed + ) + { + final Cursor cursor = params.getCursor(); + final DimensionSelector dimSelector = params.getDimSelector(); + + while (!cursor.isDone()) { + final IndexedInts dimValues = dimSelector.getRow(); + + for (int i = 0; i < dimValues.size(); ++i) { + final int dimIndex = dimValues.get(i); + + Aggregator[] theAggregators = rowSelector[dimIndex]; + if (theAggregators == null) { + String key = query.getDimensionSpec().getDimExtractionFn().apply(dimSelector.lookupName(dimIndex)); + if (key == null) { + rowSelector[dimIndex] = EMPTY_ARRAY; + continue; + } + theAggregators = aggregatesStore.get(key); + if (theAggregators == null) { + theAggregators = makeAggregators(cursor, query.getAggregatorSpecs()); + aggregatesStore.put(key, theAggregators); + } + rowSelector[dimIndex] = theAggregators; + } + + for (Aggregator aggregator : theAggregators) { + aggregator.aggregate(); + } + } + + cursor.advance(); + } + } + + @Override + protected void updateResults( + TopNParams params, + Aggregator[][] rowSelector, + Map aggregatesStore, + TopNResultBuilder resultBuilder + ) + { + for (Map.Entry entry : aggregatesStore.entrySet()) { + Aggregator[] aggs = entry.getValue(); + if (aggs != null && aggs.length > 0) { + Object[] vals = new Object[aggs.length]; + for (int i = 0; i < aggs.length; i++) { + vals[i] = aggs[i].get(); + } + + resultBuilder.addEntry( + entry.getKey(), + entry.getKey(), + vals, + query.getAggregatorSpecs(), + query.getPostAggregatorSpecs() + ); + } + } + } + + @Override + protected void closeAggregators(Map stringMap) + { + for (Aggregator[] aggregators : stringMap.values()) { + for (Aggregator agg : 
aggregators) { + agg.close(); + } + } + } + + @Override + public void cleanup(TopNParams params) + { + } +} diff --git a/processing/src/main/java/io/druid/query/topn/DimValHolder.java b/processing/src/main/java/io/druid/query/topn/DimValHolder.java new file mode 100644 index 00000000000..f17c77685c0 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/DimValHolder.java @@ -0,0 +1,110 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import java.util.Map; + +/** + */ +public class DimValHolder +{ + private final Object topNMetricVal; + private final String dimName; + private final Object dimValIndex; + private final Map metricValues; + + public DimValHolder( + Object topNMetricVal, + String dimName, + Object dimValIndex, + Map metricValues + ) + { + this.topNMetricVal = topNMetricVal; + this.dimName = dimName; + this.dimValIndex = dimValIndex; + this.metricValues = metricValues; + } + + public Object getTopNMetricVal() + { + return topNMetricVal; + } + + public String getDimName() + { + return dimName; + } + + public Object getDimValIndex() + { + return dimValIndex; + } + + public Map getMetricValues() + { + return metricValues; + } + + public static class Builder + { + private Object topNMetricVal; + private String dirName; + private Object dimValIndex; + private Map metricValues; + + public Builder() + { + topNMetricVal = null; + dirName = null; + dimValIndex = null; + metricValues = null; + } + + public Builder withTopNMetricVal(Object topNMetricVal) + { + this.topNMetricVal = topNMetricVal; + return this; + } + + public Builder withDirName(String dirName) + { + this.dirName = dirName; + return this; + } + + public Builder withDimValIndex(Object dimValIndex) + { + this.dimValIndex = dimValIndex; + return this; + } + + public Builder withMetricValues(Map metricValues) + { + this.metricValues = metricValues; + return this; + } + + public DimValHolder build() + { + return new DimValHolder(topNMetricVal, dirName, dimValIndex, metricValues); + } + } +} diff --git a/processing/src/main/java/io/druid/query/topn/DimensionAndMetricValueExtractor.java b/processing/src/main/java/io/druid/query/topn/DimensionAndMetricValueExtractor.java new file mode 100644 index 00000000000..500074cebbd --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/DimensionAndMetricValueExtractor.java @@ -0,0 +1,83 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import io.druid.query.MetricValueExtractor; + +import java.util.Map; + +/** + */ +public class DimensionAndMetricValueExtractor extends MetricValueExtractor +{ + private final Map value; + + @JsonCreator + public DimensionAndMetricValueExtractor(Map value) + { + super(value); + + this.value = value; + } + + public String getStringDimensionValue(String dimension) + { + return (String) value.get(dimension); + } + + public Object getDimensionValue(String dimension) + { + return value.get(dimension); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + DimensionAndMetricValueExtractor that = (DimensionAndMetricValueExtractor) o; + + if (value != null ? !value.equals(that.value) : that.value != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() + { + return value != null ? value.hashCode() : 0; + } + + @Override + public String toString() + { + return "DimensionAndMetricValueExtractor{" + + "value=" + value + + '}'; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/InvertedTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/InvertedTopNMetricSpec.java new file mode 100644 index 00000000000..2e458295387 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/InvertedTopNMetricSpec.java @@ -0,0 +1,104 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.metamx.common.guava.Comparators; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.List; + +/** + */ +public class InvertedTopNMetricSpec implements TopNMetricSpec +{ + private static final byte CACHE_TYPE_ID = 0x3; + + private final TopNMetricSpec delegate; + + @JsonCreator + public InvertedTopNMetricSpec( + @JsonProperty("metric") TopNMetricSpec delegate + ) + { + this.delegate = delegate; + } + + @Override + public void verifyPreconditions( + List aggregatorSpecs, + List postAggregatorSpecs + ) + { + delegate.verifyPreconditions(aggregatorSpecs, postAggregatorSpecs); + } + + @JsonProperty("metric") + public TopNMetricSpec getDelegate() + { + return delegate; + } + + @Override + public Comparator getComparator( + List aggregatorSpecs, + List postAggregatorSpecs + ) + { + return Comparators.inverse(delegate.getComparator(aggregatorSpecs, postAggregatorSpecs)); + } + + @Override + public TopNResultBuilder getResultBuilder( + DateTime timestamp, + DimensionSpec dimSpec, + int threshold, + Comparator comparator + ) + { + return delegate.getResultBuilder(timestamp, dimSpec, threshold, comparator); + } + + @Override + public byte[] getCacheKey() + { + final byte[] cacheKey = delegate.getCacheKey(); + + return ByteBuffer.allocate(1 + cacheKey.length).put(CACHE_TYPE_ID).put(cacheKey).array(); + } + + @Override + public TopNMetricSpecBuilder configureOptimizer(TopNMetricSpecBuilder builder) + { + return delegate.configureOptimizer(builder); + } + + @Override + public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector) + { + delegate.initTopNAlgorithmSelector(selector); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/LegacyTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/LegacyTopNMetricSpec.java new file mode 100644 index 00000000000..f185d799ea1 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/LegacyTopNMetricSpec.java @@ -0,0 +1,51 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.metamx.common.IAE; + +import java.util.Map; + +/** + */ +public class LegacyTopNMetricSpec extends NumericTopNMetricSpec +{ + private static final String convertValue(Object metric) + { + final String retVal; + + if (metric instanceof String) { + retVal = (String) metric; + } else if (metric instanceof Map) { + retVal = (String) ((Map) metric).get("metric"); + } else { + throw new IAE("Unknown type[%s] for metric[%s]", metric.getClass(), metric); + } + + return retVal; + } + + @JsonCreator + public LegacyTopNMetricSpec(Object metric) + { + super(convertValue(metric)); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java new file mode 100644 index 00000000000..dca6d3ed651 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/LexicographicTopNMetricSpec.java @@ -0,0 +1,121 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Charsets; +import com.google.common.primitives.UnsignedBytes; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.List; + +/** + */ +public class LexicographicTopNMetricSpec implements TopNMetricSpec +{ + private static final byte CACHE_TYPE_ID = 0x1; + + private static Comparator comparator = new Comparator() + { + @Override + public int compare(String s, String s2) + { + return UnsignedBytes.lexicographicalComparator().compare(s.getBytes(Charsets.UTF_8), s2.getBytes(Charsets.UTF_8)); + } + }; + + private final String previousStop; + + @JsonCreator + public LexicographicTopNMetricSpec( + @JsonProperty("previousStop") String previousStop + ) + { + this.previousStop = (previousStop == null) ? 
"" : previousStop; + } + + @Override + public void verifyPreconditions(List aggregatorSpecs, List postAggregatorSpecs) + { + } + + @JsonProperty + public String getPreviousStop() + { + return previousStop; + } + + + @Override + public Comparator getComparator(List aggregatorSpecs, List postAggregatorSpecs) + { + return comparator; + } + + @Override + public TopNResultBuilder getResultBuilder( + DateTime timestamp, + DimensionSpec dimSpec, + int threshold, + Comparator comparator + ) + { + return new TopNLexicographicResultBuilder(timestamp, dimSpec, threshold, previousStop, comparator); + } + + @Override + public byte[] getCacheKey() + { + byte[] previousStopBytes = previousStop.getBytes(Charsets.UTF_8); + + return ByteBuffer.allocate(1 + previousStopBytes.length) + .put(CACHE_TYPE_ID) + .put(previousStopBytes) + .array(); + } + + @Override + public TopNMetricSpecBuilder configureOptimizer(TopNMetricSpecBuilder builder) + { + builder.skipTo(previousStop); + builder.ignoreAfterThreshold(); + return builder; + } + + @Override + public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector) + { + selector.setAggregateAllMetrics(true); + } + + @Override + public String toString() + { + return "LexicographicTopNMetricSpec{" + + "previousStop='" + previousStop + '\'' + + '}'; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java new file mode 100644 index 00000000000..4f0b6ebdc22 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/NumericTopNMetricSpec.java @@ -0,0 +1,160 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.List; + +/** + */ +public class NumericTopNMetricSpec implements TopNMetricSpec +{ + private static final byte CACHE_TYPE_ID = 0x0; + + private final String metric; + + @JsonCreator + public NumericTopNMetricSpec( + @JsonProperty("metric") String metric + ) + { + this.metric = metric; + } + + @Override + public void verifyPreconditions(List aggregatorSpecs, List postAggregatorSpecs) + { + Preconditions.checkNotNull(metric, "metric can't be null"); + Preconditions.checkNotNull(aggregatorSpecs, "aggregations cannot be null"); + Preconditions.checkArgument(aggregatorSpecs.size() > 0, "Must have at least one AggregatorFactory"); + + final AggregatorFactory aggregator = Iterables.tryFind( + aggregatorSpecs, + new Predicate() + { + @Override + public boolean apply(AggregatorFactory input) + { + return input.getName().equals(metric); + } + } + ).orNull(); + + final PostAggregator postAggregator = Iterables.tryFind( + postAggregatorSpecs, + new Predicate() + { + @Override + public boolean apply(PostAggregator input) + { + return input.getName().equals(metric); + } + } + ).orNull(); + + Preconditions.checkArgument( + aggregator != null || postAggregator != null, + "Must have an AggregatorFactory or PostAggregator for metric[%s], gave[%s] and [%s]", + metric, + aggregatorSpecs, + postAggregatorSpecs + ); + } + + @JsonProperty + public String getMetric() + { + return metric; + } + + @Override + public Comparator getComparator(List aggregatorSpecs, List postAggregatorSpecs) + { + Comparator comp = null; + for (AggregatorFactory factory : aggregatorSpecs) { + if (metric.equals(factory.getName())) { + comp = factory.getComparator(); + break; + } + } + for (PostAggregator pf : postAggregatorSpecs) { + if (metric.equals(pf.getName())) { + comp = pf.getComparator(); + break; + } + } + + return comp; + } + + @Override + public TopNResultBuilder getResultBuilder( + DateTime timestamp, + DimensionSpec dimSpec, + int threshold, + Comparator comparator + ) + { + return new TopNNumericResultBuilder(timestamp, dimSpec, metric, threshold, comparator); + } + + @Override + public byte[] getCacheKey() + { + byte[] metricBytes = metric.getBytes(Charsets.UTF_8); + + return ByteBuffer.allocate(1 + metricBytes.length) + .put(CACHE_TYPE_ID) + .put(metricBytes) + .array(); + } + + @Override + public TopNMetricSpecBuilder configureOptimizer(TopNMetricSpecBuilder builder) + { + return builder; + } + + @Override + public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector) + { + selector.setAggregateTopNMetricFirst(true); + } + + @Override + public String toString() + { + return "NumericTopNMetricSpec{" + + "metric='" + metric + '\'' + + '}'; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java new file mode 100644 index 00000000000..d87631c7b57 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java 
@@ -0,0 +1,401 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.io.Closeables; +import com.metamx.common.Pair; +import io.druid.collections.ResourceHolder; +import io.druid.collections.StupidPool; +import io.druid.query.aggregation.BufferAggregator; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; +import io.druid.segment.data.IndexedInts; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Comparator; + +/** + */ +public class PooledTopNAlgorithm extends BaseTopNAlgorithm +{ + private final Capabilities capabilities; + private final TopNQuery query; + private final Comparator comparator; + private final StupidPool bufferPool; + + public PooledTopNAlgorithm( + Capabilities capabilities, + TopNQuery query, + StupidPool bufferPool + ) + { + super(capabilities); + + this.capabilities = capabilities; + this.query = query; + this.comparator = query.getTopNMetricSpec() + .getComparator(query.getAggregatorSpecs(), query.getPostAggregatorSpecs()); + this.bufferPool = bufferPool; + } + + @Override + public PooledTopNParams makeInitParams( + DimensionSelector dimSelector, Cursor cursor + ) + { + ResourceHolder resultsBufHolder = bufferPool.take(); + ByteBuffer resultsBuf = resultsBufHolder.get(); + resultsBuf.clear(); + + final int cardinality = dimSelector.getValueCardinality(); + + final TopNMetricSpecBuilder arrayProvider = new BaseArrayProvider( + dimSelector, + query, + capabilities + ) + { + private final int[] positions = new int[cardinality]; + + @Override + public int[] build() + { + Pair startEnd = computeStartEnd(cardinality); + + Arrays.fill(positions, 0, startEnd.lhs, SKIP_POSITION_VALUE); + Arrays.fill(positions, startEnd.lhs, startEnd.rhs, INIT_POSITION_VALUE); + Arrays.fill(positions, startEnd.rhs, positions.length, SKIP_POSITION_VALUE); + + return positions; + } + }; + + final int numBytesToWorkWith = resultsBuf.remaining(); + final int[] aggregatorSizes = new int[query.getAggregatorSpecs().size()]; + int numBytesPerRecord = 0; + + for (int i = 0; i < query.getAggregatorSpecs().size(); ++i) { + aggregatorSizes[i] = query.getAggregatorSpecs().get(i).getMaxIntermediateSize(); + numBytesPerRecord += aggregatorSizes[i]; + } + + final int numValuesPerPass = numBytesToWorkWith / numBytesPerRecord; + + return PooledTopNParams.builder() + .withDimSelector(dimSelector) + .withCursor(cursor) + .withCardinality(cardinality) + .withResultsBufHolder(resultsBufHolder) + .withResultsBuf(resultsBuf) + .withArrayProvider(arrayProvider) + .withNumBytesPerRecord(numBytesPerRecord) + .withNumValuesPerPass(numValuesPerPass) + .withAggregatorSizes(aggregatorSizes) + .build(); + } + + 
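
`makeInitParams` above sizes each pass of the pooled algorithm: it sums `getMaxIntermediateSize()` over the aggregator specs to get `numBytesPerRecord`, then divides the pooled buffer's remaining bytes by that figure to get `numValuesPerPass`, which `BaseTopNAlgorithm.run` uses to walk the dimension cardinality in chunks, resetting the cursor for each pass. A small sketch with made-up numbers makes the arithmetic concrete; the buffer size, aggregator sizes, and cardinality below are assumptions, not defaults.

```java
// Hypothetical sizes, purely to make the sizing arithmetic in makeInitParams concrete.
public class PassSizingSketch
{
  public static void main(String[] args)
  {
    int[] aggregatorSizes = {8, 8, 4};      // e.g. two 8-byte and one 4-byte intermediate states
    int numBytesPerRecord = 0;
    for (int size : aggregatorSizes) {
      numBytesPerRecord += size;            // 20 bytes of buffer needed per dimension value
    }

    int numBytesToWorkWith = 1024 * 1024;   // assume a 1 MB results buffer from the pool
    int numValuesPerPass = numBytesToWorkWith / numBytesPerRecord;  // 52428 values per pass

    int cardinality = 200_000;              // assume 200k distinct dimension values
    int passes = (cardinality + numValuesPerPass - 1) / numValuesPerPass;
    System.out.println(numValuesPerPass + " values per pass, " + passes + " pass(es) over the cursor");
  }
}
```

Each extra pass re-scans the cursor, so a larger pooled results buffer trades memory for fewer scans.
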
@Override + public TopNResultBuilder makeResultBuilder(PooledTopNParams params) + { + return query.getTopNMetricSpec().getResultBuilder( + params.getCursor().getTime(), query.getDimensionSpec(), query.getThreshold(), comparator + ); + } + + @Override + protected int[] makeDimValSelector(PooledTopNParams params, int numProcessed, int numToProcess) + { + final TopNMetricSpecBuilder arrayProvider = params.getArrayProvider(); + + arrayProvider.ignoreFirstN(numProcessed); + arrayProvider.keepOnlyN(numToProcess); + return query.getTopNMetricSpec().configureOptimizer(arrayProvider).build(); + } + + protected int[] updateDimValSelector(int[] dimValSelector, int numProcessed, int numToProcess) + { + final int[] retVal = Arrays.copyOf(dimValSelector, dimValSelector.length); + + final int validEnd = Math.min(retVal.length, numProcessed + numToProcess); + final int end = Math.max(retVal.length, validEnd); + + Arrays.fill(retVal, 0, numProcessed, SKIP_POSITION_VALUE); + Arrays.fill(retVal, validEnd, end, SKIP_POSITION_VALUE); + + return retVal; + } + + @Override + protected BufferAggregator[] makeDimValAggregateStore(PooledTopNParams params) + { + return makeBufferAggregators(params.getCursor(), query.getAggregatorSpecs()); + } + + @Override + protected void scanAndAggregate( + PooledTopNParams params, + int[] positions, + BufferAggregator[] theAggregators, + int numProcessed + ) + { + final ByteBuffer resultsBuf = params.getResultsBuf(); + final int numBytesPerRecord = params.getNumBytesPerRecord(); + final int[] aggregatorSizes = params.getAggregatorSizes(); + final Cursor cursor = params.getCursor(); + final DimensionSelector dimSelector = params.getDimSelector(); + + while (!cursor.isDone()) { + final IndexedInts dimValues = dimSelector.getRow(); + + for (int i = 0; i < dimValues.size(); ++i) { + final int dimIndex = dimValues.get(i); + int position = positions[dimIndex]; + switch (position) { + case SKIP_POSITION_VALUE: + break; + case INIT_POSITION_VALUE: + positions[dimIndex] = (dimIndex - numProcessed) * numBytesPerRecord; + position = positions[dimIndex]; + for (int j = 0; j < theAggregators.length; ++j) { + theAggregators[j].init(resultsBuf, position); + position += aggregatorSizes[j]; + } + position = positions[dimIndex]; + default: + for (int j = 0; j < theAggregators.length; ++j) { + theAggregators[j].aggregate(resultsBuf, position); + position += aggregatorSizes[j]; + } + } + } + + cursor.advance(); + } + } + + @Override + protected void updateResults( + PooledTopNParams params, + int[] positions, + BufferAggregator[] theAggregators, + TopNResultBuilder resultBuilder + ) + { + final ByteBuffer resultsBuf = params.getResultsBuf(); + final int[] aggregatorSizes = params.getAggregatorSizes(); + final DimensionSelector dimSelector = params.getDimSelector(); + + for (int i = 0; i < positions.length; i++) { + int position = positions[i]; + if (position >= 0) { + Object[] vals = new Object[theAggregators.length]; + for (int j = 0; j < theAggregators.length; j++) { + vals[j] = theAggregators[j].get(resultsBuf, position); + position += aggregatorSizes[j]; + } + + resultBuilder.addEntry( + dimSelector.lookupName(i), + i, + vals, + query.getAggregatorSpecs(), + query.getPostAggregatorSpecs() + ); + } + } + } + + @Override + protected void closeAggregators(BufferAggregator[] bufferAggregators) + { + for(BufferAggregator agg : bufferAggregators) { + agg.close(); + } + } + + @Override + public void cleanup(PooledTopNParams params) + { + ResourceHolder resultsBufHolder = params.getResultsBufHolder(); + 
+ if (resultsBufHolder != null) { + resultsBufHolder.get().clear(); + } + Closeables.closeQuietly(resultsBufHolder); + } + + public static class PooledTopNParams extends TopNParams + { + public static Builder builder() + { + return new Builder(); + } + + private final ResourceHolder resultsBufHolder; + private final ByteBuffer resultsBuf; + private final int[] aggregatorSizes; + private final int numBytesPerRecord; + private final TopNMetricSpecBuilder arrayProvider; + + public PooledTopNParams( + DimensionSelector dimSelector, + Cursor cursor, + int cardinality, + ResourceHolder resultsBufHolder, + ByteBuffer resultsBuf, + int[] aggregatorSizes, + int numBytesPerRecord, + int numValuesPerPass, + TopNMetricSpecBuilder arrayProvider + ) + { + super(dimSelector, cursor, cardinality, numValuesPerPass); + + this.resultsBufHolder = resultsBufHolder; + this.resultsBuf = resultsBuf; + this.aggregatorSizes = aggregatorSizes; + this.numBytesPerRecord = numBytesPerRecord; + this.arrayProvider = arrayProvider; + } + + public ResourceHolder getResultsBufHolder() + { + return resultsBufHolder; + } + + public ByteBuffer getResultsBuf() + { + return resultsBuf; + } + + public int[] getAggregatorSizes() + { + return aggregatorSizes; + } + + public int getNumBytesPerRecord() + { + return numBytesPerRecord; + } + + public TopNMetricSpecBuilder getArrayProvider() + { + return arrayProvider; + } + + public static class Builder + { + private DimensionSelector dimSelector; + private Cursor cursor; + private int cardinality; + private ResourceHolder resultsBufHolder; + private ByteBuffer resultsBuf; + private int[] aggregatorSizes; + private int numBytesPerRecord; + private int numValuesPerPass; + private TopNMetricSpecBuilder arrayProvider; + + public Builder() + { + dimSelector = null; + cursor = null; + cardinality = 0; + resultsBufHolder = null; + resultsBuf = null; + aggregatorSizes = null; + numBytesPerRecord = 0; + numValuesPerPass = 0; + arrayProvider = null; + } + + public Builder withDimSelector(DimensionSelector dimSelector) + { + this.dimSelector = dimSelector; + return this; + } + + public Builder withCursor(Cursor cursor) + { + this.cursor = cursor; + return this; + } + + public Builder withCardinality(int cardinality) + { + this.cardinality = cardinality; + return this; + } + + public Builder withResultsBufHolder(ResourceHolder resultsBufHolder) + { + this.resultsBufHolder = resultsBufHolder; + return this; + } + + public Builder withResultsBuf(ByteBuffer resultsBuf) + { + this.resultsBuf = resultsBuf; + return this; + } + + public Builder withAggregatorSizes(int[] aggregatorSizes) + { + this.aggregatorSizes = aggregatorSizes; + return this; + } + + public Builder withNumBytesPerRecord(int numBytesPerRecord) + { + this.numBytesPerRecord = numBytesPerRecord; + return this; + } + + public Builder withNumValuesPerPass(int numValuesPerPass) + { + this.numValuesPerPass = numValuesPerPass; + return this; + } + + public Builder withArrayProvider(TopNMetricSpecBuilder arrayProvider) + { + this.arrayProvider = arrayProvider; + return this; + } + + public PooledTopNParams build() + { + return new PooledTopNParams( + dimSelector, + cursor, + cardinality, + resultsBufHolder, + resultsBuf, + aggregatorSizes, + numBytesPerRecord, + numValuesPerPass, + arrayProvider + ); + } + } + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OldOverlordResource.java b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java similarity index 54% rename from 
indexing-service/src/main/java/io/druid/indexing/overlord/http/OldOverlordResource.java rename to processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java index 6897490624a..89bac871441 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OldOverlordResource.java +++ b/processing/src/main/java/io/druid/query/topn/TopNAlgorithm.java @@ -17,30 +17,29 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package io.druid.indexing.overlord.http; +package io.druid.query.topn; -import com.google.inject.Inject; -import io.druid.common.config.JacksonConfigManager; -import io.druid.indexing.overlord.TaskMaster; -import io.druid.indexing.overlord.TaskStorageQueryAdapter; -import io.druid.tasklogs.TaskLogStreamer; - -import javax.ws.rs.Path; +import io.druid.query.aggregation.Aggregator; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; /** */ -@Deprecated -@Path("/mmx/merger/v1") -public class OldOverlordResource extends OverlordResource +public interface TopNAlgorithm { - @Inject - public OldOverlordResource( - TaskMaster taskMaster, - TaskStorageQueryAdapter taskStorageQueryAdapter, - TaskLogStreamer taskLogStreamer, - JacksonConfigManager configManager - ) throws Exception - { - super(taskMaster, taskStorageQueryAdapter, taskLogStreamer, configManager); - } + public static final Aggregator[] EMPTY_ARRAY = {}; + public static final int INIT_POSITION_VALUE = -1; + public static final int SKIP_POSITION_VALUE = -2; + + public TopNParams makeInitParams(DimensionSelector dimSelector, Cursor cursor); + + public TopNResultBuilder makeResultBuilder(Parameters params); + + public void run( + Parameters params, + TopNResultBuilder resultBuilder, + DimValSelector dimValSelector + ); + + public void cleanup(Parameters params); } diff --git a/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java b/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java new file mode 100644 index 00000000000..a65b78dbc96 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNAlgorithmSelector.java @@ -0,0 +1,72 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +/** + */ +public class TopNAlgorithmSelector +{ + private final int cardinality; + private final int numBytesPerRecord; + + private volatile boolean hasDimExtractionFn; + private volatile boolean aggregateAllMetrics; + private volatile boolean aggregateTopNMetricFirst; + + public TopNAlgorithmSelector(int cardinality, int numBytesPerRecord) + { + this.cardinality = cardinality; + this.numBytesPerRecord = numBytesPerRecord; + } + + public void setHasDimExtractionFn(boolean hasDimExtractionFn) + { + this.hasDimExtractionFn = hasDimExtractionFn; + } + + public void setAggregateAllMetrics(boolean aggregateAllMetrics) + { + this.aggregateAllMetrics = aggregateAllMetrics; + } + + public void setAggregateTopNMetricFirst(boolean aggregateTopNMetricFirst) + { + // These are just heuristics based on an analysis of where an inflection point may lie to switch + // between different algorithms + if (cardinality > 400000 && numBytesPerRecord > 100) { + this.aggregateTopNMetricFirst = aggregateTopNMetricFirst; + } + } + + public boolean isHasDimExtractionFn() + { + return hasDimExtractionFn; + } + + public boolean isAggregateAllMetrics() + { + return aggregateAllMetrics; + } + + public boolean isAggregateTopNMetricFirst() + { + return aggregateTopNMetricFirst; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java b/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java new file mode 100644 index 00000000000..437c28f640f --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNBinaryFn.java @@ -0,0 +1,126 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
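
`setAggregateTopNMetricFirst` above only honours the request when cardinality exceeds 400,000 and the per-record aggregation state exceeds 100 bytes; the inline comment calls these heuristic inflection points. The sketch below is one rough way to see the trade-off the metric-first algorithm targets, using entirely hypothetical figures (500,000 values, 120 bytes per record, an 8-byte topN metric, a threshold of 1,000); it is an illustration, not the analysis behind the chosen constants.

```java
// Hypothetical figures, only to illustrate the scale behind the cardinality/numBytesPerRecord heuristic.
public class SelectorHeuristicSketch
{
  public static void main(String[] args)
  {
    long cardinality = 500_000L;       // above the 400,000 threshold
    long numBytesPerRecord = 120L;     // above the 100-byte threshold

    long allMetricsState = cardinality * numBytesPerRecord;  // aggregate every metric for every value
    System.out.println("Single-pass intermediate state: ~" + allMetricsState / (1024 * 1024) + " MB");

    // Metric-first: the first pass carries only the topN metric for every dimension value,
    // the second pass carries all metrics but only for the surviving top-N values.
    long topNMetricOnly = cardinality * 8L;   // assume an 8-byte topN metric
    long threshold = 1_000L;                  // assume a topN threshold of 1,000
    long secondPass = threshold * numBytesPerRecord;
    System.out.println("Metric-first state: ~" + (topNMetricOnly + secondPass) / (1024 * 1024) + " MB");
  }
}
```

In its second phase, `AggregateTopNMetricFirstAlgorithm` aggregates the full metric set only for the dimension values that survived the single-metric pass, which is where the savings come from.
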
+ */ + +package io.druid.query.topn; + +import com.metamx.common.guava.nary.BinaryFn; +import io.druid.granularity.AllGranularity; +import io.druid.granularity.QueryGranularity; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNBinaryFn implements BinaryFn, Result, Result> +{ + private final TopNResultMerger merger; + private final DimensionSpec dimSpec; + private final QueryGranularity gran; + private final String dimension; + private final TopNMetricSpec topNMetricSpec; + private final int threshold; + private final List aggregations; + private final List postAggregations; + private final Comparator comparator; + + public TopNBinaryFn( + final TopNResultMerger merger, + final QueryGranularity granularity, + final DimensionSpec dimSpec, + final TopNMetricSpec topNMetricSpec, + final int threshold, + final List aggregatorSpecs, + final List postAggregatorSpecs + ) + { + this.merger = merger; + this.dimSpec = dimSpec; + this.gran = granularity; + this.topNMetricSpec = topNMetricSpec; + this.threshold = threshold; + this.aggregations = aggregatorSpecs; + this.postAggregations = postAggregatorSpecs; + + this.dimension = dimSpec.getOutputName(); + this.comparator = topNMetricSpec.getComparator(aggregatorSpecs, postAggregatorSpecs); + } + + @Override + public Result apply(Result arg1, Result arg2) + { + if (arg1 == null) { + return merger.getResult(arg2, comparator); + } + if (arg2 == null) { + return merger.getResult(arg1, comparator); + } + + Map retVals = new LinkedHashMap(); + + TopNResultValue arg1Vals = arg1.getValue(); + TopNResultValue arg2Vals = arg2.getValue(); + + for (DimensionAndMetricValueExtractor arg1Val : arg1Vals) { + retVals.put(arg1Val.getStringDimensionValue(dimension), arg1Val); + } + for (DimensionAndMetricValueExtractor arg2Val : arg2Vals) { + final String dimensionValue = arg2Val.getStringDimensionValue(dimension); + DimensionAndMetricValueExtractor arg1Val = retVals.get(dimensionValue); + + if (arg1Val != null) { + Map retVal = new LinkedHashMap(); + + retVal.put(dimension, dimensionValue); + for (AggregatorFactory factory : aggregations) { + final String metricName = factory.getName(); + retVal.put(metricName, factory.combine(arg1Val.getMetric(metricName), arg2Val.getMetric(metricName))); + } + + for (PostAggregator pf : postAggregations) { + retVal.put(pf.getName(), pf.compute(retVal)); + } + + retVals.put(dimensionValue, new DimensionAndMetricValueExtractor(retVal)); + } else { + retVals.put(dimensionValue, arg2Val); + } + } + + final DateTime timestamp; + if (gran instanceof AllGranularity) { + timestamp = arg1.getTimestamp(); + } else { + timestamp = gran.toDateTime(gran.truncate(arg1.getTimestamp().getMillis())); + } + + TopNResultBuilder bob = topNMetricSpec.getResultBuilder(timestamp, dimSpec, threshold, comparator); + for (DimensionAndMetricValueExtractor extractor : retVals.values()) { + bob.addEntry(extractor); + } + return bob.build(); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNLexicographicResultBuilder.java b/processing/src/main/java/io/druid/query/topn/TopNLexicographicResultBuilder.java new file mode 100644 index 00000000000..37360dfb1cd --- /dev/null +++ 
b/processing/src/main/java/io/druid/query/topn/TopNLexicographicResultBuilder.java @@ -0,0 +1,134 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.collect.Maps; +import com.google.common.collect.MinMaxPriorityQueue; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNLexicographicResultBuilder implements TopNResultBuilder +{ + private final DateTime timestamp; + private final DimensionSpec dimSpec; + private final String previousStop; + + private MinMaxPriorityQueue pQueue = null; + + public TopNLexicographicResultBuilder( + DateTime timestamp, + DimensionSpec dimSpec, + int threshold, + String previousStop, + final Comparator comparator + ) + { + this.timestamp = timestamp; + this.dimSpec = dimSpec; + this.previousStop = previousStop; + + instantiatePQueue(threshold, comparator); + } + + @Override + public TopNResultBuilder addEntry( + String dimName, + Object dimValIndex, + Object[] metricVals, + List aggFactories, + List postAggs + ) + { + Map metricValues = Maps.newLinkedHashMap(); + + if (dimName.compareTo(previousStop) > 0) { + metricValues.put(dimSpec.getOutputName(), dimName); + Iterator aggsIter = aggFactories.iterator(); + for (Object metricVal : metricVals) { + metricValues.put(aggsIter.next().getName(), metricVal); + } + for (PostAggregator postAgg : postAggs) { + metricValues.put(postAgg.getName(), postAgg.compute(metricValues)); + } + + pQueue.add(new DimValHolder.Builder().withDirName(dimName).withMetricValues(metricValues).build()); + } + + return this; + } + + @Override + public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor) + { + pQueue.add( + new DimValHolder.Builder().withDirName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName())) + .withMetricValues(dimensionAndMetricValueExtractor.getBaseObject()) + .build() + ); + + return this; + } + + @Override + public Iterator getTopNIterator() + { + return pQueue.iterator(); + } + + @Override + public Result build() + { + // Pull out top aggregated values + List> values = new ArrayList>(pQueue.size()); + while (!pQueue.isEmpty()) { + values.add(pQueue.remove().getMetricValues()); + } + + return new Result(timestamp, new TopNResultValue(values)); + } + + private void instantiatePQueue(int threshold, final Comparator comparator) + { + this.pQueue = MinMaxPriorityQueue.orderedBy( + new Comparator() 
+ { + @Override + public int compare( + DimValHolder o1, + DimValHolder o2 + ) + { + return comparator.compare(o1.getDimName(), o2.getDimName()); + } + } + ).maximumSize(threshold).create(); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNMapFn.java b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java new file mode 100644 index 00000000000..c013d546f7f --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNMapFn.java @@ -0,0 +1,65 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.base.Function; +import io.druid.query.Result; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; + +public class TopNMapFn implements Function> +{ + private final TopNQuery query; + private final TopNAlgorithm topNAlgorithm; + + + public TopNMapFn( + TopNQuery query, + TopNAlgorithm topNAlgorithm + ) + { + this.query = query; + this.topNAlgorithm = topNAlgorithm; + } + + @Override + @SuppressWarnings("unchecked") + public Result apply(Cursor cursor) + { + final DimensionSelector dimSelector = cursor.makeDimensionSelector(query.getDimensionSpec().getDimension()); + if (dimSelector == null) { + return null; + } + + TopNParams params = null; + try { + params = topNAlgorithm.makeInitParams(dimSelector, cursor); + + TopNResultBuilder resultBuilder = topNAlgorithm.makeResultBuilder(params); + + topNAlgorithm.run(params, resultBuilder, null); + + return resultBuilder.build(); + } + finally { + topNAlgorithm.cleanup(params); + } + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java new file mode 100644 index 00000000000..c2baf13e3eb --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNMetricSpec.java @@ -0,0 +1,58 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
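TopNBinaryFn, added above, is the merge function applied when per-segment results are combined: rows from both sides are keyed by dimension value, matching rows have each aggregator combined and post-aggregators recomputed, and the metric spec's result builder then re-sorts and re-truncates to the threshold. A hedged, self-contained sketch of a single merge (dimension and metric names are made up):

```java
// A sketch for illustration only (not part of this patch); dimension and metric
// names are invented. Placed in the same package purely for brevity.
package io.druid.query.topn;

import com.google.common.collect.ImmutableMap;
import io.druid.granularity.QueryGranularity;
import io.druid.query.Result;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.dimension.DefaultDimensionSpec;
import org.joda.time.DateTime;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class TopNBinaryFnSketch
{
  public static void main(String[] args)
  {
    final List<AggregatorFactory> aggs =
        Arrays.<AggregatorFactory>asList(new LongSumAggregatorFactory("edits", "edits"));

    final TopNBinaryFn fn = new TopNBinaryFn(
        TopNResultMerger.identity,
        QueryGranularity.ALL,
        new DefaultDimensionSpec("page", "page"),
        new NumericTopNMetricSpec("edits"),
        2,                                       // threshold
        aggs,
        Collections.<PostAggregator>emptyList()
    );

    final DateTime t = new DateTime("2013-01-01");
    final Result<TopNResultValue> lhs = new Result<TopNResultValue>(
        t,
        new TopNResultValue(Arrays.<Object>asList(
            ImmutableMap.<String, Object>of("page", "a", "edits", 10L),
            ImmutableMap.<String, Object>of("page", "b", "edits", 3L)
        ))
    );
    final Result<TopNResultValue> rhs = new Result<TopNResultValue>(
        t,
        new TopNResultValue(Arrays.<Object>asList(
            ImmutableMap.<String, Object>of("page", "a", "edits", 5L),
            ImmutableMap.<String, Object>of("page", "c", "edits", 7L)
        ))
    );

    // "a" should combine to 15, "b" and "c" pass through unchanged, and the numeric
    // metric spec's result builder keeps only the top 2 rows by "edits".
    System.out.println(fn.apply(lhs, rhs));
  }
}
```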
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.util.Comparator; +import java.util.List; + +/** + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = LegacyTopNMetricSpec.class) +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = "numeric", value = NumericTopNMetricSpec.class), + @JsonSubTypes.Type(name = "lexicographic", value = LexicographicTopNMetricSpec.class), + @JsonSubTypes.Type(name = "inverted", value = InvertedTopNMetricSpec.class) +}) +public interface TopNMetricSpec +{ + public void verifyPreconditions(List aggregatorSpecs, List postAggregatorSpecs); + + public Comparator getComparator(List aggregatorSpecs, List postAggregatorSpecs); + + public TopNResultBuilder getResultBuilder( + DateTime timestamp, + DimensionSpec dimSpec, + int threshold, + Comparator comparator + ); + + public byte[] getCacheKey(); + + public TopNMetricSpecBuilder configureOptimizer(TopNMetricSpecBuilder builder); + + public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector); +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/config/EventReceiverFirehoseFactoryConfig.java b/processing/src/main/java/io/druid/query/topn/TopNMetricSpecBuilder.java similarity index 77% rename from indexing-service/src/main/java/io/druid/indexing/common/config/EventReceiverFirehoseFactoryConfig.java rename to processing/src/main/java/io/druid/query/topn/TopNMetricSpecBuilder.java index 5822bdb622a..f1f5cdc9d0c 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/config/EventReceiverFirehoseFactoryConfig.java +++ b/processing/src/main/java/io/druid/query/topn/TopNMetricSpecBuilder.java @@ -17,15 +17,19 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package io.druid.indexing.common.config; - -import org.skife.config.Config; +package io.druid.query.topn; /** */ -@Deprecated -public abstract class EventReceiverFirehoseFactoryConfig +public interface TopNMetricSpecBuilder { - @Config("druid.indexer.firehoseId.prefix") - public abstract String getFirehoseIdPrefix(); + public void skipTo(String previousStop); + + public void ignoreAfterThreshold(); + + public void ignoreFirstN(int n); + + public void keepOnlyN(int n); + + public T build(); } diff --git a/processing/src/main/java/io/druid/query/topn/TopNNumericResultBuilder.java b/processing/src/main/java/io/druid/query/topn/TopNNumericResultBuilder.java new file mode 100644 index 00000000000..9f6479baee4 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNNumericResultBuilder.java @@ -0,0 +1,153 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.collect.Maps; +import com.google.common.collect.MinMaxPriorityQueue; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import org.joda.time.DateTime; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNNumericResultBuilder implements TopNResultBuilder +{ + private final DateTime timestamp; + private final DimensionSpec dimSpec; + private final String metricName; + + private MinMaxPriorityQueue pQueue = null; + + public TopNNumericResultBuilder( + DateTime timestamp, + DimensionSpec dimSpec, + String metricName, + int threshold, + final Comparator comparator + ) + { + this.timestamp = timestamp; + this.dimSpec = dimSpec; + this.metricName = metricName; + + instantiatePQueue(threshold, comparator); + } + + @Override + public TopNResultBuilder addEntry( + String dimName, + Object dimValIndex, + Object[] metricVals, + List aggFactories, + List postAggs + ) + { + Map metricValues = Maps.newLinkedHashMap(); + + metricValues.put(dimSpec.getOutputName(), dimName); + + Iterator aggFactoryIter = aggFactories.iterator(); + for (Object metricVal : metricVals) { + metricValues.put(aggFactoryIter.next().getName(), metricVal); + } + for (PostAggregator postAgg : postAggs) { + metricValues.put(postAgg.getName(), postAgg.compute(metricValues)); + } + + Object topNMetricVal = metricValues.get(metricName); + pQueue.add( + new DimValHolder.Builder().withTopNMetricVal(topNMetricVal) + .withDirName(dimName) + .withDimValIndex(dimValIndex) + .withMetricValues(metricValues) + .build() + ); + + return this; + } + + @Override + public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor) + { + pQueue.add( + new DimValHolder.Builder().withTopNMetricVal(dimensionAndMetricValueExtractor.getDimensionValue(metricName)) + .withDirName(dimSpec.getOutputName()) + .withMetricValues(dimensionAndMetricValueExtractor.getBaseObject()) + .build() + ); + + return this; + } + + @Override + public Iterator getTopNIterator() + { + return pQueue.iterator(); + } + + @Override + public Result build() + { + // Pull out top aggregated values + List> values = new ArrayList>(pQueue.size()); + while (!pQueue.isEmpty()) { + values.add(pQueue.remove().getMetricValues()); + } + + return new Result( + timestamp, + new TopNResultValue(values) + ); + } + + private void instantiatePQueue(int threshold, final Comparator comparator) + { + this.pQueue = MinMaxPriorityQueue.orderedBy( + new Comparator() + { + @Override + public int compare(DimValHolder d1, DimValHolder d2) + { + int retVal = comparator.compare(d2.getTopNMetricVal(), d1.getTopNMetricVal()); + + if (retVal == 0) { + if (d1.getDimName() == null) { + retVal = -1; + } else if (d2.getDimName() == null) { + retVal = 1; + } else { + retVal = d1.getDimName().compareTo(d2.getDimName()); + } + } + + return retVal; + } + } + ).maximumSize(threshold).create(); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNParams.java b/processing/src/main/java/io/druid/query/topn/TopNParams.java new file mode 100644 index 
00000000000..8ccc85da284 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNParams.java @@ -0,0 +1,61 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; + +/** + */ +public class TopNParams +{ + private final DimensionSelector dimSelector; + private final Cursor cursor; + private final int cardinality; + private final int numValuesPerPass; + + protected TopNParams(DimensionSelector dimSelector, Cursor cursor, int cardinality, int numValuesPerPass) + { + this.dimSelector = dimSelector; + this.cursor = cursor; + this.cardinality = cardinality; + this.numValuesPerPass = numValuesPerPass; + } + + public DimensionSelector getDimSelector() + { + return dimSelector; + } + + public Cursor getCursor() + { + return cursor; + } + + public int getCardinality() + { + return cardinality; + } + + public int getNumValuesPerPass() + { + return numValuesPerPass; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNQuery.java b/processing/src/main/java/io/druid/query/topn/TopNQuery.java new file mode 100644 index 00000000000..10b27deb6e3 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNQuery.java @@ -0,0 +1,211 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import io.druid.granularity.QueryGranularity; +import io.druid.query.BaseQuery; +import io.druid.query.Queries; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DimensionSpec; +import io.druid.query.filter.DimFilter; +import io.druid.query.spec.QuerySegmentSpec; + +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNQuery extends BaseQuery> +{ + public static final String TOPN = "topN"; + + private final DimensionSpec dimensionSpec; + private final TopNMetricSpec topNMetricSpec; + private final int threshold; + private final DimFilter dimFilter; + private final QueryGranularity granularity; + private final List aggregatorSpecs; + private final List postAggregatorSpecs; + + @JsonCreator + public TopNQuery( + @JsonProperty("dataSource") String dataSource, + @JsonProperty("dimension") DimensionSpec dimensionSpec, + @JsonProperty("metric") TopNMetricSpec topNMetricSpec, + @JsonProperty("threshold") int threshold, + @JsonProperty("intervals") QuerySegmentSpec querySegmentSpec, + @JsonProperty("filter") DimFilter dimFilter, + @JsonProperty("granularity") QueryGranularity granularity, + @JsonProperty("aggregations") List aggregatorSpecs, + @JsonProperty("postAggregations") List postAggregatorSpecs, + @JsonProperty("context") Map context + ) + { + super(dataSource, querySegmentSpec, context); + this.dimensionSpec = dimensionSpec; + this.topNMetricSpec = topNMetricSpec; + this.threshold = threshold; + + this.dimFilter = dimFilter; + this.granularity = granularity; + this.aggregatorSpecs = aggregatorSpecs; + this.postAggregatorSpecs = postAggregatorSpecs == null ? 
ImmutableList.of() : postAggregatorSpecs; + + Preconditions.checkNotNull(dimensionSpec, "dimensionSpec can't be null"); + Preconditions.checkNotNull(topNMetricSpec, "must specify a metric"); + + Preconditions.checkArgument(threshold != 0, "Threshold cannot be equal to 0."); + topNMetricSpec.verifyPreconditions(this.aggregatorSpecs, this.postAggregatorSpecs); + + Queries.verifyAggregations(this.aggregatorSpecs, this.postAggregatorSpecs); + } + + @Override + public boolean hasFilters() + { + return dimFilter != null; + } + + @Override + public String getType() + { + return TOPN; + } + + @JsonProperty("dimension") + public DimensionSpec getDimensionSpec() + { + return dimensionSpec; + } + + @JsonProperty("metric") + public TopNMetricSpec getTopNMetricSpec() + { + return topNMetricSpec; + } + + @JsonProperty("threshold") + public int getThreshold() + { + return threshold; + } + + @JsonProperty("filter") + public DimFilter getDimensionsFilter() + { + return dimFilter; + } + + @JsonProperty + public QueryGranularity getGranularity() + { + return granularity; + } + + @JsonProperty("aggregations") + public List getAggregatorSpecs() + { + return aggregatorSpecs; + } + + @JsonProperty("postAggregations") + public List getPostAggregatorSpecs() + { + return postAggregatorSpecs; + } + + public void initTopNAlgorithmSelector(TopNAlgorithmSelector selector) + { + if (dimensionSpec.getDimExtractionFn() != null) { + selector.setHasDimExtractionFn(true); + } + topNMetricSpec.initTopNAlgorithmSelector(selector); + } + + public TopNQuery withQuerySegmentSpec(QuerySegmentSpec querySegmentSpec) + { + return new TopNQuery( + getDataSource(), + dimensionSpec, + topNMetricSpec, + threshold, + querySegmentSpec, + dimFilter, + granularity, + aggregatorSpecs, + postAggregatorSpecs, + getContext() + ); + } + + public TopNQuery withThreshold(int threshold) + { + return new TopNQuery( + getDataSource(), + dimensionSpec, + topNMetricSpec, + threshold, + getQuerySegmentSpec(), + dimFilter, + granularity, + aggregatorSpecs, + postAggregatorSpecs, + getContext() + ); + } + + public TopNQuery withOverriddenContext(Map contextOverrides) + { + return new TopNQuery( + getDataSource(), + dimensionSpec, + topNMetricSpec, + threshold, + getQuerySegmentSpec(), + dimFilter, + granularity, + aggregatorSpecs, + postAggregatorSpecs, + computeOverridenContext(contextOverrides) + ); + } + + @Override + public String toString() + { + return "TopNQuery{" + + "dataSource='" + getDataSource() + '\'' + + ", dimensionSpec=" + dimensionSpec + + ", topNMetricSpec=" + topNMetricSpec + + ", threshold=" + threshold + + ", querySegmentSpec=" + getQuerySegmentSpec() + + ", dimFilter=" + dimFilter + + ", granularity='" + granularity + '\'' + + ", aggregatorSpecs=" + aggregatorSpecs + + ", postAggregatorSpecs=" + postAggregatorSpecs + + '}'; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java b/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java new file mode 100644 index 00000000000..1bfb690f490 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java @@ -0,0 +1,290 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.collect.Lists; +import io.druid.granularity.QueryGranularity; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.dimension.DefaultDimensionSpec; +import io.druid.query.dimension.DimensionSpec; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.OrDimFilter; +import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.spec.LegacySegmentSpec; +import io.druid.query.spec.QuerySegmentSpec; +import org.joda.time.Interval; + +import java.util.List; +import java.util.Map; + +/** + * A Builder for TopNQuery. + *

+ * Required: dataSource(), intervals(), metric() and threshold() must be called before build()
+ * Additional requirement for numeric metric sorts: aggregators() must be called before build()
+ * <p/>
+ * Optional: filters(), granularity(), postAggregators() and context() can be called before build()
+ * <p/>
+ * Usage example:
+ * <pre><code>
+ *   TopNQuery query = new TopNQueryBuilder()
+ *                                  .dataSource("Example")
+ *                                  .dimension("example_dim")
+ *                                  .metric("example_metric")
+ *                                  .threshold(100)
+ *                                  .intervals("2012-01-01/2012-01-02")
+ *                                  .build();
+ * </code></pre>
+ * + * @see io.druid.query.topn.TopNQuery + */ +public class TopNQueryBuilder +{ + private String dataSource; + private DimensionSpec dimensionSpec; + private TopNMetricSpec topNMetricSpec; + private int threshold; + private QuerySegmentSpec querySegmentSpec; + private DimFilter dimFilter; + private QueryGranularity granularity; + private List aggregatorSpecs; + private List postAggregatorSpecs; + private Map context; + + public TopNQueryBuilder() + { + dataSource = ""; + dimensionSpec = null; + topNMetricSpec = null; + threshold = 0; + querySegmentSpec = null; + dimFilter = null; + granularity = QueryGranularity.ALL; + aggregatorSpecs = Lists.newArrayList(); + postAggregatorSpecs = Lists.newArrayList(); + context = null; + } + + public String getDataSource() + { + return dataSource; + } + + public DimensionSpec getDimensionSpec() + { + return dimensionSpec; + } + + public TopNMetricSpec getTopNMetricSpec() + { + return topNMetricSpec; + } + + public int getThreshold() + { + return threshold; + } + + public QuerySegmentSpec getQuerySegmentSpec() + { + return querySegmentSpec; + } + + public DimFilter getDimFilter() + { + return dimFilter; + } + + public QueryGranularity getGranularity() + { + return granularity; + } + + public List getAggregatorSpecs() + { + return aggregatorSpecs; + } + + public List getPostAggregatorSpecs() + { + return postAggregatorSpecs; + } + + public Map getContext() + { + return context; + } + + public TopNQuery build() + { + return new TopNQuery( + dataSource, + dimensionSpec, + topNMetricSpec, + threshold, + querySegmentSpec, + dimFilter, + granularity, + aggregatorSpecs, + postAggregatorSpecs, + context + ); + } + + public TopNQueryBuilder copy(TopNQuery query) + { + return new TopNQueryBuilder() + .dataSource(query.getDataSource()) + .dimension(query.getDimensionSpec()) + .metric(query.getTopNMetricSpec()) + .threshold(query.getThreshold()) + .intervals(query.getIntervals()) + .filters(query.getDimensionsFilter()) + .granularity(query.getGranularity()) + .aggregators(query.getAggregatorSpecs()) + .postAggregators(query.getPostAggregatorSpecs()) + .context(query.getContext()); + } + + public TopNQueryBuilder copy(TopNQueryBuilder builder) + { + return new TopNQueryBuilder() + .dataSource(builder.dataSource) + .dimension(builder.dimensionSpec) + .metric(builder.topNMetricSpec) + .threshold(builder.threshold) + .intervals(builder.querySegmentSpec) + .filters(builder.dimFilter) + .granularity(builder.granularity) + .aggregators(builder.aggregatorSpecs) + .postAggregators(builder.postAggregatorSpecs) + .context(builder.context); + } + + public TopNQueryBuilder dataSource(String d) + { + dataSource = d; + return this; + } + + public TopNQueryBuilder dimension(String d) + { + return dimension(d, null); + } + + public TopNQueryBuilder dimension(String d, String outputName) + { + return dimension(new DefaultDimensionSpec(d, outputName)); + } + + public TopNQueryBuilder dimension(DimensionSpec d) + { + dimensionSpec = d; + return this; + } + + public TopNQueryBuilder metric(String s) + { + return metric(new NumericTopNMetricSpec(s)); + } + + public TopNQueryBuilder metric(TopNMetricSpec t) + { + topNMetricSpec = t; + return this; + } + + public TopNQueryBuilder threshold(int i) + { + threshold = i; + return this; + } + + public TopNQueryBuilder intervals(QuerySegmentSpec q) + { + querySegmentSpec = q; + return this; + } + + public TopNQueryBuilder intervals(String s) + { + querySegmentSpec = new LegacySegmentSpec(s); + return this; + } + + public TopNQueryBuilder 
intervals(List l) + { + querySegmentSpec = new LegacySegmentSpec(l); + return this; + } + + public TopNQueryBuilder filters(String dimensionName, String value) + { + dimFilter = new SelectorDimFilter(dimensionName, value); + return this; + } + + public TopNQueryBuilder filters(String dimensionName, String value, String... values) + { + List fields = Lists.newArrayList(new SelectorDimFilter(dimensionName, value)); + for (String val : values) { + fields.add(new SelectorDimFilter(dimensionName, val)); + } + dimFilter = new OrDimFilter(fields); + return this; + } + + public TopNQueryBuilder filters(DimFilter f) + { + dimFilter = f; + return this; + } + + public TopNQueryBuilder granularity(String g) + { + granularity = QueryGranularity.fromString(g); + return this; + } + + public TopNQueryBuilder granularity(QueryGranularity g) + { + granularity = g; + return this; + } + + public TopNQueryBuilder aggregators(List a) + { + aggregatorSpecs = a; + return this; + } + + public TopNQueryBuilder postAggregators(List p) + { + postAggregatorSpecs = p; + return this; + } + + public TopNQueryBuilder context(Map c) + { + context = c; + return this; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/config/TaskLogConfig.java b/processing/src/main/java/io/druid/query/topn/TopNQueryConfig.java similarity index 63% rename from indexing-service/src/main/java/io/druid/indexing/common/config/TaskLogConfig.java rename to processing/src/main/java/io/druid/query/topn/TopNQueryConfig.java index 787878e8eb8..32de88f0cb3 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/config/TaskLogConfig.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryConfig.java @@ -17,23 +17,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package io.druid.indexing.common.config; +package io.druid.query.topn; -import org.skife.config.Config; -import org.skife.config.Default; -import org.skife.config.DefaultNull; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.druid.query.QueryConfig; -public abstract class TaskLogConfig +import javax.validation.constraints.Min; + +/** + */ +public class TopNQueryConfig extends QueryConfig { - @Config("druid.indexer.logs.type") - @Default("noop") - public abstract String getLogType(); + @JsonProperty + @Min(1) + private int minTopNThreshold = 1000; - @Config("druid.indexer.logs.s3bucket") - @DefaultNull - public abstract String getLogStorageBucket(); - - @Config("druid.indexer.logs.s3prefix") - @DefaultNull - public abstract String getLogStoragePrefix(); + public int getMinTopNThreshold() + { + return minTopNThreshold; + } } diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java new file mode 100644 index 00000000000..09a158b31de --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java @@ -0,0 +1,117 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
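Because a numeric metric sort has to reference one of the aggregators, a slightly fuller builder example than the Javadoc one above may help; all datasource, dimension and metric names below are illustrative, and imports are elided as in the Javadoc sketch:

```java
// Sketch: a numeric-sorted topN built with the new TopNQueryBuilder.
// CountAggregatorFactory and LongSumAggregatorFactory come from io.druid.query.aggregation.
TopNQuery query = new TopNQueryBuilder()
    .dataSource("wikipedia")
    .dimension("page")
    .metric("edits")                       // numeric sort, so it must match an aggregator name
    .threshold(25)
    .intervals("2013-01-01/2013-01-02")
    .granularity("day")
    .filters("namespace", "article")
    .aggregators(
        Arrays.<AggregatorFactory>asList(
            new CountAggregatorFactory("rows"),
            new LongSumAggregatorFactory("edits", "count")
        )
    )
    .build();
```

Note that a query like this is still executed with at least TopNQueryConfig.minTopNThreshold entries (1000 by default) and trimmed back to the requested threshold by the toolchest's post-merge decoration, added further down in this patch.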
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.metamx.common.guava.FunctionalIterable; +import com.metamx.common.logger.Logger; +import io.druid.collections.StupidPool; +import io.druid.granularity.QueryGranularity; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.filter.Filter; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.StorageAdapter; +import io.druid.segment.filter.Filters; +import org.joda.time.Interval; + +import java.nio.ByteBuffer; +import java.util.List; + +/** + */ +public class TopNQueryEngine +{ + private static final Logger log = new Logger(TopNQueryEngine.class); + + private final StupidPool bufferPool; + + public TopNQueryEngine(StupidPool bufferPool) + { + this.bufferPool = bufferPool; + } + + public Iterable> query(final TopNQuery query, final StorageAdapter adapter) + { + final List queryIntervals = query.getQuerySegmentSpec().getIntervals(); + final Filter filter = Filters.convertDimensionFilters(query.getDimensionsFilter()); + final QueryGranularity granularity = query.getGranularity(); + final Function> mapFn = getMapFn(query, adapter); + + Preconditions.checkArgument( + queryIntervals.size() == 1, "Can only handle a single interval, got[%s]", queryIntervals + ); + + if (mapFn == null) { + return Lists.newArrayList(); + } + + return FunctionalIterable + .create(adapter.makeCursors(filter, queryIntervals.get(0), granularity)) + .transform( + new Function() + { + @Override + public Cursor apply(Cursor input) + { + log.debug("Running over cursor[%s]", adapter.getInterval(), input.getTime()); + return input; + } + } + ) + .keep(mapFn); + } + + private Function> getMapFn(TopNQuery query, final StorageAdapter adapter) + { + if (adapter == null) { + log.warn( + "Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped. Returning empty results." 
+ ); + return null; + } + + final Capabilities capabilities = adapter.getCapabilities(); + final int cardinality = adapter.getDimensionCardinality(query.getDimensionSpec().getDimension()); + int numBytesPerRecord = 0; + for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) { + numBytesPerRecord += aggregatorFactory.getMaxIntermediateSize(); + } + + final TopNAlgorithmSelector selector = new TopNAlgorithmSelector(cardinality, numBytesPerRecord); + query.initTopNAlgorithmSelector(selector); + + TopNAlgorithm topNAlgorithm = null; + if (selector.isHasDimExtractionFn()) { + topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query); + } else if (selector.isAggregateAllMetrics()) { + topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool); + } else if (selector.isAggregateTopNMetricFirst()) { + topNAlgorithm = new AggregateTopNMetricFirstAlgorithm(capabilities, query, bufferPool); + } else { + topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool); + } + + return new TopNMapFn(query, topNAlgorithm); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java new file mode 100644 index 00000000000..6e1d816cbd9 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java @@ -0,0 +1,405 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.google.common.base.Function; +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Ordering; +import com.google.common.primitives.Ints; +import com.google.inject.Inject; +import com.metamx.common.ISE; +import com.metamx.common.guava.MergeSequence; +import com.metamx.common.guava.Sequence; +import com.metamx.common.guava.Sequences; +import com.metamx.common.guava.nary.BinaryFn; +import com.metamx.emitter.service.ServiceMetricEvent; +import io.druid.collections.OrderedMergeSequence; +import io.druid.granularity.QueryGranularity; +import io.druid.query.CacheStrategy; +import io.druid.query.IntervalChunkingQueryRunner; +import io.druid.query.Query; +import io.druid.query.QueryCacheHelper; +import io.druid.query.QueryRunner; +import io.druid.query.QueryToolChest; +import io.druid.query.Result; +import io.druid.query.ResultGranularTimestampComparator; +import io.druid.query.ResultMergeQueryRunner; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.MetricManipulationFn; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.filter.DimFilter; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.joda.time.Minutes; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNQueryQueryToolChest extends QueryToolChest, TopNQuery> +{ + private static final byte TOPN_QUERY = 0x1; + + private static final Joiner COMMA_JOIN = Joiner.on(","); + private static final TypeReference> TYPE_REFERENCE = new TypeReference>(){}; + + private static final TypeReference OBJECT_TYPE_REFERENCE = new TypeReference(){}; + private final TopNQueryConfig config; + + @Inject + public TopNQueryQueryToolChest( + TopNQueryConfig config + ) + { + this.config = config; + } + + @Override + public QueryRunner> mergeResults(QueryRunner> runner) + { + return new ResultMergeQueryRunner>(runner) + { + @Override + protected Ordering> makeOrdering(Query> query) + { + return Ordering.from( + new ResultGranularTimestampComparator( + ((TopNQuery) query).getGranularity() + ) + ); + } + + @Override + protected BinaryFn, Result, Result> createMergeFn( + Query> input + ) + { + TopNQuery query = (TopNQuery) input; + return new TopNBinaryFn( + TopNResultMerger.identity, + query.getGranularity(), + query.getDimensionSpec(), + query.getTopNMetricSpec(), + query.getThreshold(), + query.getAggregatorSpecs(), + query.getPostAggregatorSpecs() + ); + } + }; + } + + @Override + public Sequence> mergeSequences(Sequence>> seqOfSequences) + { + return new OrderedMergeSequence>(getOrdering(), seqOfSequences); + } + + @Override + public ServiceMetricEvent.Builder makeMetricBuilder(TopNQuery query) + { + int numMinutes = 0; + for (Interval interval : query.getIntervals()) { + numMinutes += Minutes.minutesIn(interval).getMinutes(); + } + + return new ServiceMetricEvent.Builder() + .setUser2(query.getDataSource()) + .setUser4(String.format("topN/%s/%s", query.getThreshold(), query.getDimensionSpec().getDimension())) + .setUser5(COMMA_JOIN.join(query.getIntervals())) + .setUser6(String.valueOf(query.hasFilters())) + .setUser7(String.format("%,d aggs", query.getAggregatorSpecs().size())) + 
.setUser9(Minutes.minutes(numMinutes).toString()); + } + + @Override + public Function, Result> makeMetricManipulatorFn( + final TopNQuery query, final MetricManipulationFn fn + ) + { + return new Function, Result>() + { + private String dimension = query.getDimensionSpec().getOutputName(); + + @Override + public Result apply(@Nullable Result result) + { + List> serializedValues = Lists.newArrayList( + Iterables.transform( + result.getValue(), + new Function>() + { + @Override + public Map apply(@Nullable DimensionAndMetricValueExtractor input) + { + final Map values = Maps.newHashMap(); + for (AggregatorFactory agg : query.getAggregatorSpecs()) { + values.put(agg.getName(), fn.manipulate(agg, input.getMetric(agg.getName()))); + } + for (PostAggregator postAgg : query.getPostAggregatorSpecs()) { + values.put(postAgg.getName(), input.getMetric(postAgg.getName())); + } + values.put(dimension, input.getDimensionValue(dimension)); + + return values; + } + } + ) + ); + + return new Result( + result.getTimestamp(), + new TopNResultValue(serializedValues) + ); + } + }; + } + + @Override + public TypeReference> getResultTypeReference() + { + return TYPE_REFERENCE; + } + + @Override + public CacheStrategy, Object, TopNQuery> getCacheStrategy(final TopNQuery query) + { + return new CacheStrategy, Object, TopNQuery>() + { + private final List aggs = query.getAggregatorSpecs(); + private final List postAggs = query.getPostAggregatorSpecs(); + + @Override + public byte[] computeCacheKey(TopNQuery query) + { + final byte[] dimensionSpecBytes = query.getDimensionSpec().getCacheKey(); + final byte[] metricSpecBytes = query.getTopNMetricSpec().getCacheKey(); + + final DimFilter dimFilter = query.getDimensionsFilter(); + final byte[] filterBytes = dimFilter == null ? 
new byte[]{} : dimFilter.getCacheKey(); + final byte[] aggregatorBytes = QueryCacheHelper.computeAggregatorBytes(query.getAggregatorSpecs()); + final byte[] granularityBytes = query.getGranularity().cacheKey(); + + return ByteBuffer + .allocate( + 1 + dimensionSpecBytes.length + metricSpecBytes.length + 4 + + granularityBytes.length + filterBytes.length + aggregatorBytes.length + ) + .put(TOPN_QUERY) + .put(dimensionSpecBytes) + .put(metricSpecBytes) + .put(Ints.toByteArray(query.getThreshold())) + .put(granularityBytes) + .put(filterBytes) + .put(aggregatorBytes) + .array(); + } + + @Override + public TypeReference getCacheObjectClazz() + { + return OBJECT_TYPE_REFERENCE; + } + + @Override + public Function, Object> prepareForCache() + { + return new Function, Object>() + { + @Override + public Object apply(@Nullable final Result input) + { + List results = Lists.newArrayList(input.getValue()); + final List retVal = Lists.newArrayListWithCapacity(results.size() + 1); + + // make sure to preserve timezone information when caching results + retVal.add(input.getTimestamp().getMillis()); + for (DimensionAndMetricValueExtractor result : results) { + List vals = Lists.newArrayListWithCapacity(aggs.size() + 2); + vals.add(result.getStringDimensionValue(query.getDimensionSpec().getOutputName())); + for (AggregatorFactory agg : aggs) { + vals.add(result.getMetric(agg.getName())); + } + retVal.add(vals); + } + return retVal; + } + }; + } + + @Override + public Function> pullFromCache() + { + return new Function>() + { + private final QueryGranularity granularity = query.getGranularity(); + + @Override + public Result apply(@Nullable Object input) + { + List results = (List) input; + List> retVal = Lists.newArrayListWithCapacity(results.size()); + + Iterator inputIter = results.iterator(); + DateTime timestamp = granularity.toDateTime(new DateTime(inputIter.next()).getMillis()); + + while (inputIter.hasNext()) { + List result = (List) inputIter.next(); + Map vals = Maps.newLinkedHashMap(); + + Iterator aggIter = aggs.iterator(); + Iterator resultIter = result.iterator(); + + vals.put(query.getDimensionSpec().getOutputName(), resultIter.next()); + + while (aggIter.hasNext() && resultIter.hasNext()) { + final AggregatorFactory factory = aggIter.next(); + vals.put(factory.getName(), factory.deserialize(resultIter.next())); + } + + for (PostAggregator postAgg : postAggs) { + vals.put(postAgg.getName(), postAgg.compute(vals)); + } + + retVal.add(vals); + } + + return new Result(timestamp, new TopNResultValue(retVal)); + } + }; + } + + @Override + public Sequence> mergeSequences(Sequence>> seqOfSequences) + { + return new MergeSequence>(getOrdering(), seqOfSequences); + } + }; + } + + @Override + public QueryRunner> preMergeQueryDecoration(QueryRunner> runner) + { + return new IntervalChunkingQueryRunner>(runner, config.getChunkPeriod()); + } + + @Override + public QueryRunner> postMergeQueryDecoration(final QueryRunner> runner) + { + return new ThresholdAdjustingQueryRunner(runner, config.getMinTopNThreshold()); + } + + private static class ThresholdAdjustingQueryRunner implements QueryRunner> + { + private final QueryRunner> runner; + private final int minTopNThreshold; + + public ThresholdAdjustingQueryRunner( + QueryRunner> runner, + int minTopNThreshold + ) + { + this.runner = runner; + this.minTopNThreshold = minTopNThreshold; + } + + @Override + public Sequence> run(Query> input) + { + if (!(input instanceof TopNQuery)) { + throw new ISE("Can only handle [%s], got [%s]", TopNQuery.class, 
input.getClass()); + } + + final TopNQuery query = (TopNQuery) input; + if (query.getThreshold() > minTopNThreshold) { + return runner.run(query); + } + + final boolean isBySegment = Boolean.parseBoolean(query.getContextValue("bySegment", "false")); + + return Sequences.map( + runner.run(query.withThreshold(minTopNThreshold)), + new Function, Result>() + { + @Override + public Result apply(Result input) + { + if (isBySegment) { + BySegmentTopNResultValue value = (BySegmentTopNResultValue) input.getValue(); + + return new Result( + input.getTimestamp(), + new BySegmentTopNResultValue( + Lists.transform( + value.getResults(), + new Function, Result>() + { + @Override + public Result apply(@Nullable Result input) + { + return new Result( + input.getTimestamp(), + new TopNResultValue( + Lists.newArrayList( + Iterables.limit( + input.getValue(), + query.getThreshold() + ) + ) + ) + ); + } + } + ), + value.getSegmentId(), + value.getIntervalString() + ) + ); + } + + return new Result( + input.getTimestamp(), + new TopNResultValue( + Lists.newArrayList( + Iterables.limit( + input.getValue(), + query.getThreshold() + ) + ) + ) + ); + } + } + ); + } + } + + public Ordering> getOrdering() + { + return Ordering.natural(); + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryRunnerFactory.java b/processing/src/main/java/io/druid/query/topn/TopNQueryRunnerFactory.java new file mode 100644 index 00000000000..d342d764d50 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryRunnerFactory.java @@ -0,0 +1,108 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.google.inject.Inject; +import com.metamx.common.ISE; +import com.metamx.common.guava.BaseSequence; +import com.metamx.common.guava.Sequence; +import io.druid.collections.StupidPool; +import io.druid.guice.annotations.Global; +import io.druid.query.ChainedExecutionQueryRunner; +import io.druid.query.Query; +import io.druid.query.QueryRunner; +import io.druid.query.QueryRunnerFactory; +import io.druid.query.QueryToolChest; +import io.druid.query.Result; +import io.druid.segment.Segment; + +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.concurrent.ExecutorService; + +/** + */ +public class TopNQueryRunnerFactory implements QueryRunnerFactory, TopNQuery> +{ + private final StupidPool computationBufferPool; + private final TopNQueryQueryToolChest toolchest; + + @Inject + public TopNQueryRunnerFactory( + @Global StupidPool computationBufferPool, + TopNQueryQueryToolChest toolchest + ) + { + this.computationBufferPool = computationBufferPool; + this.toolchest = toolchest; + } + + @Override + public QueryRunner> createRunner(final Segment segment) + { + final TopNQueryEngine queryEngine = new TopNQueryEngine(computationBufferPool); + return new QueryRunner>() + { + @Override + public Sequence> run(Query> input) + { + if (!(input instanceof TopNQuery)) { + throw new ISE("Got a [%s] which isn't a %s", input.getClass(), TopNQuery.class); + } + + final TopNQuery legacyQuery = (TopNQuery) input; + + return new BaseSequence, Iterator>>( + new BaseSequence.IteratorMaker, Iterator>>() + { + @Override + public Iterator> make() + { + return queryEngine.query(legacyQuery, segment.asStorageAdapter()).iterator(); + } + + @Override + public void cleanup(Iterator> toClean) + { + + } + } + ); + } + }; + + } + + @Override + public QueryRunner> mergeRunners( + ExecutorService queryExecutor, Iterable>> queryRunners + ) + { + return new ChainedExecutionQueryRunner>( + queryExecutor, toolchest.getOrdering(), queryRunners + ); + } + + @Override + public QueryToolChest, TopNQuery> getToolchest() + { + return toolchest; + } +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNResultBuilder.java b/processing/src/main/java/io/druid/query/topn/TopNResultBuilder.java new file mode 100644 index 00000000000..5823ee3eece --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNResultBuilder.java @@ -0,0 +1,48 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; + +import java.util.Iterator; +import java.util.List; + +/** + */ +public interface TopNResultBuilder +{ + public TopNResultBuilder addEntry( + String dimName, + Object dimValIndex, + Object[] metricVals, + List aggFactories, + List postAggs + ); + + public TopNResultBuilder addEntry( + DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor + ); + + public Iterator getTopNIterator(); + + public Result build(); +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNResultMerger.java b/processing/src/main/java/io/druid/query/topn/TopNResultMerger.java new file mode 100644 index 00000000000..f4ff8ca9b06 --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNResultMerger.java @@ -0,0 +1,40 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import io.druid.query.Result; + +import java.util.Comparator; + +/** + */ +public interface TopNResultMerger +{ + public static TopNResultMerger identity = new TopNResultMerger() + { + @Override + public Result getResult(Result result, Comparator comparator) + { + return result; + } + }; + + public Result getResult(Result result, Comparator comparator); +} diff --git a/processing/src/main/java/io/druid/query/topn/TopNResultValue.java b/processing/src/main/java/io/druid/query/topn/TopNResultValue.java new file mode 100644 index 00000000000..b65bb1f815a --- /dev/null +++ b/processing/src/main/java/io/druid/query/topn/TopNResultValue.java @@ -0,0 +1,107 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.query.topn; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import com.google.common.base.Function; +import com.google.common.collect.Lists; +import com.metamx.common.IAE; + +import javax.annotation.Nullable; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNResultValue implements Iterable +{ + private final List value; + + @JsonCreator + public TopNResultValue( + List value + ) + { + this.value = (value == null) ? Lists.newArrayList() : Lists.transform( + value, + new Function() + { + @Override + public DimensionAndMetricValueExtractor apply(@Nullable Object input) + { + if (input instanceof Map) { + return new DimensionAndMetricValueExtractor((Map) input); + } else if (input instanceof DimensionAndMetricValueExtractor) { + return (DimensionAndMetricValueExtractor) input; + } else { + throw new IAE("Unknown type for input[%s]", input.getClass()); + } + } + } + ); + } + + @JsonValue + public List getValue() + { + return value; + } + + @Override + public Iterator iterator() + { + return value.iterator(); + } + + @Override + public String toString() + { + return "TopNResultValue{" + + "value=" + value + + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + TopNResultValue that = (TopNResultValue) o; + + if (value != null ? !value.equals(that.value) : that.value != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() + { + return value != null ? value.hashCode() : 0; + } +} diff --git a/processing/src/test/java/io/druid/query/TestQueryRunners.java b/processing/src/test/java/io/druid/query/TestQueryRunners.java new file mode 100644 index 00000000000..c4767c1c6f9 --- /dev/null +++ b/processing/src/test/java/io/druid/query/TestQueryRunners.java @@ -0,0 +1,83 @@ +package io.druid.query; + +import com.google.common.base.Supplier; +import io.druid.collections.StupidPool; +import io.druid.query.search.SearchQueryQueryToolChest; +import io.druid.query.search.SearchQueryRunnerFactory; +import io.druid.query.search.search.SearchQueryConfig; +import io.druid.query.timeboundary.TimeBoundaryQueryRunnerFactory; +import io.druid.query.timeseries.TimeseriesQueryRunnerFactory; +import io.druid.query.topn.TopNQueryConfig; +import io.druid.query.topn.TopNQueryQueryToolChest; +import io.druid.query.topn.TopNQueryRunnerFactory; +import io.druid.segment.Segment; + +import java.nio.ByteBuffer; + +/** + */ +public class TestQueryRunners +{ + public static final StupidPool pool = new StupidPool( + new Supplier() + { + @Override + public ByteBuffer get() + { + return ByteBuffer.allocate(1024 * 10); + } + } + ); + + public static final TopNQueryConfig topNConfig = new TopNQueryConfig(); + + public static StupidPool getPool() + { + return pool; + } + + public static QueryRunner makeTopNQueryRunner( + Segment adapter + ) + { + QueryRunnerFactory factory = new TopNQueryRunnerFactory(pool, new TopNQueryQueryToolChest(topNConfig)); + return new FinalizeResultsQueryRunner( + factory.createRunner(adapter), + factory.getToolchest() + ); + } + + public static QueryRunner makeTimeSeriesQueryRunner( + Segment adapter + ) + { + QueryRunnerFactory factory = TimeseriesQueryRunnerFactory.create(); + return new FinalizeResultsQueryRunner( + factory.createRunner(adapter), + factory.getToolchest() + ); + } + + public static QueryRunner 
makeSearchQueryRunner( + Segment adapter + ) + { + QueryRunnerFactory factory = new SearchQueryRunnerFactory(new SearchQueryQueryToolChest(new SearchQueryConfig())); + return new FinalizeResultsQueryRunner( + factory.createRunner(adapter), + factory.getToolchest() + ); + } + + public static QueryRunner makeTimeBoundaryQueryRunner( + Segment adapter + ) + { + QueryRunnerFactory factory = new TimeBoundaryQueryRunnerFactory(); + return new FinalizeResultsQueryRunner( + factory.createRunner(adapter), + factory.getToolchest() + ); + } + +} \ No newline at end of file diff --git a/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java index 2435211dfe9..7f087559339 100644 --- a/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/JavaScriptAggregatorTest.java @@ -27,6 +27,7 @@ import org.junit.Test; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.Collections; import java.util.Map; public class JavaScriptAggregatorTest @@ -141,6 +142,39 @@ public class JavaScriptAggregatorTest Assert.assertEquals(val, agg.get(buf, position)); } + @Test + public void testAggregateMissingColumn() + { + Map script = scriptDoubleSum; + + JavaScriptAggregator agg = new JavaScriptAggregator( + "billy", + Collections.singletonList(null), + JavaScriptAggregatorFactory.compileScript(script.get("fnAggregate"), + script.get("fnReset"), + script.get("fnCombine")) + ); + + final double val = 0; + + Assert.assertEquals("billy", agg.getName()); + + agg.reset(); + Assert.assertEquals(val, agg.get()); + Assert.assertEquals(val, agg.get()); + Assert.assertEquals(val, agg.get()); + + agg.aggregate(); + Assert.assertEquals(val, agg.get()); + Assert.assertEquals(val, agg.get()); + Assert.assertEquals(val, agg.get()); + + agg.aggregate(); + Assert.assertEquals(val, agg.get()); + Assert.assertEquals(val, agg.get()); + Assert.assertEquals(val, agg.get()); + } + public static void main(String... 
args) throws Exception { final LoopingFloatColumnSelector selector = new LoopingFloatColumnSelector(new float[]{42.12f, 9f}); diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java index cc5a1b26b4e..22e403588fd 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java @@ -20,6 +20,7 @@ package io.druid.query.extraction.extraction; import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import io.druid.query.extraction.DimExtractionFn; import io.druid.query.extraction.JavascriptDimExtractionFn; import org.junit.Assert; @@ -52,6 +53,21 @@ public class JavascriptDimExtractionFnTest } } + @Test + public void testCastingAndNull() + { + String function = "function(x) {\n x = Number(x);\n if(isNaN(x)) return null;\n return Math.floor(x / 5) * 5;\n}"; + DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + + Iterator it = Iterators.forArray("0", "5", "5", "10", null); + + for(String str : Lists.newArrayList("1", "5", "6", "10", "CA")) { + String res = dimExtractionFn.apply(str); + String expected = it.next(); + Assert.assertEquals(expected, res); + } + } + @Test public void testJavascriptRegex() { diff --git a/processing/src/test/java/io/druid/query/topn/TopNBinaryFnTest.java b/processing/src/test/java/io/druid/query/topn/TopNBinaryFnTest.java new file mode 100644 index 00000000000..2ce63a693e2 --- /dev/null +++ b/processing/src/test/java/io/druid/query/topn/TopNBinaryFnTest.java @@ -0,0 +1,458 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
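Outside the test harness, the casting-and-null behavior covered by the new `testCastingAndNull` comes down to the extraction function returning `null` for values that do not cast to a number and bucketing the rest. A small standalone illustration using the same script:

```java
// Same script as testCastingAndNull: cast to Number, map NaN to null, bucket by 5.
String function = "function(x) { x = Number(x); if (isNaN(x)) return null; return Math.floor(x / 5) * 5; }";
DimExtractionFn fn = new JavascriptDimExtractionFn(function);

fn.apply("1");    // "0"
fn.apply("6");    // "5"
fn.apply("10");   // "10"
fn.apply("CA");   // null, since Number("CA") is NaN
```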
+ */ + +package io.druid.query.topn; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.druid.granularity.QueryGranularity; +import io.druid.query.Result; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.CountAggregatorFactory; +import io.druid.query.aggregation.LongSumAggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.ArithmeticPostAggregator; +import io.druid.query.aggregation.post.ConstantPostAggregator; +import io.druid.query.aggregation.post.FieldAccessPostAggregator; +import io.druid.query.dimension.DefaultDimensionSpec; +import junit.framework.Assert; +import org.joda.time.DateTime; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + */ +public class TopNBinaryFnTest +{ + final CountAggregatorFactory rowsCount = new CountAggregatorFactory("rows"); + final LongSumAggregatorFactory indexLongSum = new LongSumAggregatorFactory("index", "index"); + final ConstantPostAggregator constant = new ConstantPostAggregator("const", 1L); + final FieldAccessPostAggregator rowsPostAgg = new FieldAccessPostAggregator("rows", "rows"); + final FieldAccessPostAggregator indexPostAgg = new FieldAccessPostAggregator("index", "index"); + final ArithmeticPostAggregator addrowsindexconstant = new ArithmeticPostAggregator( + "addrowsindexconstant", + "+", + Lists.newArrayList(constant, rowsPostAgg, indexPostAgg) + ); + final List aggregatorFactories = Arrays.asList( + rowsCount, + indexLongSum + ); + final List postAggregators = Arrays.asList( + addrowsindexconstant + ); + private final DateTime currTime = new DateTime(); + + private void assertTopNMergeResult(Object o1, Object o2) + { + Iterator i1 = ((Iterable) o1).iterator(); + Iterator i2 = ((Iterable) o2).iterator(); + while (i1.hasNext() && i2.hasNext()) { + Assert.assertEquals(i1.next(), i2.next()); + } + Assert.assertTrue(!i1.hasNext() && !i2.hasNext()); + } + + @Test + public void testMerge() + { + Result result1 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 1L, + "index", 2L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 4L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 2L, + "testdim", "3" + ) + ) + ) + ); + Result result2 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 2L, + "index", 3L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 0L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 1L, + "testdim", "3" + ) + ) + ) + ); + + Result expected = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "testdim", "1", + "rows", 3L, + "index", 5L, + "addrowsindexconstant", 9.0 + ), + + ImmutableMap.of( + "testdim", "2", + "rows", 4L, + "index", 4L, + "addrowsindexconstant", 9.0 + ) + ) + ) + ); + + Result actual = new TopNBinaryFn( + TopNResultMerger.identity, + QueryGranularity.ALL, + new DefaultDimensionSpec("testdim", null), + new NumericTopNMetricSpec("index"), + 2, + aggregatorFactories, + postAggregators + ).apply( + result1, + result2 + ); + Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); + assertTopNMergeResult(expected.getValue(), actual.getValue()); + } + + @Test + public void testMergeDay() + { + Result result1 = new Result( + 
currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 1L, + "index", 2L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 4L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 2L, + "testdim", "3" + ) + ) + ) + ); + Result result2 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 2L, + "index", 3L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 0L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 1L, + "testdim", "3" + ) + ) + ) + ); + + Result expected = new Result( + new DateTime(QueryGranularity.DAY.truncate(currTime.getMillis())), + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "testdim", "1", + "rows", 3L, + "index", 5L, + "addrowsindexconstant", 9.0 + ), + ImmutableMap.of( + "testdim", "2", + "rows", 4L, + "index", 4L, + "addrowsindexconstant", 9.0 + ) + ) + ) + ); + + Result actual = new TopNBinaryFn( + TopNResultMerger.identity, + QueryGranularity.DAY, + new DefaultDimensionSpec("testdim", null), + new NumericTopNMetricSpec("index"), + 2, + aggregatorFactories, + postAggregators + ).apply( + result1, + result2 + ); + Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); + assertTopNMergeResult(expected.getValue(), actual.getValue()); + } + + @Test + public void testMergeOneResultNull() + { + Result result1 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 1L, + "index", 2L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 4L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 2L, + "testdim", "3" + ) + ) + ) + ); + Result result2 = null; + + Result expected = result1; + + Result actual = new TopNBinaryFn( + TopNResultMerger.identity, + QueryGranularity.ALL, + new DefaultDimensionSpec("testdim", null), + new NumericTopNMetricSpec("index"), + 2, + aggregatorFactories, + postAggregators + ).apply( + result1, + result2 + ); + Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); + assertTopNMergeResult(expected.getValue(), actual.getValue()); + } + + @Test + public void testMergeByPostAgg() + { + Result result1 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 1L, + "index", 2L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 4L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 2L, + "testdim", "3" + ) + ) + ) + ); + Result result2 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 2L, + "index", 3L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 0L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 1L, + "testdim", "3" + ) + ) + ) + ); + + Result expected = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "testdim", "1", + "rows", 3L, + "index", 5L, + "addrowsindexconstant", 9.0 + ), + ImmutableMap.of( + "testdim", "2", + "rows", 4L, + "index", 4L, + "addrowsindexconstant", 9.0 + ) + ) + ) + ); + + Result actual = new TopNBinaryFn( + TopNResultMerger.identity, + QueryGranularity.ALL, + new DefaultDimensionSpec("testdim", null), + new NumericTopNMetricSpec("addrowsindexconstant"), + 2, + aggregatorFactories, + postAggregators + ).apply( + result1, + result2 + ); + Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); + assertTopNMergeResult(expected.getValue(), actual.getValue()); + } + + @Test + 
public void testMergeShiftedTimestamp() + { + Result result1 = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 1L, + "index", 2L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 4L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 2L, + "testdim", "3" + ) + ) + ) + ); + Result result2 = new Result( + currTime.plusHours(2), + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "rows", 2L, + "index", 3L, + "testdim", "1" + ), + ImmutableMap.of( + "rows", 2L, + "index", 0L, + "testdim", "2" + ), + ImmutableMap.of( + "rows", 0L, + "index", 1L, + "testdim", "3" + ) + ) + ) + ); + + Result expected = new Result( + currTime, + new TopNResultValue( + ImmutableList.>of( + ImmutableMap.of( + "testdim", "1", + "rows", 3L, + "index", 5L, + "addrowsindexconstant", 9.0 + ), + ImmutableMap.of( + "testdim", "2", + "rows", 4L, + "index", 4L, + "addrowsindexconstant", 9.0 + ) + ) + ) + ); + + Result actual = new TopNBinaryFn( + TopNResultMerger.identity, + QueryGranularity.ALL, + new DefaultDimensionSpec("testdim", null), + new NumericTopNMetricSpec("index"), + 2, + aggregatorFactories, + postAggregators + ).apply( + result1, + result2 + ); + Assert.assertEquals(expected.getTimestamp(), actual.getTimestamp()); + assertTopNMergeResult(expected.getValue(), actual.getValue()); + } +} diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java new file mode 100644 index 00000000000..839f82c3cbc --- /dev/null +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -0,0 +1,1077 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
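The tests above exercise `TopNBinaryFn` directly; as a sketch, merging two partial results for the same time bucket looks like this (reusing the aggregators, post-aggregators, and partial results the test fixtures define):

```java
// Combine two partial TopN results into one, re-ranked by the "index" metric and
// truncated to the top 2 dimension values, as TopNBinaryFnTest.testMerge does.
TopNBinaryFn mergeFn = new TopNBinaryFn(
    TopNResultMerger.identity,
    QueryGranularity.ALL,
    new DefaultDimensionSpec("testdim", null),
    new NumericTopNMetricSpec("index"),
    2,                        // threshold
    aggregatorFactories,      // rows count + index long sum, from the fixture
    postAggregators           // addrowsindexconstant, from the fixture
);

Result<TopNResultValue> merged = mergeFn.apply(result1, result2);
```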
+ */ + +package io.druid.query.topn; + +import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.metamx.common.guava.Sequences; +import io.druid.collections.StupidPool; +import io.druid.granularity.QueryGranularity; +import io.druid.query.Druids; +import io.druid.query.QueryRunner; +import io.druid.query.Result; +import io.druid.query.TestQueryRunners; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.CountAggregatorFactory; +import io.druid.query.aggregation.DoubleSumAggregatorFactory; +import io.druid.query.aggregation.LongSumAggregatorFactory; +import io.druid.query.aggregation.MaxAggregatorFactory; +import io.druid.query.aggregation.MinAggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.ArithmeticPostAggregator; +import io.druid.query.aggregation.post.ConstantPostAggregator; +import io.druid.query.aggregation.post.FieldAccessPostAggregator; +import io.druid.query.dimension.ExtractionDimensionSpec; +import io.druid.query.extraction.RegexDimExtractionFn; +import io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.DimFilter; +import io.druid.query.spec.MultipleIntervalSegmentSpec; +import io.druid.query.spec.QuerySegmentSpec; +import io.druid.segment.TestHelper; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +/** + */ +@RunWith(Parameterized.class) +public class TopNQueryRunnerTest +{ + @Parameterized.Parameters + public static Collection constructorFeeder() throws IOException + { + List retVal = Lists.newArrayList(); + retVal.addAll( + TopNQueryRunnerTestHelper.makeQueryRunners( + new TopNQueryRunnerFactory( + TestQueryRunners.getPool(), + new TopNQueryQueryToolChest(new TopNQueryConfig()) + ) + ) + ); + retVal.addAll( + TopNQueryRunnerTestHelper.makeQueryRunners( + new TopNQueryRunnerFactory( + new StupidPool( + new Supplier() + { + @Override + public ByteBuffer get() + { + return ByteBuffer.allocate(2000); + } + } + ), + new TopNQueryQueryToolChest(new TopNQueryConfig()) + ) + ) + ); + + return retVal; + } + + private final QueryRunner runner; + + public TopNQueryRunnerTest( + QueryRunner runner + ) + { + this.runner = runner; + } + + final String dataSource = "testing"; + final QueryGranularity gran = QueryGranularity.DAY; + final QueryGranularity allGran = QueryGranularity.ALL; + final String providerDimension = "provider"; + final String qualityDimension = "quality"; + final String placementishDimension = "placementish"; + final String indexMetric = "index"; + final String addRowsIndexConstantMetric = "addRowsIndexConstant"; + final CountAggregatorFactory rowsCount = new CountAggregatorFactory("rows"); + final LongSumAggregatorFactory indexLongSum = new LongSumAggregatorFactory("index", "index"); + final DoubleSumAggregatorFactory indexDoubleSum = new DoubleSumAggregatorFactory("index", "index"); + final ConstantPostAggregator constant = new ConstantPostAggregator("const", 1L); + final FieldAccessPostAggregator rowsPostAgg = new FieldAccessPostAggregator("rows", "rows"); + final FieldAccessPostAggregator indexPostAgg = new 
FieldAccessPostAggregator("index", "index"); + final ArithmeticPostAggregator addRowsIndexConstant = + new ArithmeticPostAggregator( + "addRowsIndexConstant", "+", Lists.newArrayList(constant, rowsPostAgg, indexPostAgg) + ); + final List commonAggregators = Arrays.asList(rowsCount, indexDoubleSum); + + + final String[] expectedFullOnIndexValues = new String[]{ + "4500.0", "6077.949111938477", "4922.488838195801", "5726.140853881836", "4698.468170166016", + "4651.030891418457", "4398.145851135254", "4596.068244934082", "4434.630561828613", "0.0", + "6162.801361083984", "5590.292701721191", "4994.298484802246", "5179.679672241211", "6288.556800842285", + "6025.663551330566", "5772.855537414551", "5346.517524719238", "5497.331253051758", "5909.684387207031", + "5862.711364746094", "5958.373008728027", "5224.882194519043", "5456.789611816406", "5456.095397949219", + "4642.481948852539", "5023.572692871094", "5155.821723937988", "5350.3723220825195", "5236.997489929199", + "4910.097717285156", "4507.608840942383", "4659.80500793457", "5354.878845214844", "4945.796455383301", + "6459.080368041992", "4390.493583679199", "6545.758262634277", "6922.801231384277", "6023.452911376953", + "6812.107475280762", "6368.713348388672", "6381.748748779297", "5631.245086669922", "4976.192253112793", + "6541.463027954102", "5983.8513107299805", "5967.189498901367", "5567.139289855957", "4863.5944747924805", + "4681.164360046387", "6122.321441650391", "5410.308860778809", "4846.676376342773", "5333.872688293457", + "5013.053741455078", "4836.85563659668", "5264.486434936523", "4581.821243286133", "4680.233596801758", + "4771.363662719727", "5038.354717254639", "4816.808464050293", "4684.095504760742", "5023.663467407227", + "5889.72257232666", "4984.973915100098", "5664.220512390137", "5572.653915405273", "5537.123138427734", + "5980.422874450684", "6243.834693908691", "5372.147285461426", "5690.728981018066", "5827.796455383301", + "6141.0769119262695", "6082.3237228393555", "5678.771339416504", "6814.467971801758", "6626.151596069336", + "5833.2095947265625", "4679.222328186035", "5367.9403076171875", "5410.445640563965", "5689.197135925293", + "5240.5018310546875", "4790.912239074707", "4992.670921325684", "4796.888023376465", "5479.439590454102", + "5506.567192077637", "4743.144546508789", "4913.282669067383", "4723.869743347168" + }; + + final DateTime skippedDay = new DateTime("2011-01-21T00:00:00.000Z"); + + final QuerySegmentSpec firstToThird = new MultipleIntervalSegmentSpec( + Arrays.asList(new Interval("2011-04-01T00:00:00.000Z/2011-04-03T00:00:00.000Z")) + ); + final QuerySegmentSpec fullOnInterval = new MultipleIntervalSegmentSpec( + Arrays.asList(new Interval("1970-01-01T00:00:00.000Z/2020-01-01T00:00:00.000Z")) + ); + + + @Test + public void testFullOnTopN() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(fullOnInterval) + .aggregators( + Lists.newArrayList( + Iterables.concat( + commonAggregators, + Lists.newArrayList( + new MaxAggregatorFactory("maxIndex", "index"), + new MinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.builder() + .put("provider", "total_market") + .put("rows", 186L) + .put("index", 215679.82879638672D) + 
.put("addRowsIndexConstant", 215866.82879638672D) + .put("maxIndex", 1743.9217529296875D) + .put("minIndex", 792.3260498046875D) + .build(), + ImmutableMap.builder() + .put("provider", "upfront") + .put("rows", 186L) + .put("index", 192046.1060180664D) + .put("addRowsIndexConstant", 192233.1060180664D) + .put("maxIndex", 1870.06103515625D) + .put("minIndex", 545.9906005859375D) + .build(), + ImmutableMap.builder() + .put("provider", "spot") + .put("rows", 837L) + .put("index", 95606.57232284546D) + .put("addRowsIndexConstant", 96444.57232284546D) + .put("maxIndex", 277.2735290527344D) + .put("minIndex", 59.02102279663086D) + .build() + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testFullOnTopNOverPostAggs() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(addRowsIndexConstantMetric) + .threshold(4) + .intervals(fullOnInterval) + .aggregators( + Lists.newArrayList( + Iterables.concat( + commonAggregators, + Lists.newArrayList( + new MaxAggregatorFactory("maxIndex", "index"), + new MinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.builder() + .put("provider", "total_market") + .put("rows", 186L) + .put("index", 215679.82879638672D) + .put("addRowsIndexConstant", 215866.82879638672D) + .put("maxIndex", 1743.9217529296875D) + .put("minIndex", 792.3260498046875D) + .build(), + ImmutableMap.builder() + .put("provider", "upfront") + .put("rows", 186L) + .put("index", 192046.1060180664D) + .put("addRowsIndexConstant", 192233.1060180664D) + .put("maxIndex", 1870.06103515625D) + .put("minIndex", 545.9906005859375D) + .build(), + ImmutableMap.builder() + .put("provider", "spot") + .put("rows", 837L) + .put("index", 95606.57232284546D) + .put("addRowsIndexConstant", 96444.57232284546D) + .put("maxIndex", 277.2735290527344D) + .put("minIndex", 59.02102279663086D) + .build() + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopN() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ), + ImmutableMap.of( + "provider", "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithOrFilter1() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(providerDimension, "total_market", "upfront", "spot") + .dimension(providerDimension) + .metric(indexMetric) + 
.threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ), + ImmutableMap.of( + "provider", "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithOrFilter2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(providerDimension, "total_market", "upfront") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithFilter1() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(providerDimension, "upfront") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithFilter2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(qualityDimension, "mezzanine") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "upfront", + "rows", 2L, + "index", 2591.68359375D, + "addRowsIndexConstant", 2594.68359375D + ), + ImmutableMap.of( + "provider", "total_market", + "rows", 2L, + "index", 2508.39599609375D, + "addRowsIndexConstant", 2511.39599609375D + ), + ImmutableMap.of( + "provider", "spot", + "rows", 2L, + "index", 220.63774871826172D, + "addRowsIndexConstant", 223.63774871826172D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void 
testTopNWithFilter2OneDay() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(qualityDimension, "mezzanine") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals( + new MultipleIntervalSegmentSpec( + Arrays.asList(new Interval("2011-04-01T00:00:00.000Z/2011-04-02T00:00:00.000Z")) + ) + ) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "upfront", + "rows", 1L, + "index", new Float(1447.341160).doubleValue(), + "addRowsIndexConstant", new Float(1449.341160).doubleValue() + ), + ImmutableMap.of( + "provider", "total_market", + "rows", 1L, + "index", new Float(1314.839715).doubleValue(), + "addRowsIndexConstant", new Float(1316.839715).doubleValue() + ), + ImmutableMap.of( + "provider", "spot", + "rows", 1L, + "index", new Float(109.705815).doubleValue(), + "addRowsIndexConstant", new Float(111.705815).doubleValue() + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithNonExistentFilterInOr() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(providerDimension, "total_market", "upfront", "billyblank") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithNonExistentFilter() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(providerDimension, "billyblank") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + TestHelper.assertExpectedResults( + Lists.>newArrayList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue(Lists.>newArrayList()) + ) + ), + runner.run(query) + ); + } + + @Test + public void testTopNWithNonExistentFilterMultiDim() + { + AndDimFilter andDimFilter = Druids.newAndDimFilterBuilder() + .fields( + Lists.newArrayList( + Druids.newSelectorDimFilterBuilder() + .dimension(providerDimension) + .value("billyblank") + .build(), + Druids.newSelectorDimFilterBuilder() + .dimension(qualityDimension) + .value("mezzanine") + .build() + ) + ).build(); + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(andDimFilter) + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + TestHelper.assertExpectedResults( + Lists.>newArrayList( 
+ new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue(Lists.>newArrayList()) + ) + ), + runner.run(query) + ); + } + + @Test + public void testTopNWithMultiValueDimFilter1() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(placementishDimension, "m") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + TestHelper.assertExpectedResults( + Sequences.toList( + runner.run( + new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(qualityDimension, "mezzanine") + .dimension(providerDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build() + ), Lists.>newArrayList() + ), runner.run(query) + ); + } + + @Test + public void testTopNWithMultiValueDimFilter2() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(placementishDimension, "m", "a", "b") + .dimension(qualityDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + TestHelper.assertExpectedResults( + Sequences.toList( + runner.run( + new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(qualityDimension, "mezzanine", "automotive", "business") + .dimension(qualityDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build() + ), Lists.>newArrayList() + ) + , runner.run(query) + ); + } + + @Test + public void testTopNWithMultiValueDimFilter3() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(placementishDimension, "a") + .dimension(placementishDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + final ArrayList> expectedResults = Lists.newArrayList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "placementish", "a", + "rows", 2L, + "index", 283.31103515625D, + "addRowsIndexConstant", 286.31103515625D + ), + ImmutableMap.of( + "placementish", "preferred", + "rows", 2L, + "index", 283.31103515625D, + "addRowsIndexConstant", 286.31103515625D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithMultiValueDimFilter4() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(placementishDimension, "a", "b") + .dimension(placementishDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + final ArrayList> expectedResults = Lists.newArrayList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "placementish", "preferred", + "rows", 4L, + "index", 514.868408203125D, + "addRowsIndexConstant", 519.868408203125D + ), + ImmutableMap.of( + "placementish", + "a", "rows", 2L, + 
"index", 283.31103515625D, + "addRowsIndexConstant", 286.31103515625D + ), + ImmutableMap.of( + "placementish", "b", + "rows", 2L, + "index", 231.557373046875D, + "addRowsIndexConstant", 234.557373046875D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNWithMultiValueDimFilter5() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .filters(placementishDimension, "preferred") + .dimension(placementishDimension) + .metric(indexMetric) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + final ArrayList> expectedResults = Lists.newArrayList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "placementish", "preferred", + "rows", 26L, + "index", 12459.361190795898D, + "addRowsIndexConstant", 12486.361190795898D + ), + ImmutableMap.of( + "placementish", "p", + "rows", 6L, + "index", 5407.213653564453D, + "addRowsIndexConstant", 5414.213653564453D + ), + ImmutableMap.of( + "placementish", "m", + "rows", 6L, + "index", 5320.717338562012D, + "addRowsIndexConstant", 5327.717338562012D + ), + ImmutableMap.of( + "placementish", "t", + "rows", 4L, + "index", 422.3440856933594D, + "addRowsIndexConstant", 427.3440856933594D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNLexicographic() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(new LexicographicTopNMetricSpec("")) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D + ), + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNLexicographicWithPreviousStop() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(new LexicographicTopNMetricSpec("spot")) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNLexicographicWithNonExistingPreviousStop() + { + TopNQuery query 
= new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(new LexicographicTopNMetricSpec("t")) + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testTopNDimExtraction() + { + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension( + new ExtractionDimensionSpec( + providerDimension, providerDimension, new RegexDimExtractionFn("(.)") + ) + ) + .metric("rows") + .threshold(4) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "s", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D + ), + ImmutableMap.of( + "provider", "t", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ), + ImmutableMap.of( + "provider", "u", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } + + @Test + public void testInvertedTopNQuery() + { + TopNQuery query = + new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(providerDimension) + .metric(new InvertedTopNMetricSpec(new NumericTopNMetricSpec(indexMetric))) + .threshold(3) + .intervals(firstToThird) + .aggregators(commonAggregators) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + "provider", "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D + ), + ImmutableMap.of( + "provider", "upfront", + "rows", 4L, + "index", 4875.669677734375D, + "addRowsIndexConstant", 4880.669677734375D + ), + ImmutableMap.of( + "provider", "total_market", + "rows", 4L, + "index", 5351.814697265625D, + "addRowsIndexConstant", 5356.814697265625D + ) + ) + ) + ) + ); + + TestHelper.assertExpectedResults(expectedResults, runner.run(query)); + } +} \ No newline at end of file diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTestHelper.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTestHelper.java new file mode 100644 index 00000000000..97b837a4b48 --- /dev/null +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTestHelper.java @@ -0,0 +1,73 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.query.topn; + +import io.druid.query.FinalizeResultsQueryRunner; +import io.druid.query.Query; +import io.druid.query.QueryRunner; +import io.druid.query.QueryRunnerFactory; +import io.druid.segment.IncrementalIndexSegment; +import io.druid.segment.QueryableIndex; +import io.druid.segment.QueryableIndexSegment; +import io.druid.segment.Segment; +import io.druid.segment.TestIndex; +import io.druid.segment.incremental.IncrementalIndex; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +public class TopNQueryRunnerTestHelper +{ + @SuppressWarnings("unchecked") + public static Collection makeQueryRunners( + QueryRunnerFactory factory + ) + throws IOException + { + final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); + final QueryableIndex mMappedTestIndex = TestIndex.getMMappedTestIndex(); + final QueryableIndex mergedRealtimeIndex = TestIndex.mergedRealtimeIndex(); + return Arrays.asList( + new Object[][]{ + { + makeQueryRunner(factory, new IncrementalIndexSegment(rtIndex, null)) + }, + { + makeQueryRunner(factory, new QueryableIndexSegment(null, mMappedTestIndex)) + }, + { + makeQueryRunner(factory, new QueryableIndexSegment(null, mergedRealtimeIndex)) + } + } + ); + } + + public static QueryRunner makeQueryRunner( + QueryRunnerFactory> factory, + Segment adapter + ) + { + return new FinalizeResultsQueryRunner( + factory.createRunner(adapter), + factory.getToolchest() + ); + } +} \ No newline at end of file diff --git a/publications/whitepaper/druid.pdf b/publications/whitepaper/druid.pdf index 3fe978b7661..6d835efc381 100644 Binary files a/publications/whitepaper/druid.pdf and b/publications/whitepaper/druid.pdf differ diff --git a/publications/whitepaper/druid.tex b/publications/whitepaper/druid.tex index 0c040a70b4d..0d1305d51f4 100644 --- a/publications/whitepaper/druid.tex +++ b/publications/whitepaper/druid.tex @@ -144,14 +144,14 @@ applications \cite{tschetter2011druid}. In the early days of Metamarkets, we were focused on building a hosted dashboard that would allow users to arbitrary explore and visualize event streams. The data store powering the dashboard needed to return queries fast enough that the data visualizations built on top -of it could update provide users with an interactive experience. +of it could provide users with an interactive experience. In addition to the query latency needs, the system had to be multi-tenant and highly available. The Metamarkets product is used in a highly concurrent environment. Downtime is costly and many businesses cannot afford to wait if a system is unavailable in the face of software upgrades or network failure. -Downtime for startups, who often do not have internal operations teams, can -determine whether a business succeeds or fails. 
+Downtime for startups, who often lack proper internal operations management, can +determine business success or failure. Finally, another key problem that Metamarkets faced in its early days was to allow users and alerting systems to be able to make business decisions in @@ -170,15 +170,15 @@ analytics platform in multiple companies. \label{sec:architecture} A Druid cluster consists of different types of nodes and each node type is designed to perform a specific set of things. We believe this design separates -concerns and simplifies the complexity of the system. There is minimal -interaction between the different node types and hence, intra-cluster -communication failures have minimal impact on data availability. The different -node types operate fairly independent of each other and to solve complex data -analysis problems, they come together to form a fully working system. -The name Druid comes from the Druid class in many role-playing games: it is a -shape-shifter, capable of taking on many different forms to fulfill various -different roles in a group. The composition of and flow of data in a Druid -cluster are shown in Figure~\ref{fig:cluster}. +concerns and simplifies the complexity of the system. The different node types +operate fairly independent of each other and there is minimal interaction +between them. Hence, intra-cluster communication failures have minimal impact +on data availability. To solve complex data analysis problems, the different +node types come together to form a fully working system. The name Druid comes +from the Druid class in many role-playing games: it is a shape-shifter, capable +of taking on many different forms to fulfill various different roles in a +group. The composition of and flow of data in a Druid cluster are shown in +Figure~\ref{fig:cluster}. \begin{figure*} \centering @@ -213,10 +213,10 @@ still be queried. Figure~\ref{fig:realtime_flow} illustrates the process. \begin{figure} \centering \includegraphics[width = 2.8in]{realtime_flow} -\caption{Real-time nodes first buffer events in memory. After some period of -time, in-memory indexes are persisted to disk. After another period of time, -all persisted indexes are merged together and handed off. Queries on data hit -the in-memory index and the persisted indexes.} +\caption{Real-time nodes first buffer events in memory. On a periodic basis, +the in-memory index is persisted to disk. On another periodic basis, all +persisted indexes are merged together and handed off. Queries for data will hit the +in-memory index and the persisted indexes.} \label{fig:realtime_flow} \end{figure} @@ -325,14 +325,14 @@ serves whatever data it finds. \begin{figure} \centering -\includegraphics[width = 2.8in]{historical_download} -\caption{Historical nodes download immutable segments from deep storage.} +\includegraphics[width = 2.6in]{historical_download} +\caption{Historical nodes download immutable segments from deep storage. Segments must be loaded in memory before they can be queried.} \label{fig:historical_download} \end{figure} Historical nodes can support read consistency because they only deal with immutable data. Immutable data blocks also enable a simple parallelization -model: historical nodes can scan and aggregate immutable blocks concurrently +model: historical nodes can concurrently scan and aggregate immutable blocks without blocking. \subsubsection{Tiers} @@ -385,7 +385,7 @@ caching the results would be unreliable. 
\includegraphics[width = 4.5in]{caching} \caption{Broker nodes cache per segment results. Every Druid query is mapped to a set of segments. Queries often combine cached segment results with those that -need tobe computed on historical and real-time nodes.} +need to be computed on historical and real-time nodes.} \label{fig:caching} \end{figure*} @@ -399,7 +399,7 @@ nodes are unable to communicate to Zookeeper, they use their last known view of the cluster and continue to forward queries to real-time and historical nodes. Broker nodes make the assumption that the structure of the cluster is the same as it was before the outage. In practice, this availability model has allowed -our Druid cluster to continue serving queries for several hours while we +our Druid cluster to continue serving queries for a significant period of time while we diagnosed Zookeeper outages. \subsection{Coordinator Nodes} @@ -564,9 +564,9 @@ In this case, we compress the raw values as opposed to their dictionary representations. \subsection{Indices for Filtering Data} -In most real world OLAP workflows, queries are issued for the aggregated -results for some set of metrics where some set of dimension specifications are -met. An example query may ask "How many Wikipedia edits were done by users in +In many real world OLAP workflows, queries are issued for the aggregated +results of some set of metrics where some set of dimension specifications are +met. An example query may be asked is: "How many Wikipedia edits were done by users in San Francisco who are also male?". This query is filtering the Wikipedia data set in Table~\ref{tab:sample_data} based on a Boolean expression of dimension values. In many real world data sets, dimension columns contain strings and @@ -712,7 +712,7 @@ equal to "Ke\$ha". The results will be bucketed by day and will be a JSON array Druid supports many types of aggregations including double sums, long sums, minimums, maximums, and several others. Druid also supports complex aggregations -such as cardinality estimation and approxmiate quantile estimation. The +such as cardinality estimation and approximate quantile estimation. The results of aggregations can be combined in mathematical expressions to form other aggregations. The query API is highly customizable and can be extended to filter and group results based on almost any arbitrary condition. It is beyond @@ -892,10 +892,9 @@ support computation directly in the storage layer. There are also other data stores designed for some of the same of the data warehousing issues that Druid is meant to solve. These systems include include in-memory databases such as SAP’s HANA \cite{farber2012sap} and VoltDB \cite{voltdb2010voltdb}. These data -stores lack Druid's low latency ingestion characteristics. Similar to -\cite{paraccel2013}, Druid has analytical features built in, however, it is -much easier to do system wide rolling software updates in Druid (with no -downtime). +stores lack Druid's low latency ingestion characteristics. Druid also has +native analytical features baked in, similar to \cite{paraccel2013}, however, +Druid allows system wide rolling software updates with no downtime. 
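The "combined in mathematical expressions" aggregations mentioned in the query API section above correspond to Druid's post-aggregators, and the TopN tests in this same patch build one. A small sketch of the `addRowsIndexConstant` = `1 + rows + index` composition used there:

```java
// Post-aggregator composition as used throughout the TopN tests in this diff:
// addRowsIndexConstant = const(1) + rows + index
ConstantPostAggregator constant = new ConstantPostAggregator("const", 1L);
FieldAccessPostAggregator rows = new FieldAccessPostAggregator("rows", "rows");
FieldAccessPostAggregator index = new FieldAccessPostAggregator("index", "index");

ArithmeticPostAggregator addRowsIndexConstant = new ArithmeticPostAggregator(
    "addRowsIndexConstant",
    "+",
    Lists.<PostAggregator>newArrayList(constant, rows, index)
);
```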
Druid's low latency data ingestion features share some similarities with Trident/Storm \cite{marz2013storm} and Streaming Spark diff --git a/publications/whitepaper/figures/historical_download.png b/publications/whitepaper/figures/historical_download.png index 416d44bfc6c..a9db3fd3609 100644 Binary files a/publications/whitepaper/figures/historical_download.png and b/publications/whitepaper/figures/historical_download.png differ diff --git a/publications/whitepaper/figures/realtime_flow.png b/publications/whitepaper/figures/realtime_flow.png index ec9bfcc27d6..4f50a5c54b5 100644 Binary files a/publications/whitepaper/figures/realtime_flow.png and b/publications/whitepaper/figures/realtime_flow.png differ diff --git a/rabbitmq/pom.xml b/rabbitmq/pom.xml index 8fc0ffe682f..e72c0aa9100 100644 --- a/rabbitmq/pom.xml +++ b/rabbitmq/pom.xml @@ -9,7 +9,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT diff --git a/s3-extensions/pom.xml b/s3-extensions/pom.xml index ccc97398ad2..11c189ad9b2 100644 --- a/s3-extensions/pom.xml +++ b/s3-extensions/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -66,4 +66,19 @@ test + + + + maven-jar-plugin + + + + true + true + + + + + + diff --git a/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java b/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java index e4d6741f414..9797a28b6d4 100644 --- a/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java +++ b/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java @@ -34,7 +34,7 @@ import io.druid.data.input.impl.FileIteratingFirehose; import io.druid.data.input.impl.StringInputRowParser; import org.apache.commons.io.IOUtils; import org.apache.commons.io.LineIterator; -import org.jets3t.service.S3Service; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.model.S3Bucket; import org.jets3t.service.model.S3Object; @@ -55,13 +55,13 @@ public class StaticS3FirehoseFactory implements FirehoseFactory { private static final Logger log = new Logger(StaticS3FirehoseFactory.class); - private final S3Service s3Client; + private final RestS3Service s3Client; private final StringInputRowParser parser; private final List uris; @JsonCreator public StaticS3FirehoseFactory( - @JacksonInject("s3Client") S3Service s3Client, + @JacksonInject("s3Client") RestS3Service s3Client, @JsonProperty("parser") StringInputRowParser parser, @JsonProperty("uris") List uris ) diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java new file mode 100644 index 00000000000..0da038352f0 --- /dev/null +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java @@ -0,0 +1,58 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.storage.s3; + +import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; +import io.druid.segment.loading.DataSegmentArchiver; +import io.druid.segment.loading.SegmentLoadingException; +import io.druid.timeline.DataSegment; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; + + +public class S3DataSegmentArchiver extends S3DataSegmentMover implements DataSegmentArchiver +{ + private final S3DataSegmentArchiverConfig config; + + @Inject + public S3DataSegmentArchiver( + RestS3Service s3Client, + S3DataSegmentArchiverConfig config + ) + { + super(s3Client); + this.config = config; + } + + @Override + public DataSegment archive(DataSegment segment) throws SegmentLoadingException + { + String targetS3Bucket = config.getArchiveBucket(); + String targetS3BaseKey = config.getArchiveBaseKey(); + + return move( + segment, + ImmutableMap.of( + "bucket", targetS3Bucket, + "baseKey", targetS3BaseKey + ) + ); + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/config/IndexerDbConnectorConfig.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiverConfig.java similarity index 62% rename from indexing-service/src/main/java/io/druid/indexing/overlord/config/IndexerDbConnectorConfig.java rename to s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiverConfig.java index 3318975c7ca..5eb33eb1b5d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/config/IndexerDbConnectorConfig.java +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiverConfig.java @@ -17,23 +17,25 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
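A rough sketch of how the new archiver is exercised once wired up. On a real node the RestS3Service and S3DataSegmentArchiverConfig are injected by Guice (the config is bound under the druid.storage prefix in the S3StorageDruidModule hunk further down), so the credentials, bucket names, dimension names and segment below are placeholders rather than anything this patch prescribes.

```
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.storage.s3.S3DataSegmentArchiver;
import io.druid.storage.s3.S3DataSegmentArchiverConfig;
import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.NoneShardSpec;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.security.AWSCredentials;
import org.joda.time.Interval;

public class ArchiveSketch
{
  public static void main(String[] args) throws Exception
  {
    // Hypothetical credentials; a Druid node obtains its RestS3Service from Guice.
    RestS3Service s3Client = new RestS3Service(new AWSCredentials("ACCESS_KEY", "SECRET_KEY"));

    // Normally populated from configuration rather than set by hand.
    S3DataSegmentArchiverConfig config = new S3DataSegmentArchiverConfig();
    config.archiveBucket = "example-archive-bucket";
    config.archiveBaseKey = "archived";

    S3DataSegmentArchiver archiver = new S3DataSegmentArchiver(s3Client, config);

    // A segment whose loadSpec points at the current index.zip, mirroring the unit test.
    DataSegment segment = new DataSegment(
        "wikipedia",
        new Interval("2013-01-01/2013-01-02"),
        "1",
        ImmutableMap.<String, Object>of(
            "key", "prod/wikipedia/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip",
            "bucket", "example-live-bucket"
        ),
        ImmutableList.of("page", "language"),
        ImmutableList.of("count"),
        new NoneShardSpec(),
        0,
        1
    );

    // archive() is move() with the target taken from the archive config: index.zip and
    // descriptor.json are relocated under archiveBucket/archiveBaseKey and the returned
    // segment carries the rewritten loadSpec.
    DataSegment archived = archiver.archive(segment);
    System.out.println(archived.getLoadSpec());
  }
}
```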
*/ -package io.druid.indexing.overlord.config; +package io.druid.storage.s3; import com.fasterxml.jackson.annotation.JsonProperty; -import io.druid.db.DbConnectorConfig; -import org.skife.config.Config; -public abstract class IndexerDbConnectorConfig extends DbConnectorConfig +public class S3DataSegmentArchiverConfig { - @JsonProperty("taskTable") - @Config("druid.database.taskTable") - public abstract String getTaskTable(); + @JsonProperty + public String archiveBucket = ""; - @JsonProperty("taskLockTable") - @Config("druid.database.taskLockTable") - public abstract String getTaskLockTable(); + @JsonProperty + public String archiveBaseKey = ""; - @JsonProperty("taskLogTable") - @Config("druid.database.taskLogTable") - public abstract String getTaskLogTable(); + public String getArchiveBucket() + { + return archiveBucket; + } + + public String getArchiveBaseKey() + { + return archiveBaseKey; + } } diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java index b9a44f631c6..0e4fde44d76 100644 --- a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java @@ -53,7 +53,7 @@ public class S3DataSegmentKiller implements DataSegmentKiller Map loadSpec = segment.getLoadSpec(); String s3Bucket = MapUtils.getString(loadSpec, "bucket"); String s3Path = MapUtils.getString(loadSpec, "key"); - String s3DescriptorPath = s3Path.substring(0, s3Path.lastIndexOf("/")) + "/descriptor.json"; + String s3DescriptorPath = S3Utils.descriptorPathForSegmentPath(s3Path); if (s3Client.isObjectInBucket(s3Bucket, s3Path)) { log.info("Removing index file[s3://%s/%s] from s3!", s3Bucket, s3Path); diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java new file mode 100644 index 00000000000..fbf18df4c18 --- /dev/null +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java @@ -0,0 +1,156 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package io.druid.storage.s3; + +import com.google.common.base.Predicate; +import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.inject.Inject; +import com.metamx.common.MapUtils; +import com.metamx.common.logger.Logger; +import io.druid.segment.loading.DataSegmentMover; +import io.druid.segment.loading.SegmentLoadingException; +import io.druid.timeline.DataSegment; +import org.jets3t.service.ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Object; + +import java.util.Map; +import java.util.concurrent.Callable; + +public class S3DataSegmentMover implements DataSegmentMover +{ + private static final Logger log = new Logger(S3DataSegmentMover.class); + + private final RestS3Service s3Client; + + @Inject + public S3DataSegmentMover( + RestS3Service s3Client + ) + { + this.s3Client = s3Client; + } + + @Override + public DataSegment move(DataSegment segment, Map targetLoadSpec) throws SegmentLoadingException + { + try { + Map loadSpec = segment.getLoadSpec(); + String s3Bucket = MapUtils.getString(loadSpec, "bucket"); + String s3Path = MapUtils.getString(loadSpec, "key"); + String s3DescriptorPath = S3Utils.descriptorPathForSegmentPath(s3Path); + + final String targetS3Bucket = MapUtils.getString(targetLoadSpec, "bucket"); + final String targetS3BaseKey = MapUtils.getString(targetLoadSpec, "baseKey"); + + final String targetS3Path = S3Utils.constructSegmentPath(targetS3BaseKey, segment); + String targetS3DescriptorPath = S3Utils.descriptorPathForSegmentPath(targetS3Path); + + if (targetS3Bucket.isEmpty()) { + throw new SegmentLoadingException("Target S3 bucket is not specified"); + } + if (targetS3Path.isEmpty()) { + throw new SegmentLoadingException("Target S3 baseKey is not specified"); + } + + safeMove(s3Bucket, s3Path, targetS3Bucket, targetS3Path); + safeMove(s3Bucket, s3DescriptorPath, targetS3Bucket, targetS3DescriptorPath); + + return segment.withLoadSpec( + ImmutableMap.builder() + .putAll( + Maps.filterKeys( + loadSpec, new Predicate() + { + @Override + public boolean apply(String input) + { + return !(input.equals("bucket") || input.equals("key")); + } + } + ) + ) + .put("bucket", targetS3Bucket) + .put("key", targetS3Path) + .build() + ); + } + catch (ServiceException e) { + throw new SegmentLoadingException(e, "Unable to move segment[%s]", segment.getIdentifier()); + } + } + + private void safeMove( + final String s3Bucket, + final String s3Path, + final String targetS3Bucket, + final String targetS3Path + ) throws ServiceException, SegmentLoadingException + { + try { + S3Utils.retryS3Operation( + new Callable() + { + @Override + public Void call() throws Exception + { + if (s3Client.isObjectInBucket(s3Bucket, s3Path)) { + if (s3Bucket.equals(targetS3Bucket) && s3Path.equals(targetS3Path)) { + log.info("No need to move file[s3://%s/%s] onto itself", s3Bucket, s3Path); + } else { + log.info( + "Moving file[s3://%s/%s] to [s3://%s/%s]", + s3Bucket, + s3Path, + targetS3Bucket, + targetS3Path + ); + s3Client.moveObject(s3Bucket, s3Path, targetS3Bucket, new S3Object(targetS3Path), false); + } + } else { + // ensure object exists in target location + if (s3Client.isObjectInBucket(targetS3Bucket, targetS3Path)) { + log.info( + "Not moving file [s3://%s/%s], already present in target location [s3://%s/%s]", + s3Bucket, s3Path, + targetS3Bucket, targetS3Path + ); + } else { + throw new SegmentLoadingException( + "Unable to 
move file [s3://%s/%s] to [s3://%s/%s], not present in either source or target location", + s3Bucket, s3Path, + targetS3Bucket, targetS3Path + ); + } + } + return null; + } + } + ); + } + catch (Exception e) { + Throwables.propagateIfInstanceOf(e, ServiceException.class); + Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class); + throw Throwables.propagate(e); + } + } +} diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java index ff286e262ac..1e310a9b90f 100644 --- a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java @@ -160,15 +160,12 @@ public class S3DataSegmentPuller implements DataSegmentPuller } ); } - catch (InterruptedException e) { + catch (S3ServiceException | IOException e) { + throw new SegmentLoadingException(e, "S3 fail! Key[%s]", coords); + } + catch (Exception e) { throw Throwables.propagate(e); } - catch (IOException e) { - throw new SegmentLoadingException(e, "S3 fail! Key[%s]", coords); - } - catch (S3ServiceException e) { - throw new SegmentLoadingException(e, "S3 fail! Key[%s]", coords); - } } @Override @@ -188,12 +185,12 @@ public class S3DataSegmentPuller implements DataSegmentPuller ); return objDetails.getLastModifiedDate().getTime(); } - catch (InterruptedException e) { - throw Throwables.propagate(e); - } catch (S3ServiceException | IOException e) { throw new SegmentLoadingException(e, e.getMessage()); } + catch (Exception e) { + throw Throwables.propagate(e); + } } private static class S3Coords diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java index 735a9365628..664c270799b 100644 --- a/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java @@ -20,7 +20,6 @@ package io.druid.storage.s3; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.io.ByteStreams; @@ -29,7 +28,6 @@ import com.google.inject.Inject; import com.metamx.emitter.EmittingLogger; import io.druid.segment.SegmentUtils; import io.druid.segment.loading.DataSegmentPusher; -import io.druid.segment.loading.DataSegmentPusherUtil; import io.druid.timeline.DataSegment; import io.druid.utils.CompressionUtils; import org.jets3t.service.ServiceException; @@ -45,7 +43,6 @@ import java.util.concurrent.Callable; public class S3DataSegmentPusher implements DataSegmentPusher { private static final EmittingLogger log = new EmittingLogger(S3DataSegmentPusher.class); - private static final Joiner JOINER = Joiner.on("/").skipNulls(); private final RestS3Service s3Client; private final S3DataSegmentPusherConfig config; @@ -73,10 +70,7 @@ public class S3DataSegmentPusher implements DataSegmentPusher public DataSegment push(final File indexFilesDir, final DataSegment inSegment) throws IOException { log.info("Uploading [%s] to S3", indexFilesDir); - final String outputKey = JOINER.join( - config.getBaseKey().isEmpty() ? 
null : config.getBaseKey(), - DataSegmentPusherUtil.getStorageDir(inSegment) - ); + final String s3Path = S3Utils.constructSegmentPath(config.getBaseKey(), inSegment); final File zipOutFile = File.createTempFile("druid", "index.zip"); final long indexSize = CompressionUtils.zip(indexFilesDir, zipOutFile); @@ -90,8 +84,10 @@ public class S3DataSegmentPusher implements DataSegmentPusher S3Object toPush = new S3Object(zipOutFile); final String outputBucket = config.getBucket(); + final String s3DescriptorPath = S3Utils.descriptorPathForSegmentPath(s3Path); + toPush.setBucketName(outputBucket); - toPush.setKey(outputKey + "/index.zip"); + toPush.setKey(s3Path); if (!config.getDisableAcl()) { toPush.setAcl(AccessControlList.REST_CANNED_AUTHENTICATED_READ); } @@ -116,7 +112,7 @@ public class S3DataSegmentPusher implements DataSegmentPusher Files.copy(ByteStreams.newInputStreamSupplier(jsonMapper.writeValueAsBytes(inSegment)), descriptorFile); S3Object descriptorObject = new S3Object(descriptorFile); descriptorObject.setBucketName(outputBucket); - descriptorObject.setKey(outputKey + "/descriptor.json"); + descriptorObject.setKey(s3DescriptorPath); if (!config.getDisableAcl()) { descriptorObject.setAcl(GSAccessControlList.REST_CANNED_BUCKET_OWNER_FULL_CONTROL); } @@ -138,8 +134,8 @@ public class S3DataSegmentPusher implements DataSegmentPusher catch (ServiceException e) { throw new IOException(e); } - catch (InterruptedException e) { + catch (Exception e) { throw Throwables.propagate(e); } } -} \ No newline at end of file +} diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java index f2675251f19..d30f49f976a 100644 --- a/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java @@ -51,8 +51,11 @@ public class S3StorageDruidModule implements DruidModule Binders.dataSegmentPullerBinder(binder).addBinding("s3_zip").to(S3DataSegmentPuller.class).in(LazySingleton.class); Binders.dataSegmentKillerBinder(binder).addBinding("s3_zip").to(S3DataSegmentKiller.class).in(LazySingleton.class); + Binders.dataSegmentMoverBinder(binder).addBinding("s3_zip").to(S3DataSegmentMover.class).in(LazySingleton.class); + Binders.dataSegmentArchiverBinder(binder).addBinding("s3_zip").to(S3DataSegmentArchiver.class).in(LazySingleton.class); Binders.dataSegmentPusherBinder(binder).addBinding("s3").to(S3DataSegmentPusher.class).in(LazySingleton.class); JsonConfigProvider.bind(binder, "druid.storage", S3DataSegmentPusherConfig.class); + JsonConfigProvider.bind(binder, "druid.storage", S3DataSegmentArchiverConfig.class); Binders.taskLogsBinder(binder).addBinding("s3").to(S3TaskLogs.class); JsonConfigProvider.bind(binder, "druid.indexer.logs", S3TaskLogsConfig.class); diff --git a/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java b/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java index 598153ec778..6cf481fa2f9 100644 --- a/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java +++ b/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java @@ -19,15 +19,17 @@ package io.druid.storage.s3; -import com.google.common.base.Throwables; -import com.metamx.common.logger.Logger; -import org.jets3t.service.S3ServiceException; +import com.google.common.base.Joiner; +import com.google.common.base.Predicate; +import com.metamx.common.RetryUtils; +import org.jets3t.service.ServiceException; +import 
io.druid.segment.loading.DataSegmentPusherUtil; +import io.druid.timeline.DataSegment; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.model.S3Bucket; import org.jets3t.service.model.S3Object; import java.io.IOException; -import java.util.Random; import java.util.concurrent.Callable; /** @@ -35,7 +37,7 @@ import java.util.concurrent.Callable; */ public class S3Utils { - private static final Logger log = new Logger(S3Utils.class); + private static final Joiner JOINER = Joiner.on("/").skipNulls(); public static void closeStreamsQuietly(S3Object s3Obj) { @@ -55,69 +57,61 @@ public class S3Utils * Retries S3 operations that fail due to io-related exceptions. Service-level exceptions (access denied, file not * found, etc) are not retried. */ - public static T retryS3Operation(Callable f) throws IOException, S3ServiceException, InterruptedException + public static T retryS3Operation(Callable f) throws Exception { - int nTry = 0; + final Predicate shouldRetry = new Predicate() + { + @Override + public boolean apply(Throwable e) + { + if (e instanceof IOException) { + return true; + } else if (e instanceof ServiceException) { + final boolean isIOException = e.getCause() instanceof IOException; + final boolean isTimeout = "RequestTimeout".equals(((ServiceException) e).getErrorCode()); + return isIOException || isTimeout; + } else { + return false; + } + } + }; final int maxTries = 10; - while (true) { - try { - nTry++; - return f.call(); - } - catch (IOException e) { - if (nTry <= maxTries) { - awaitNextRetry(e, nTry); - } else { - throw e; - } - } - catch (S3ServiceException e) { - if (nTry <= maxTries && - (e.getCause() instanceof IOException || - (e.getS3ErrorCode() != null && e.getS3ErrorCode().equals("RequestTimeout")))) { - awaitNextRetry(e, nTry); - } else { - throw e; - } - } - catch (Exception e) { - throw Throwables.propagate(e); - } - } - } - - private static void awaitNextRetry(Exception e, int nTry) throws InterruptedException - { - final long baseSleepMillis = 1000; - final long maxSleepMillis = 60000; - final double fuzzyMultiplier = Math.min(Math.max(1 + 0.2 * new Random().nextGaussian(), 0), 2); - final long sleepMillis = (long) (Math.min(maxSleepMillis, baseSleepMillis * Math.pow(2, nTry)) * fuzzyMultiplier); - log.warn("S3 fail on try %d, retrying in %,dms.", nTry, sleepMillis); - Thread.sleep(sleepMillis); + return RetryUtils.retry(f, shouldRetry, maxTries); } public static boolean isObjectInBucket(RestS3Service s3Client, String bucketName, String objectKey) - throws S3ServiceException + throws ServiceException { - try { - s3Client.getObjectDetails(new S3Bucket(bucketName), objectKey); + try { + s3Client.getObjectDetails(new S3Bucket(bucketName), objectKey); + } + catch (ServiceException e) { + if (404 == e.getResponseCode() + || "NoSuchKey".equals(e.getErrorCode()) + || "NoSuchBucket".equals(e.getErrorCode())) { + return false; } - catch (S3ServiceException e) { - if (404 == e.getResponseCode() - || "NoSuchKey".equals(e.getS3ErrorCode()) - || "NoSuchBucket".equals(e.getS3ErrorCode())) - { - return false; - } - if ("AccessDenied".equals(e.getS3ErrorCode())) - { - // Object is inaccessible to current user, but does exist. - return true; - } - // Something else has gone wrong - throw e; + if ("AccessDenied".equals(e.getErrorCode())) { + // Object is inaccessible to current user, but does exist. 
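For context on how the reworked retry helper is meant to be called: callers wrap a single S3 operation in a Callable and let retryS3Operation re-run it on IOExceptions and RequestTimeout errors, exactly as safeMove() in the new mover does. A minimal sketch with a made-up bucket and key:

```
import io.druid.storage.s3.S3Utils;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;

import java.util.concurrent.Callable;

public class RetrySketch
{
  // Re-runs the whole Callable on transient failures (IOException or a
  // RequestTimeout ServiceException), up to the helper's internal max tries.
  public static boolean objectExists(final RestS3Service s3Client) throws Exception
  {
    return S3Utils.retryS3Operation(
        new Callable<Boolean>()
        {
          @Override
          public Boolean call() throws Exception
          {
            return s3Client.isObjectInBucket("example-bucket", "example/key/index.zip");
          }
        }
    );
  }
}
```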
+ return true; } + // Something else has gone wrong + throw e; + } return true; } + + public static String constructSegmentPath(String baseKey, DataSegment segment) + { + return JOINER.join( + baseKey.isEmpty() ? null : baseKey, + DataSegmentPusherUtil.getStorageDir(segment) + ) + "/index.zip"; + } + + public static String descriptorPathForSegmentPath(String s3Path) + { + return s3Path.substring(0, s3Path.lastIndexOf("/")) + "/descriptor.json"; + } } diff --git a/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java b/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java new file mode 100644 index 00000000000..6206da881a4 --- /dev/null +++ b/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java @@ -0,0 +1,161 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.storage.s3; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.metamx.common.MapUtils; +import io.druid.segment.loading.SegmentLoadingException; +import io.druid.timeline.DataSegment; +import io.druid.timeline.partition.NoneShardSpec; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.model.StorageObject; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; +import java.util.Set; + +public class S3DataSegmentMoverTest +{ + private static final DataSegment sourceSegment = new DataSegment( + "test", + new Interval("2013-01-01/2013-01-02"), + "1", + ImmutableMap.of( + "key", + "baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip", + "bucket", + "main" + ), + ImmutableList.of("dim1", "dim1"), + ImmutableList.of("metric1", "metric2"), + new NoneShardSpec(), + 0, + 1 + ); + + @Test + public void testMove() throws Exception + { + MockStorageService mockS3Client = new MockStorageService(); + S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client); + + mockS3Client.putObject("main", new S3Object("baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip")); + mockS3Client.putObject("main", new S3Object("baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json")); + + DataSegment movedSegment = mover.move( + sourceSegment, + ImmutableMap.of("baseKey", "targetBaseKey", "bucket", "archive") + ); + + Map targetLoadSpec = movedSegment.getLoadSpec(); + Assert.assertEquals("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip", 
MapUtils.getString(targetLoadSpec, "key")); + Assert.assertEquals("archive", MapUtils.getString(targetLoadSpec, "bucket")); + Assert.assertTrue(mockS3Client.didMove()); + } + + @Test + public void testMoveNoop() throws Exception + { + MockStorageService mockS3Client = new MockStorageService(); + S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client); + + mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip")); + mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json")); + + DataSegment movedSegment = mover.move( + sourceSegment, + ImmutableMap.of("baseKey", "targetBaseKey", "bucket", "archive") + ); + + Map targetLoadSpec = movedSegment.getLoadSpec(); + + Assert.assertEquals("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip", MapUtils.getString(targetLoadSpec, "key")); + Assert.assertEquals("archive", MapUtils.getString(targetLoadSpec, "bucket")); + Assert.assertFalse(mockS3Client.didMove()); + } + + @Test(expected = SegmentLoadingException.class) + public void testMoveException() throws Exception + { + MockStorageService mockS3Client = new MockStorageService(); + S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client); + + mover.move( + sourceSegment, + ImmutableMap.of("baseKey", "targetBaseKey", "bucket", "archive") + ); + } + + private class MockStorageService extends RestS3Service { + Map> storage = Maps.newHashMap(); + boolean moved = false; + + private MockStorageService() throws S3ServiceException + { + super(null); + } + + public boolean didMove() { + return moved; + } + + @Override + public boolean isObjectInBucket(String bucketName, String objectKey) throws ServiceException + { + Set objects = storage.get(bucketName); + return (objects != null && objects.contains(objectKey)); + } + + @Override + public Map moveObject( + String sourceBucketName, + String sourceObjectKey, + String destinationBucketName, + StorageObject destinationObject, + boolean replaceMetadata + ) throws ServiceException + { + moved = true; + if(isObjectInBucket(sourceBucketName, sourceObjectKey)) { + this.putObject(destinationBucketName, new S3Object(destinationObject.getKey())); + storage.get(sourceBucketName).remove(sourceObjectKey); + } + return null; + } + + @Override + public S3Object putObject(String bucketName, S3Object object) throws S3ServiceException + { + if (!storage.containsKey(bucketName)) { + storage.put(bucketName, Sets.newHashSet()); + } + storage.get(bucketName).add(object.getKey()); + return object; + } + } +} diff --git a/server/pom.xml b/server/pom.xml index a85223056cc..a354a0728ed 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -37,24 +37,14 @@ druid-processing ${project.parent.version} - - - com.metamx - emitter - com.metamx http-client - - com.metamx - java-util - com.metamx server-metrics - commons-cli commons-cli @@ -64,21 +54,13 @@ commons-lang - commons-io - commons-io + javax.inject + javax.inject com.amazonaws aws-java-sdk - - com.ning - compress-lzf - - - org.skife.config - config-magic - org.apache.curator curator-framework @@ -87,42 +69,14 @@ org.apache.curator curator-x-discovery - - it.uniroma3.mat - extendedset - - - com.google.guava - guava - - - com.google.inject - guice - - - com.fasterxml.jackson.core - jackson-core - com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider - - 
com.fasterxml.jackson.core - jackson-databind - com.fasterxml.jackson.dataformat jackson-dataformat-smile - - javax.inject - javax.inject - - - org.jdbi - jdbi - com.sun.jersey jersey-server @@ -131,6 +85,10 @@ com.sun.jersey jersey-core + + com.google.inject.extensions + guice-servlet + com.sun.jersey.contribs jersey-guice @@ -139,22 +97,10 @@ org.eclipse.jetty jetty-server - - joda-time - joda-time - com.google.code.findbugs jsr305 - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - io.tesla.aether tesla-aether @@ -220,8 +166,6 @@ caliper test - - @@ -235,6 +179,14 @@ + + + + true + true + + + org.antlr diff --git a/server/src/main/java/io/druid/curator/discovery/DiscoveryModule.java b/server/src/main/java/io/druid/curator/discovery/DiscoveryModule.java index f3fc56d59a7..bc1558f60f6 100644 --- a/server/src/main/java/io/druid/curator/discovery/DiscoveryModule.java +++ b/server/src/main/java/io/druid/curator/discovery/DiscoveryModule.java @@ -40,14 +40,7 @@ import io.druid.guice.annotations.Self; import io.druid.server.DruidNode; import io.druid.server.initialization.CuratorDiscoveryConfig; import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.x.discovery.ProviderStrategy; -import org.apache.curator.x.discovery.ServiceCache; -import org.apache.curator.x.discovery.ServiceCacheBuilder; -import org.apache.curator.x.discovery.ServiceDiscovery; -import org.apache.curator.x.discovery.ServiceDiscoveryBuilder; -import org.apache.curator.x.discovery.ServiceInstance; -import org.apache.curator.x.discovery.ServiceProvider; -import org.apache.curator.x.discovery.ServiceProviderBuilder; +import org.apache.curator.x.discovery.*; import org.apache.curator.x.discovery.details.ServiceCacheListener; import java.io.IOException; @@ -389,8 +382,12 @@ public class DiscoveryModule implements Module } @Override - public ServiceProviderBuilder refreshPaddingMs(int refreshPaddingMs) - { + public ServiceProviderBuilder downInstancePolicy(DownInstancePolicy downInstancePolicy) { + return this; + } + + @Override + public ServiceProviderBuilder additionalFilter(InstanceFilter tInstanceFilter) { return this; } } @@ -409,6 +406,11 @@ public class DiscoveryModule implements Module return null; } + @Override + public void noteError(ServiceInstance tServiceInstance) { + + } + @Override public void close() throws IOException { diff --git a/server/src/main/java/io/druid/curator/discovery/ServerDiscoveryFactory.java b/server/src/main/java/io/druid/curator/discovery/ServerDiscoveryFactory.java index c436289e70e..0e66df0b9ed 100644 --- a/server/src/main/java/io/druid/curator/discovery/ServerDiscoveryFactory.java +++ b/server/src/main/java/io/druid/curator/discovery/ServerDiscoveryFactory.java @@ -62,6 +62,11 @@ public class ServerDiscoveryFactory return null; } + @Override + public void noteError(ServiceInstance tServiceInstance) { + // do nothing + } + @Override public void close() throws IOException { diff --git a/server/src/main/java/io/druid/db/DatabaseRuleManager.java b/server/src/main/java/io/druid/db/DatabaseRuleManager.java index 0ba44c52c85..036acda23d6 100644 --- a/server/src/main/java/io/druid/db/DatabaseRuleManager.java +++ b/server/src/main/java/io/druid/db/DatabaseRuleManager.java @@ -207,7 +207,7 @@ public class DatabaseRuleManager String.format( "SELECT r.dataSource, r.payload " + "FROM %1$s r " - + "INNER JOIN(SELECT dataSource, max(version) as version, payload FROM %1$s GROUP BY dataSource) ds " + + "INNER JOIN(SELECT dataSource, max(version) as version FROM %1$s GROUP BY dataSource) 
ds " + "ON r.datasource = ds.datasource and r.version = ds.version", getRulesTable() ) diff --git a/server/src/main/java/io/druid/db/DatabaseSegmentManager.java b/server/src/main/java/io/druid/db/DatabaseSegmentManager.java index 203d082b806..3d68ad46978 100644 --- a/server/src/main/java/io/druid/db/DatabaseSegmentManager.java +++ b/server/src/main/java/io/druid/db/DatabaseSegmentManager.java @@ -213,7 +213,7 @@ public class DatabaseSegmentManager for (DataSegment segment : segments) { batch.add( String.format( - "UPDATE %s SET used=1 WHERE id = '%s'", + "UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier() ) @@ -244,7 +244,7 @@ public class DatabaseSegmentManager public Void withHandle(Handle handle) throws Exception { handle.createStatement( - String.format("UPDATE %s SET used=1 WHERE id = :id", getSegmentsTable()) + String.format("UPDATE %s SET used=true WHERE id = :id", getSegmentsTable()) ) .bind("id", segmentId) .execute(); @@ -278,7 +278,7 @@ public class DatabaseSegmentManager public Void withHandle(Handle handle) throws Exception { handle.createStatement( - String.format("UPDATE %s SET used=0 WHERE dataSource = :dataSource", getSegmentsTable()) + String.format("UPDATE %s SET used=false WHERE dataSource = :dataSource", getSegmentsTable()) ) .bind("dataSource", ds) .execute(); @@ -308,7 +308,7 @@ public class DatabaseSegmentManager public Void withHandle(Handle handle) throws Exception { handle.createStatement( - String.format("UPDATE %s SET used=0 WHERE id = :segmentID", getSegmentsTable()) + String.format("UPDATE %s SET used=false WHERE id = :segmentID", getSegmentsTable()) ).bind("segmentID", segmentID) .execute(); @@ -408,7 +408,7 @@ public class DatabaseSegmentManager public List> withHandle(Handle handle) throws Exception { return handle.createQuery( - String.format("SELECT payload FROM %s WHERE used=1", getSegmentsTable()) + String.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()) ).list(); } } @@ -465,4 +465,4 @@ public class DatabaseSegmentManager private String getSegmentsTable() { return dbTables.get().getSegmentsTable(); } -} \ No newline at end of file +} diff --git a/server/src/main/java/io/druid/guice/HttpClientModule.java b/server/src/main/java/io/druid/guice/HttpClientModule.java index 8bf7c70b698..700aff7baee 100644 --- a/server/src/main/java/io/druid/guice/HttpClientModule.java +++ b/server/src/main/java/io/druid/guice/HttpClientModule.java @@ -103,7 +103,7 @@ public class HttpClientModule implements Module private int numConnections = 5; @JsonProperty - private Period readTimeout = null; + private Period readTimeout = new Period("PT5M"); public int getNumConnections() { diff --git a/server/src/main/java/io/druid/guice/QueryRunnerFactoryModule.java b/server/src/main/java/io/druid/guice/QueryRunnerFactoryModule.java index 58dc8c82499..6f4dc80b059 100644 --- a/server/src/main/java/io/druid/guice/QueryRunnerFactoryModule.java +++ b/server/src/main/java/io/druid/guice/QueryRunnerFactoryModule.java @@ -37,6 +37,8 @@ import io.druid.query.timeboundary.TimeBoundaryQuery; import io.druid.query.timeboundary.TimeBoundaryQueryRunnerFactory; import io.druid.query.timeseries.TimeseriesQuery; import io.druid.query.timeseries.TimeseriesQueryRunnerFactory; +import io.druid.query.topn.TopNQuery; +import io.druid.query.topn.TopNQueryRunnerFactory; import java.util.Map; @@ -52,6 +54,7 @@ public class QueryRunnerFactoryModule extends QueryToolChestModule .put(SegmentMetadataQuery.class, SegmentMetadataQueryRunnerFactory.class) 
.put(GroupByQuery.class, GroupByQueryRunnerFactory.class) .put(SelectQuery.class, SelectQueryRunnerFactory.class) + .put(TopNQuery.class, TopNQueryRunnerFactory.class) .build(); @Override diff --git a/server/src/main/java/io/druid/guice/QueryToolChestModule.java b/server/src/main/java/io/druid/guice/QueryToolChestModule.java index 96625d4e9ed..ed5b59591a9 100644 --- a/server/src/main/java/io/druid/guice/QueryToolChestModule.java +++ b/server/src/main/java/io/druid/guice/QueryToolChestModule.java @@ -40,6 +40,9 @@ import io.druid.query.timeboundary.TimeBoundaryQuery; import io.druid.query.timeboundary.TimeBoundaryQueryQueryToolChest; import io.druid.query.timeseries.TimeseriesQuery; import io.druid.query.timeseries.TimeseriesQueryQueryToolChest; +import io.druid.query.topn.TopNQuery; +import io.druid.query.topn.TopNQueryConfig; +import io.druid.query.topn.TopNQueryQueryToolChest; import java.util.Map; @@ -55,6 +58,7 @@ public class QueryToolChestModule implements Module .put(SegmentMetadataQuery.class, SegmentMetadataQueryQueryToolChest.class) .put(GroupByQuery.class, GroupByQueryQueryToolChest.class) .put(SelectQuery.class, SelectQueryQueryToolChest.class) + .put(TopNQuery.class, TopNQueryQueryToolChest.class) .build(); @Override @@ -70,5 +74,6 @@ public class QueryToolChestModule implements Module JsonConfigProvider.bind(binder, "druid.query", QueryConfig.class); JsonConfigProvider.bind(binder, "druid.query.groupBy", GroupByQueryConfig.class); JsonConfigProvider.bind(binder, "druid.query.search", SearchQueryConfig.class); + JsonConfigProvider.bind(binder, "druid.query.topN", TopNQueryConfig.class); } } diff --git a/server/src/main/java/io/druid/initialization/Initialization.java b/server/src/main/java/io/druid/initialization/Initialization.java index 408cc2c1d0d..5e8e0461202 100644 --- a/server/src/main/java/io/druid/initialization/Initialization.java +++ b/server/src/main/java/io/druid/initialization/Initialization.java @@ -52,7 +52,6 @@ import io.druid.guice.QueryableModule; import io.druid.guice.ServerModule; import io.druid.guice.ServerViewModule; import io.druid.guice.StorageNodeModule; -import io.druid.guice.TaskLogsModule; import io.druid.guice.annotations.Client; import io.druid.guice.annotations.Json; import io.druid.guice.annotations.Smile; @@ -85,6 +84,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLClassLoader; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -102,11 +102,26 @@ public class Initialization "io.druid", "com.metamx.druid" ); + private final static Map extensionsMap = Maps.newHashMap(); - public synchronized static List getFromExtensions(ExtensionsConfig config, Class clazz) + /** + * @param clazz Module class + * @param + * @return Returns the set of modules loaded. 
+ */ + public static Set getLoadedModules(Class clazz) + { + Set retVal = extensionsMap.get(clazz); + if (retVal == null) { + return Sets.newHashSet(); + } + return retVal; + } + + public synchronized static Collection getFromExtensions(ExtensionsConfig config, Class clazz) { final TeslaAether aether = getAetherClient(config); - List retVal = Lists.newArrayList(); + Set retVal = Sets.newHashSet(); if (config.searchCurrentClassloader()) { for (T module : ServiceLoader.load(clazz, Initialization.class.getClassLoader())) { @@ -132,6 +147,9 @@ public class Initialization } } + // update the map with currently loaded modules + extensionsMap.put(clazz, retVal); + return retVal; } @@ -299,7 +317,6 @@ public class Initialization new JacksonConfigManagerModule(), new IndexingServiceDiscoveryModule(), new DataSegmentPusherPullerModule(), - new TaskLogsModule(), new FirehoseModule() ); diff --git a/server/src/main/java/io/druid/segment/loading/OmniDataSegmentArchiver.java b/server/src/main/java/io/druid/segment/loading/OmniDataSegmentArchiver.java new file mode 100644 index 00000000000..bf34bbe17bb --- /dev/null +++ b/server/src/main/java/io/druid/segment/loading/OmniDataSegmentArchiver.java @@ -0,0 +1,57 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.segment.loading; + +import com.google.inject.Inject; +import com.metamx.common.MapUtils; +import io.druid.timeline.DataSegment; + +import java.util.Map; + +public class OmniDataSegmentArchiver implements DataSegmentArchiver +{ + private final Map archivers; + + @Inject + public OmniDataSegmentArchiver( + Map archivers + ) + { + this.archivers = archivers; + } + + @Override + public DataSegment archive(DataSegment segment) throws SegmentLoadingException + { + return getArchiver(segment).archive(segment); + } + + private DataSegmentArchiver getArchiver(DataSegment segment) throws SegmentLoadingException + { + String type = MapUtils.getString(segment.getLoadSpec(), "type"); + DataSegmentArchiver archiver = archivers.get(type); + + if (archiver == null) { + throw new SegmentLoadingException("Unknown loader type[%s]. Known types are %s", type, archivers.keySet()); + } + + return archiver; + } +} diff --git a/server/src/main/java/io/druid/segment/loading/OmniDataSegmentMover.java b/server/src/main/java/io/druid/segment/loading/OmniDataSegmentMover.java new file mode 100644 index 00000000000..d585b0b7db9 --- /dev/null +++ b/server/src/main/java/io/druid/segment/loading/OmniDataSegmentMover.java @@ -0,0 +1,57 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. 
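The Omni* classes added here and just below dispatch on the "type" entry of a segment's loadSpec, using the same keys the storage modules register (for example "s3_zip" in the S3StorageDruidModule hunk above). A hypothetical loadSpec for illustration; the bucket and key are invented:

```
import com.google.common.collect.ImmutableMap;

import java.util.Map;

public class LoadSpecSketch
{
  public static void main(String[] args)
  {
    // Invented example of a segment loadSpec as it would appear in the metadata store.
    Map<String, Object> loadSpec = ImmutableMap.<String, Object>of(
        "type", "s3_zip",
        "bucket", "example-bucket",
        "key", "prod/wikipedia/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip"
    );

    // OmniDataSegmentArchiver and OmniDataSegmentMover read "type" and delegate to
    // whichever implementation Guice bound under that key (the S3 classes for "s3_zip").
    System.out.println("Would dispatch to: " + loadSpec.get("type"));
  }
}
```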
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package io.druid.segment.loading; + +import com.google.inject.Inject; +import com.metamx.common.MapUtils; +import io.druid.timeline.DataSegment; + +import java.util.Map; + +public class OmniDataSegmentMover implements DataSegmentMover +{ + private final Map movers; + + @Inject + public OmniDataSegmentMover( + Map movers + ) + { + this.movers = movers; + } + + @Override + public DataSegment move(DataSegment segment, Map targetLoadSpec) throws SegmentLoadingException + { + return getMover(segment).move(segment, targetLoadSpec); + } + + private DataSegmentMover getMover(DataSegment segment) throws SegmentLoadingException + { + String type = MapUtils.getString(segment.getLoadSpec(), "type"); + DataSegmentMover mover = movers.get(type); + + if (mover == null) { + throw new SegmentLoadingException("Unknown loader type[%s]. Known types are %s", type, movers.keySet()); + } + + return mover; + } +} diff --git a/server/src/main/java/io/druid/segment/realtime/RealtimeMetricsMonitor.java b/server/src/main/java/io/druid/segment/realtime/RealtimeMetricsMonitor.java index 246654ffd8f..5714e98cf62 100644 --- a/server/src/main/java/io/druid/segment/realtime/RealtimeMetricsMonitor.java +++ b/server/src/main/java/io/druid/segment/realtime/RealtimeMetricsMonitor.java @@ -20,6 +20,7 @@ package io.druid.segment.realtime; import com.google.common.collect.Maps; +import com.google.inject.Inject; import com.metamx.emitter.service.ServiceEmitter; import com.metamx.emitter.service.ServiceMetricEvent; import com.metamx.metrics.AbstractMonitor; @@ -34,6 +35,7 @@ public class RealtimeMetricsMonitor extends AbstractMonitor private final Map previousValues; private final List fireDepartments; + @Inject public RealtimeMetricsMonitor(List fireDepartments) { this.fireDepartments = fireDepartments; diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java b/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java index f78bc0ac390..df96aa45f5e 100644 --- a/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java +++ b/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.api.client.repackaged.com.google.common.base.Throwables; import com.google.common.collect.Lists; +import com.metamx.common.ISE; import io.druid.data.input.Firehose; import io.druid.data.input.FirehoseFactory; import io.druid.data.input.impl.FileIteratingFirehose; @@ -78,21 +79,26 @@ public class LocalFirehoseFactory implements FirehoseFactory @Override public Firehose connect() throws IOException { - final LinkedList files = Lists.newLinkedList( - Arrays.asList( - 
baseDir.listFiles( - new FilenameFilter() - { - @Override - public boolean accept(File file, String name) - { - return name.contains(filter); - } - } - ) - ) + File[] foundFiles = baseDir.listFiles( + new FilenameFilter() + { + @Override + public boolean accept(File file, String name) + { + return name.contains(filter); + } + } ); + if (foundFiles == null || foundFiles.length == 0) { + throw new ISE("Found no files to ingest! Check your schema."); + } + + final LinkedList files = Lists.newLinkedList( + Arrays.asList(foundFiles) + ); + + return new FileIteratingFirehose( new Iterator() { diff --git a/server/src/main/java/io/druid/server/StatusResource.java b/server/src/main/java/io/druid/server/StatusResource.java index a2b30268d4d..44aa50a0ee7 100644 --- a/server/src/main/java/io/druid/server/StatusResource.java +++ b/server/src/main/java/io/druid/server/StatusResource.java @@ -19,11 +19,17 @@ package io.druid.server; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import io.druid.initialization.DruidModule; +import io.druid.initialization.Initialization; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.Produces; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; /** */ @@ -34,20 +40,25 @@ public class StatusResource @Produces("application/json") public Status doGet() { - return new Status( - StatusResource.class.getPackage().getImplementationVersion(), - new Memory(Runtime.getRuntime()) - ); + return new Status(Initialization.getLoadedModules(DruidModule.class)); } - public static class Status { + public static class Status + { final String version; + final List modules; final Memory memory; - public Status(String version, Memory memory) + public Status(Collection modules) { - this.version = version; - this.memory = memory; + this.version = getDruidVersion(); + this.modules = getExtensionVersions(modules); + this.memory = new Memory(Runtime.getRuntime()); + } + + private String getDruidVersion() + { + return Status.class.getPackage().getImplementationVersion(); } @JsonProperty @@ -56,20 +67,107 @@ public class StatusResource return version; } + @JsonProperty + public List getModules() + { + return modules; + } + @JsonProperty public Memory getMemory() { return memory; } + + @Override + public String toString() + { + final String NL = System.getProperty("line.separator"); + StringBuilder output = new StringBuilder(); + output.append(String.format("Druid version - %s", version)).append(NL).append(NL); + + if (modules.size() > 0) { + output.append("Registered Druid Modules").append(NL); + } else { + output.append("No Druid Modules loaded !"); + } + + for (ModuleVersion moduleVersion : modules) { + output.append(moduleVersion).append(NL); + } + return output.toString(); + } + + /** + * Load the unique extensions and return their implementation-versions + * + * @return map of extensions loaded with their respective implementation versions. 
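With this change the status endpoint reports the loaded extension modules alongside the Druid version. A small sketch of exercising the new bean directly; it assumes extensions were already loaded through getFromExtensions() earlier in the process, otherwise the module list is simply empty.

```
import io.druid.initialization.DruidModule;
import io.druid.initialization.Initialization;
import io.druid.server.StatusResource;

import java.util.Set;

public class StatusSketch
{
  public static void main(String[] args)
  {
    // Whatever DruidModules Initialization has loaded so far in this JVM.
    Set<DruidModule> modules = Initialization.getLoadedModules(DruidModule.class);

    // Same construction the resource's doGet() performs; toString() renders the
    // Druid version plus one "name (artifact-version)" line per module.
    StatusResource.Status status = new StatusResource.Status(modules);
    System.out.println(status);
  }
}
```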
+ */ + private List getExtensionVersions(Collection druidModules) + { + List moduleVersions = new ArrayList<>(); + for (DruidModule module : druidModules) { + String artifact = module.getClass().getPackage().getImplementationTitle(); + String version = module.getClass().getPackage().getImplementationVersion(); + + moduleVersions.add(new ModuleVersion(module.getClass().getCanonicalName(), artifact, version)); + } + return moduleVersions; + } } - public static class Memory { + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class ModuleVersion + { + final String name; + final String artifact; + final String version; + + public ModuleVersion(String name, String artifact, String version) + { + this.name = name; + this.artifact = artifact; + this.version = version; + } + + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public String getArtifact() + { + return artifact; + } + + @JsonProperty + public String getVersion() + { + return version; + } + + @Override + public String toString() + { + if (artifact == null || artifact.isEmpty()) { + return String.format(" - %s ", name); + } else { + return String.format(" - %s (%s-%s)", name, artifact, version); + } + } + } + + public static class Memory + { final long maxMemory; final long totalMemory; final long freeMemory; final long usedMemory; - public Memory(Runtime runtime) { + public Memory(Runtime runtime) + { maxMemory = runtime.maxMemory(); totalMemory = runtime.totalMemory(); freeMemory = runtime.freeMemory(); diff --git a/server/src/main/java/io/druid/server/coordination/ServerManager.java b/server/src/main/java/io/druid/server/coordination/ServerManager.java index 4cbe042350a..950be651e86 100644 --- a/server/src/main/java/io/druid/server/coordination/ServerManager.java +++ b/server/src/main/java/io/druid/server/coordination/ServerManager.java @@ -116,7 +116,13 @@ public class ServerManager implements QuerySegmentWalker return segmentLoader.isSegmentLoaded(segment); } - public void loadSegment(final DataSegment segment) throws SegmentLoadingException + /** + * Load a single segment. 
+ * @param segment segment to load + * @return true if the segment was newly loaded, false if it was already loaded + * @throws SegmentLoadingException if the segment cannot be loaded + */ + public boolean loadSegment(final DataSegment segment) throws SegmentLoadingException { final Segment adapter; try { @@ -150,8 +156,8 @@ public class ServerManager implements QuerySegmentWalker segment.getVersion() ); if ((entry != null) && (entry.getChunk(segment.getShardSpec().getPartitionNum()) != null)) { - log.info("Told to load a adapter for a segment[%s] that already exists", segment.getIdentifier()); - throw new SegmentLoadingException("Segment already exists[%s]", segment.getIdentifier()); + log.warn("Told to load a adapter for a segment[%s] that already exists", segment.getIdentifier()); + return false; } loadedIntervals.add( @@ -165,6 +171,7 @@ public class ServerManager implements QuerySegmentWalker synchronized (dataSourceCounts) { dataSourceCounts.add(dataSource, 1L); } + return true; } } diff --git a/server/src/main/java/io/druid/server/coordination/ZkCoordinator.java b/server/src/main/java/io/druid/server/coordination/ZkCoordinator.java index a55341a75a1..246415f57d0 100644 --- a/server/src/main/java/io/druid/server/coordination/ZkCoordinator.java +++ b/server/src/main/java/io/druid/server/coordination/ZkCoordinator.java @@ -230,34 +230,37 @@ public class ZkCoordinator implements DataSegmentChangeHandler try { log.info("Loading segment %s", segment.getIdentifier()); + final boolean loaded; try { - serverManager.loadSegment(segment); + loaded = serverManager.loadSegment(segment); } catch (Exception e) { removeSegment(segment); throw new SegmentLoadingException(e, "Exception loading segment[%s]", segment.getIdentifier()); } - File segmentInfoCacheFile = new File(config.getInfoDir(), segment.getIdentifier()); - if (!segmentInfoCacheFile.exists()) { + if (loaded) { + File segmentInfoCacheFile = new File(config.getInfoDir(), segment.getIdentifier()); + if (!segmentInfoCacheFile.exists()) { + try { + jsonMapper.writeValue(segmentInfoCacheFile, segment); + } + catch (IOException e) { + removeSegment(segment); + throw new SegmentLoadingException( + e, "Failed to write to disk segment info cache file[%s]", segmentInfoCacheFile + ); + } + } + try { - jsonMapper.writeValue(segmentInfoCacheFile, segment); + announcer.announceSegment(segment); } catch (IOException e) { - removeSegment(segment); - throw new SegmentLoadingException( - e, "Failed to write to disk segment info cache file[%s]", segmentInfoCacheFile - ); + throw new SegmentLoadingException(e, "Failed to announce segment[%s]", segment.getIdentifier()); } } - try { - announcer.announceSegment(segment); - } - catch (IOException e) { - throw new SegmentLoadingException(e, "Failed to announce segment[%s]", segment.getIdentifier()); - } - } catch (SegmentLoadingException e) { log.makeAlert(e, "Failed to load segment for dataSource") @@ -275,8 +278,9 @@ public class ZkCoordinator implements DataSegmentChangeHandler for (DataSegment segment : segments) { log.info("Loading segment %s", segment.getIdentifier()); + final boolean loaded; try { - serverManager.loadSegment(segment); + loaded = serverManager.loadSegment(segment); } catch (Exception e) { log.error(e, "Exception loading segment[%s]", segment.getIdentifier()); @@ -285,20 +289,22 @@ public class ZkCoordinator implements DataSegmentChangeHandler continue; } - File segmentInfoCacheFile = new File(config.getInfoDir(), segment.getIdentifier()); - if (!segmentInfoCacheFile.exists()) { - try { - 
jsonMapper.writeValue(segmentInfoCacheFile, segment); + if (loaded) { + File segmentInfoCacheFile = new File(config.getInfoDir(), segment.getIdentifier()); + if (!segmentInfoCacheFile.exists()) { + try { + jsonMapper.writeValue(segmentInfoCacheFile, segment); + } + catch (IOException e) { + log.error(e, "Failed to write to disk segment info cache file[%s]", segmentInfoCacheFile); + removeSegment(segment); + segmentFailures.add(segment.getIdentifier()); + continue; + } } - catch (IOException e) { - log.error(e, "Failed to write to disk segment info cache file[%s]", segmentInfoCacheFile); - removeSegment(segment); - segmentFailures.add(segment.getIdentifier()); - continue; - } - } - validSegments.add(segment); + validSegments.add(segment); + } } try { diff --git a/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java index 0ae18185015..71a4d0eb08c 100644 --- a/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java @@ -46,10 +46,13 @@ import io.druid.client.ServerInventoryView; import io.druid.client.indexing.IndexingServiceClient; import io.druid.common.config.JacksonConfigManager; import io.druid.concurrent.Execs; +import io.druid.curator.discovery.ServiceAnnouncer; import io.druid.db.DatabaseRuleManager; import io.druid.db.DatabaseSegmentManager; import io.druid.guice.ManageLifecycle; +import io.druid.guice.annotations.Self; import io.druid.segment.IndexIO; +import io.druid.server.DruidNode; import io.druid.server.initialization.ZkPathsConfig; import io.druid.timeline.DataSegment; import org.apache.curator.framework.CuratorFramework; @@ -99,6 +102,8 @@ public class DruidCoordinator private final LoadQueueTaskMaster taskMaster; private final Map loadManagementPeons; private final AtomicReference leaderLatch; + private final ServiceAnnouncer serviceAnnouncer; + private final DruidNode self; @Inject public DruidCoordinator( @@ -112,7 +117,9 @@ public class DruidCoordinator ServiceEmitter emitter, ScheduledExecutorFactory scheduledExecutorFactory, IndexingServiceClient indexingServiceClient, - LoadQueueTaskMaster taskMaster + LoadQueueTaskMaster taskMaster, + ServiceAnnouncer serviceAnnouncer, + @Self DruidNode self ) { this( @@ -127,6 +134,8 @@ public class DruidCoordinator scheduledExecutorFactory, indexingServiceClient, taskMaster, + serviceAnnouncer, + self, Maps.newConcurrentMap() ); } @@ -143,6 +152,8 @@ public class DruidCoordinator ScheduledExecutorFactory scheduledExecutorFactory, IndexingServiceClient indexingServiceClient, LoadQueueTaskMaster taskMaster, + ServiceAnnouncer serviceAnnouncer, + DruidNode self, ConcurrentMap loadQueuePeonMap ) { @@ -157,6 +168,8 @@ public class DruidCoordinator this.emitter = emitter; this.indexingServiceClient = indexingServiceClient; this.taskMaster = taskMaster; + this.serviceAnnouncer = serviceAnnouncer; + this.self = self; this.exec = scheduledExecutorFactory.create(1, "Coordinator-Exec--%d"); @@ -474,6 +487,7 @@ public class DruidCoordinator databaseSegmentManager.start(); databaseRuleManager.start(); serverInventoryView.start(); + serviceAnnouncer.announce(self); final List> coordinatorRunnables = Lists.newArrayList(); dynamicConfigs = configManager.watch( @@ -554,8 +568,10 @@ public class DruidCoordinator } loadManagementPeons.clear(); - databaseSegmentManager.stop(); + serviceAnnouncer.unannounce(self); serverInventoryView.stop(); + databaseRuleManager.stop(); + 
databaseSegmentManager.stop(); leader = false; } catch (Exception e) { diff --git a/server/src/main/java/io/druid/server/http/BackwardsCompatiableInfoResource.java b/server/src/main/java/io/druid/server/http/BackwardsCompatibleInfoResource.java similarity index 93% rename from server/src/main/java/io/druid/server/http/BackwardsCompatiableInfoResource.java rename to server/src/main/java/io/druid/server/http/BackwardsCompatibleInfoResource.java index 11fe97da9be..ed1cf580887 100644 --- a/server/src/main/java/io/druid/server/http/BackwardsCompatiableInfoResource.java +++ b/server/src/main/java/io/druid/server/http/BackwardsCompatibleInfoResource.java @@ -32,10 +32,10 @@ import javax.ws.rs.Path; /** */ @Path("/static/info") -public class BackwardsCompatiableInfoResource extends InfoResource +public class BackwardsCompatibleInfoResource extends InfoResource { @Inject - public BackwardsCompatiableInfoResource( + public BackwardsCompatibleInfoResource( DruidCoordinator coordinator, InventoryView serverInventoryView, DatabaseSegmentManager databaseSegmentManager, diff --git a/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java b/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java index 5b716b20a73..ea7d3256229 100644 --- a/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java +++ b/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java @@ -24,8 +24,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import io.druid.data.input.InputRow; -import java.util.Map; - public class LinearShardSpec implements ShardSpec { private int partitionNum; @@ -53,4 +51,12 @@ public class LinearShardSpec implements ShardSpec public boolean isInChunk(InputRow inputRow) { return true; } + + @Override + public String toString() + { + return "LinearShardSpec{" + + "partitionNum=" + partitionNum + + '}'; + } } diff --git a/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java b/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java index 7c09787b5cc..73a3437a80a 100644 --- a/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java +++ b/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java @@ -25,8 +25,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import io.druid.data.input.InputRow; -import java.util.Map; - public class NumberedShardSpec implements ShardSpec { @JsonIgnore @@ -71,4 +69,13 @@ public class NumberedShardSpec implements ShardSpec { return true; } + + @Override + public String toString() + { + return "NumberedShardSpec{" + + "partitionNum=" + partitionNum + + ", partitions=" + partitions + + '}'; + } } diff --git a/server/src/test/java/io/druid/curator/inventory/CuratorInventoryManagerTest.java b/server/src/test/java/io/druid/curator/inventory/CuratorInventoryManagerTest.java index 1c13475912c..e325b2c59df 100644 --- a/server/src/test/java/io/druid/curator/inventory/CuratorInventoryManagerTest.java +++ b/server/src/test/java/io/druid/curator/inventory/CuratorInventoryManagerTest.java @@ -29,6 +29,7 @@ import org.apache.curator.framework.api.CuratorEventType; import org.apache.curator.framework.api.CuratorListener; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.Watcher; +import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -50,6 +51,12 @@ public class CuratorInventoryManagerTest extends 
io.druid.curator.CuratorTestBase
     exec = Execs.singleThreaded("curator-inventory-manager-test-%s");
   }
 
+  @After
+  public void tearDown() throws Exception
+  {
+    tearDownServerAndCurator();
+  }
+
   @Test
   public void testSanity() throws Exception
   {
diff --git a/server/src/test/java/io/druid/initialization/InitializationTest.java b/server/src/test/java/io/druid/initialization/InitializationTest.java
new file mode 100644
index 00000000000..d1cb0afd0c5
--- /dev/null
+++ b/server/src/test/java/io/druid/initialization/InitializationTest.java
@@ -0,0 +1,152 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+package io.druid.initialization;
+
+import com.fasterxml.jackson.databind.Module;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.ImmutableList;
+import com.google.inject.Binder;
+import com.google.inject.Injector;
+import io.druid.server.initialization.ExtensionsConfig;
+import junit.framework.Assert;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.FixMethodOrder;
+import org.junit.Test;
+import org.junit.runners.MethodSorters;
+
+import javax.annotation.Nullable;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
+@FixMethodOrder(MethodSorters.NAME_ASCENDING)
+public class InitializationTest
+{
+  private String oldService;
+  private String oldHost;
+  private String oldPort;
+
+  @Before
+  public void messWithSystemProperties()
+  {
+    // required to test Initialization.makeInjectorWithModules
+    oldService = System.setProperty("druid.service", "test-service");
+    oldHost = System.setProperty("druid.host", "test-host");
+    oldPort = System.setProperty("druid.port", "8080");
+  }
+
+  @After
+  public void cleanup()
+  {
+    System.setProperty("druid.service", oldService == null ? "" : oldService);
+    System.setProperty("druid.host", oldHost == null ? "" : oldHost);
+    System.setProperty("druid.port", oldPort == null ? "" : oldPort);
+  }
+
+  @Test
+  public void test01InitialModulesEmpty() throws Exception
+  {
+    Assert.assertEquals(
+        "Initial set of loaded modules must be empty",
+        0,
+        Initialization.getLoadedModules(DruidModule.class).size()
+    );
+  }
+
+  @Test
+  public void test02MakeStartupInjector() throws Exception
+  {
+    Injector startupInjector = Initialization.makeStartupInjector();
+    Assert.assertNotNull(startupInjector);
+    Assert.assertNotNull(startupInjector.getInstance(ObjectMapper.class));
+  }
+
+  @Test
+  public void test03ClassLoaderExtensionsLoading()
+  {
+    Injector startupInjector = Initialization.makeStartupInjector();
+
+    Function<DruidModule, String> fnClassName = new Function<DruidModule, String>()
+    {
+      @Nullable
+      @Override
+      public String apply(@Nullable DruidModule input)
+      {
+        return input.getClass().getCanonicalName();
+      }
+    };
+
+    Assert.assertFalse(
+        "modules does not contain TestDruidModule",
+        Collections2.transform(Initialization.getLoadedModules(DruidModule.class), fnClassName)
+                    .contains("io.druid.initialization.InitializationTest.TestDruidModule")
+    );
+
+    Collection<DruidModule> modules = Initialization.getFromExtensions(
+        startupInjector.getInstance(ExtensionsConfig.class),
+        DruidModule.class
+    );
+
+    Assert.assertTrue(
+        "modules contains TestDruidModule",
+        Collections2.transform(modules, fnClassName)
+                    .contains("io.druid.initialization.InitializationTest.TestDruidModule")
+    );
+  }
+
+  @Test
+  public void test04MakeInjectorWithModules() throws Exception
+  {
+    Injector startupInjector = Initialization.makeStartupInjector();
+    Injector injector = Initialization.makeInjectorWithModules(startupInjector, ImmutableList.of());
+    Assert.assertNotNull(injector);
+  }
+
+  @Test
+  public void testGetLoadedModules()
+  {
+
+    Set<DruidModule> modules = Initialization.getLoadedModules(DruidModule.class);
+
+    Set<DruidModule> loadedModules = Initialization.getLoadedModules(DruidModule.class);
+    Assert.assertEquals("Set from loaded modules #1 should be same!", modules, loadedModules);
+
+    Set<DruidModule> loadedModules2 = Initialization.getLoadedModules(DruidModule.class);
+    Assert.assertEquals("Set from loaded modules #2 should be same!", modules, loadedModules2);
+  }
+
+  public static class TestDruidModule implements DruidModule
+  {
+    @Override
+    public List<? extends Module> getJacksonModules()
+    {
+      return ImmutableList.of();
+    }
+
+    @Override
+    public void configure(Binder binder)
+    {
+      // Do nothing
+    }
+  }
+}
diff --git a/server/src/test/java/io/druid/server/StatusResourceTest.java b/server/src/test/java/io/druid/server/StatusResourceTest.java
new file mode 100644
index 00000000000..9075f97ce81
--- /dev/null
+++ b/server/src/test/java/io/druid/server/StatusResourceTest.java
@@ -0,0 +1,59 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */ + +package io.druid.server; + +import com.google.common.collect.ImmutableList; +import io.druid.initialization.DruidModule; +import io.druid.initialization.InitializationTest; +import junit.framework.Assert; +import org.junit.Test; + +import java.util.Collection; +import java.util.List; + +import static io.druid.server.StatusResource.ModuleVersion; + +/** + */ +public class StatusResourceTest +{ + @Test + public void testLoadedModules() + { + + Collection modules = ImmutableList.of((DruidModule)new InitializationTest.TestDruidModule()); + List statusResourceModuleList = new StatusResource.Status(modules).getModules(); + + Assert.assertEquals("Status should have all modules loaded!", modules.size(), statusResourceModuleList.size()); + + for (DruidModule module : modules) { + String moduleName = module.getClass().getCanonicalName(); + + boolean contains = Boolean.FALSE; + for (ModuleVersion version : statusResourceModuleList) { + if (version.getName().equals(moduleName)) { + contains = Boolean.TRUE; + } + } + Assert.assertTrue("Status resource should contain module " + moduleName, contains); + } + } +} + diff --git a/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorTest.java b/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorTest.java index 8f55a93948c..58323faa863 100644 --- a/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorTest.java +++ b/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorTest.java @@ -23,8 +23,10 @@ import com.google.common.collect.MapMaker; import com.metamx.common.concurrent.ScheduledExecutorFactory; import io.druid.client.DruidServer; import io.druid.client.SingleServerInventoryView; +import io.druid.curator.discovery.NoopServiceAnnouncer; import io.druid.curator.inventory.InventoryManagerConfig; import io.druid.db.DatabaseSegmentManager; +import io.druid.server.DruidNode; import io.druid.server.initialization.ZkPathsConfig; import io.druid.server.metrics.NoopServiceEmitter; import io.druid.timeline.DataSegment; @@ -111,6 +113,8 @@ public class DruidCoordinatorTest scheduledExecutorFactory, null, taskMaster, + new NoopServiceAnnouncer(), + new DruidNode("hey", "what", 1234), loadManagementPeons ); } diff --git a/server/src/test/resources/META-INF/services/io.druid.initialization.DruidModule b/server/src/test/resources/META-INF/services/io.druid.initialization.DruidModule new file mode 100644 index 00000000000..b5bc03d5265 --- /dev/null +++ b/server/src/test/resources/META-INF/services/io.druid.initialization.DruidModule @@ -0,0 +1 @@ +io.druid.initialization.InitializationTest$TestDruidModule diff --git a/services/pom.xml b/services/pom.xml index 0dcad5375c3..fba4cc40f45 100644 --- a/services/pom.xml +++ b/services/pom.xml @@ -27,7 +27,7 @@ io.druid druid - 0.6.27-SNAPSHOT + 0.6.48-SNAPSHOT @@ -51,12 +51,24 @@ ${project.parent.version} + + + maven-jar-plugin + + + + true + true + + + + org.apache.maven.plugins maven-shade-plugin - 1.6 + 2.2 package @@ -77,6 +89,9 @@ + + + diff --git a/services/src/main/java/io/druid/cli/CliBroker.java b/services/src/main/java/io/druid/cli/CliBroker.java index 17ab46de12c..ec37aefbe82 100644 --- a/services/src/main/java/io/druid/cli/CliBroker.java +++ b/services/src/main/java/io/druid/cli/CliBroker.java @@ -53,7 +53,7 @@ import java.util.List; */ @Command( name = "broker", - description = "Runs a broker node, see http://druid.io/docs/0.6.26/Broker.html for a description" + description = "Runs a broker node, see http://druid.io/docs/0.6.46/Broker.html for a description" 
) public class CliBroker extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliCoordinator.java b/services/src/main/java/io/druid/cli/CliCoordinator.java index 937d11a88d7..27c3f8021ab 100644 --- a/services/src/main/java/io/druid/cli/CliCoordinator.java +++ b/services/src/main/java/io/druid/cli/CliCoordinator.java @@ -28,7 +28,6 @@ import com.metamx.common.concurrent.ScheduledExecutorFactory; import com.metamx.common.logger.Logger; import io.airlift.command.Command; import io.druid.client.indexing.IndexingServiceClient; -import io.druid.curator.discovery.DiscoveryModule; import io.druid.db.DatabaseRuleManager; import io.druid.db.DatabaseRuleManagerConfig; import io.druid.db.DatabaseRuleManagerProvider; @@ -41,18 +40,16 @@ import io.druid.guice.JsonConfigProvider; import io.druid.guice.LazySingleton; import io.druid.guice.LifecycleModule; import io.druid.guice.ManageLifecycle; -import io.druid.guice.annotations.Self; import io.druid.server.coordinator.DruidCoordinator; import io.druid.server.coordinator.DruidCoordinatorConfig; import io.druid.server.coordinator.LoadQueueTaskMaster; -import io.druid.server.http.BackwardsCompatiableInfoResource; +import io.druid.server.http.BackwardsCompatibleInfoResource; import io.druid.server.http.CoordinatorDynamicConfigsResource; import io.druid.server.http.CoordinatorRedirectInfo; import io.druid.server.http.CoordinatorResource; import io.druid.server.http.InfoResource; import io.druid.server.http.RedirectFilter; import io.druid.server.http.RedirectInfo; -import io.druid.server.http.RedirectServlet; import io.druid.server.initialization.JettyServerInitializer; import org.apache.curator.framework.CuratorFramework; import org.eclipse.jetty.server.Server; @@ -63,7 +60,7 @@ import java.util.List; */ @Command( name = "coordinator", - description = "Runs the Coordinator, see http://druid.io/docs/0.6.26/Coordinator.html for a description." + description = "Runs the Coordinator, see http://druid.io/docs/0.6.46/Coordinator.html for a description." 
) public class CliCoordinator extends ServerRunnable { @@ -88,8 +85,8 @@ public class CliCoordinator extends ServerRunnable JsonConfigProvider.bind(binder, "druid.manager.segments", DatabaseSegmentManagerConfig.class); JsonConfigProvider.bind(binder, "druid.manager.rules", DatabaseRuleManagerConfig.class); - binder.bind(RedirectServlet.class).in(LazySingleton.class); binder.bind(RedirectFilter.class).in(LazySingleton.class); + binder.bind(RedirectInfo.class).to(CoordinatorRedirectInfo.class).in(LazySingleton.class); binder.bind(DatabaseSegmentManager.class) .toProvider(DatabaseSegmentManagerProvider.class) @@ -101,15 +98,12 @@ public class CliCoordinator extends ServerRunnable binder.bind(IndexingServiceClient.class).in(LazySingleton.class); - binder.bind(RedirectInfo.class).to(CoordinatorRedirectInfo.class).in(LazySingleton.class); - binder.bind(DruidCoordinator.class); LifecycleModule.register(binder, DruidCoordinator.class); - DiscoveryModule.register(binder, Self.class); binder.bind(JettyServerInitializer.class).toInstance(new CoordinatorJettyServerInitializer()); - Jerseys.addResource(binder, BackwardsCompatiableInfoResource.class); + Jerseys.addResource(binder, BackwardsCompatibleInfoResource.class); Jerseys.addResource(binder, InfoResource.class); Jerseys.addResource(binder, CoordinatorResource.class); Jerseys.addResource(binder, CoordinatorDynamicConfigsResource.class); diff --git a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java index 580bdd8f3da..a667b967b30 100644 --- a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java +++ b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java @@ -41,7 +41,7 @@ import java.util.List; */ @Command( name = "hadoop", - description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.26/Batch-ingestion.html for a description." + description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.46/Batch-ingestion.html for a description." 
) public class CliHadoopIndexer implements Runnable { diff --git a/services/src/main/java/io/druid/cli/CliHistorical.java b/services/src/main/java/io/druid/cli/CliHistorical.java index 15abef7cae5..2be3c013820 100644 --- a/services/src/main/java/io/druid/cli/CliHistorical.java +++ b/services/src/main/java/io/druid/cli/CliHistorical.java @@ -42,7 +42,7 @@ import java.util.List; */ @Command( name = "historical", - description = "Runs a Historical node, see http://druid.io/docs/0.6.26/Historical.html for a description" + description = "Runs a Historical node, see http://druid.io/docs/0.6.46/Historical.html for a description" ) public class CliHistorical extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliMiddleManager.java b/services/src/main/java/io/druid/cli/CliMiddleManager.java index 95319e7ee55..d0867b42aa6 100644 --- a/services/src/main/java/io/druid/cli/CliMiddleManager.java +++ b/services/src/main/java/io/druid/cli/CliMiddleManager.java @@ -28,6 +28,7 @@ import com.metamx.common.logger.Logger; import io.airlift.command.Command; import io.druid.guice.IndexingServiceFirehoseModule; import io.druid.guice.IndexingServiceModuleHelper; +import io.druid.guice.IndexingServiceTaskLogsModule; import io.druid.guice.Jerseys; import io.druid.guice.JsonConfigProvider; import io.druid.guice.LazySingleton; @@ -103,7 +104,8 @@ public class CliMiddleManager extends ServerRunnable ); } }, - new IndexingServiceFirehoseModule() + new IndexingServiceFirehoseModule(), + new IndexingServiceTaskLogsModule() ); } } diff --git a/services/src/main/java/io/druid/cli/CliOverlord.java b/services/src/main/java/io/druid/cli/CliOverlord.java index c0a2d14b54c..020178f7b33 100644 --- a/services/src/main/java/io/druid/cli/CliOverlord.java +++ b/services/src/main/java/io/druid/cli/CliOverlord.java @@ -32,6 +32,7 @@ import com.metamx.common.logger.Logger; import io.airlift.command.Command; import io.druid.guice.IndexingServiceFirehoseModule; import io.druid.guice.IndexingServiceModuleHelper; +import io.druid.guice.IndexingServiceTaskLogsModule; import io.druid.guice.JacksonConfigProvider; import io.druid.guice.Jerseys; import io.druid.guice.JsonConfigProvider; @@ -43,6 +44,7 @@ import io.druid.guice.PolyBind; import io.druid.indexing.common.actions.LocalTaskActionClientFactory; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.actions.TaskActionToolbox; +import io.druid.indexing.common.config.TaskStorageConfig; import io.druid.indexing.common.index.ChatHandlerProvider; import io.druid.indexing.common.tasklogs.SwitchingTaskLogStreamer; import io.druid.indexing.common.tasklogs.TaskRunnerTaskLogStreamer; @@ -53,11 +55,10 @@ import io.druid.indexing.overlord.IndexerDBCoordinator; import io.druid.indexing.overlord.RemoteTaskRunnerFactory; import io.druid.indexing.overlord.TaskLockbox; import io.druid.indexing.overlord.TaskMaster; -import io.druid.indexing.overlord.TaskQueue; import io.druid.indexing.overlord.TaskRunnerFactory; import io.druid.indexing.overlord.TaskStorage; import io.druid.indexing.overlord.TaskStorageQueryAdapter; -import io.druid.indexing.overlord.http.OldOverlordResource; +import io.druid.indexing.overlord.config.TaskQueueConfig; import io.druid.indexing.overlord.http.OverlordRedirectInfo; import io.druid.indexing.overlord.http.OverlordResource; import io.druid.indexing.overlord.scaling.AutoScalingStrategy; @@ -70,6 +71,7 @@ import io.druid.indexing.overlord.scaling.ResourceManagementStrategy; import 
io.druid.indexing.overlord.scaling.SimpleResourceManagementConfig; import io.druid.indexing.overlord.scaling.SimpleResourceManagementStrategy; import io.druid.indexing.overlord.setup.WorkerSetupData; +import io.druid.indexing.worker.config.WorkerConfig; import io.druid.server.http.RedirectFilter; import io.druid.server.http.RedirectInfo; import io.druid.server.initialization.JettyServerInitializer; @@ -77,9 +79,7 @@ import io.druid.tasklogs.TaskLogStreamer; import io.druid.tasklogs.TaskLogs; import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.server.handler.DefaultHandler; import org.eclipse.jetty.server.handler.HandlerList; -import org.eclipse.jetty.server.handler.ResourceHandler; import org.eclipse.jetty.servlet.DefaultServlet; import org.eclipse.jetty.servlet.FilterHolder; import org.eclipse.jetty.servlet.ServletContextHandler; @@ -93,7 +93,7 @@ import java.util.List; */ @Command( name = "overlord", - description = "Runs an Overlord node, see http://druid.io/docs/0.6.26/Indexing-Service.html for a description" + description = "Runs an Overlord node, see http://druid.io/docs/0.6.46/Indexing-Service.html for a description" ) public class CliOverlord extends ServerRunnable { @@ -113,10 +113,16 @@ public class CliOverlord extends ServerRunnable @Override public void configure(Binder binder) { + JsonConfigProvider.bind(binder, "druid.indexer.queue", TaskQueueConfig.class); + binder.bind(TaskMaster.class).in(ManageLifecycle.class); binder.bind(TaskLogStreamer.class).to(SwitchingTaskLogStreamer.class).in(LazySingleton.class); - binder.bind(new TypeLiteral>(){}) + binder.bind( + new TypeLiteral>() + { + } + ) .toProvider( new ListProvider() .add(TaskRunnerTaskLogStreamer.class) @@ -126,7 +132,6 @@ public class CliOverlord extends ServerRunnable binder.bind(TaskActionClientFactory.class).to(LocalTaskActionClientFactory.class).in(LazySingleton.class); binder.bind(TaskActionToolbox.class).in(LazySingleton.class); - binder.bind(TaskQueue.class).in(LazySingleton.class); // Lifecycle managed by TaskMaster instead binder.bind(IndexerDBCoordinator.class).in(LazySingleton.class); binder.bind(TaskLockbox.class).in(LazySingleton.class); binder.bind(TaskStorageQueryAdapter.class).in(LazySingleton.class); @@ -145,17 +150,21 @@ public class CliOverlord extends ServerRunnable binder.bind(JettyServerInitializer.class).toInstance(new OverlordJettyServerInitializer()); Jerseys.addResource(binder, OverlordResource.class); - Jerseys.addResource(binder, OldOverlordResource.class); LifecycleModule.register(binder, Server.class); } private void configureTaskStorage(Binder binder) { + JsonConfigProvider.bind(binder, "druid.indexer.storage", TaskStorageConfig.class); + PolyBind.createChoice( binder, "druid.indexer.storage.type", Key.get(TaskStorage.class), Key.get(HeapMemoryTaskStorage.class) ); - final MapBinder storageBinder = PolyBind.optionBinder(binder, Key.get(TaskStorage.class)); + final MapBinder storageBinder = PolyBind.optionBinder( + binder, + Key.get(TaskStorage.class) + ); storageBinder.addBinding("local").to(HeapMemoryTaskStorage.class); binder.bind(HeapMemoryTaskStorage.class).in(LazySingleton.class); @@ -166,13 +175,18 @@ public class CliOverlord extends ServerRunnable private void configureRunners(Binder binder) { + JsonConfigProvider.bind(binder, "druid.worker", WorkerConfig.class); + PolyBind.createChoice( binder, "druid.indexer.runner.type", Key.get(TaskRunnerFactory.class), Key.get(ForkingTaskRunnerFactory.class) ); - final MapBinder biddy = 
PolyBind.optionBinder(binder, Key.get(TaskRunnerFactory.class)); + final MapBinder biddy = PolyBind.optionBinder( + binder, + Key.get(TaskRunnerFactory.class) + ); IndexingServiceModuleHelper.configureTaskRunnerConfigs(binder); biddy.addBinding("local").to(ForkingTaskRunnerFactory.class); @@ -185,7 +199,9 @@ public class CliOverlord extends ServerRunnable private void configureAutoscale(Binder binder) { JsonConfigProvider.bind(binder, "druid.indexer.autoscale", ResourceManagementSchedulerConfig.class); - binder.bind(ResourceManagementStrategy.class).to(SimpleResourceManagementStrategy.class).in(LazySingleton.class); + binder.bind(ResourceManagementStrategy.class) + .to(SimpleResourceManagementStrategy.class) + .in(LazySingleton.class); JacksonConfigProvider.bind(binder, WorkerSetupData.CONFIG_KEY, WorkerSetupData.class, null); @@ -208,19 +224,24 @@ public class CliOverlord extends ServerRunnable JsonConfigProvider.bind(binder, "druid.indexer.autoscale", SimpleResourceManagementConfig.class); } }, - new IndexingServiceFirehoseModule() + new IndexingServiceFirehoseModule(), + new IndexingServiceTaskLogsModule() ); } /** - */ + */ private static class OverlordJettyServerInitializer implements JettyServerInitializer { @Override public void initialize(Server server, Injector injector) { - ResourceHandler resourceHandler = new ResourceHandler(); - resourceHandler.setBaseResource( + final ServletContextHandler root = new ServletContextHandler(ServletContextHandler.SESSIONS); + + ServletHolder holderPwd = new ServletHolder("default", DefaultServlet.class); + + root.addServlet(holderPwd, "/"); + root.setBaseResource( new ResourceCollection( new String[]{ TaskMaster.class.getClassLoader().getResource("static").toExternalForm(), @@ -228,18 +249,17 @@ public class CliOverlord extends ServerRunnable } ) ); + root.addFilter(new FilterHolder(injector.getInstance(RedirectFilter.class)), "/*", null); + root.addFilter(GzipFilter.class, "/*", null); - final ServletContextHandler root = new ServletContextHandler(ServletContextHandler.SESSIONS); - root.setContextPath("/"); + // Can't use /* here because of Guice and Jetty static content conflicts + root.addFilter(GuiceFilter.class, "/status/*", null); + root.addFilter(GuiceFilter.class, "/druid/*", null); HandlerList handlerList = new HandlerList(); - handlerList.setHandlers(new Handler[]{resourceHandler, root, new DefaultHandler()}); - server.setHandler(handlerList); + handlerList.setHandlers(new Handler[]{root}); - root.addServlet(new ServletHolder(new DefaultServlet()), "/*"); - root.addFilter(GzipFilter.class, "/*", null); - root.addFilter(new FilterHolder(injector.getInstance(RedirectFilter.class)), "/*", null); - root.addFilter(GuiceFilter.class, "/*", null); + server.setHandler(handlerList); } } } diff --git a/services/src/main/java/io/druid/cli/CliPeon.java b/services/src/main/java/io/druid/cli/CliPeon.java index db60016fc4a..7204e5fc63a 100644 --- a/services/src/main/java/io/druid/cli/CliPeon.java +++ b/services/src/main/java/io/druid/cli/CliPeon.java @@ -53,7 +53,6 @@ import io.druid.indexing.common.index.NoopChatHandlerProvider; import io.druid.indexing.common.index.ServiceAnnouncingChatHandlerProvider; import io.druid.indexing.overlord.HeapMemoryTaskStorage; import io.druid.indexing.overlord.IndexerDBCoordinator; -import io.druid.indexing.overlord.TaskQueue; import io.druid.indexing.overlord.TaskRunner; import io.druid.indexing.overlord.TaskStorage; import io.druid.indexing.overlord.ThreadPoolTaskRunner; @@ -61,8 +60,12 @@ import 
io.druid.indexing.worker.executor.ChatHandlerResource; import io.druid.indexing.worker.executor.ExecutorLifecycle; import io.druid.indexing.worker.executor.ExecutorLifecycleConfig; import io.druid.query.QuerySegmentWalker; +import io.druid.segment.loading.DataSegmentArchiver; import io.druid.segment.loading.DataSegmentKiller; +import io.druid.segment.loading.DataSegmentMover; +import io.druid.segment.loading.OmniDataSegmentArchiver; import io.druid.segment.loading.OmniDataSegmentKiller; +import io.druid.segment.loading.OmniDataSegmentMover; import io.druid.segment.loading.SegmentLoaderConfig; import io.druid.segment.loading.StorageLocationConfig; import io.druid.server.QueryResource; @@ -130,6 +133,10 @@ public class CliPeon extends GuiceRunnable // Build it to make it bind even if nothing binds to it. Binders.dataSegmentKillerBinder(binder); binder.bind(DataSegmentKiller.class).to(OmniDataSegmentKiller.class).in(LazySingleton.class); + Binders.dataSegmentMoverBinder(binder); + binder.bind(DataSegmentMover.class).to(OmniDataSegmentMover.class).in(LazySingleton.class); + Binders.dataSegmentArchiverBinder(binder); + binder.bind(DataSegmentArchiver.class).to(OmniDataSegmentArchiver.class).in(LazySingleton.class); binder.bind(ExecutorLifecycle.class).in(ManageLifecycle.class); binder.bind(ExecutorLifecycleConfig.class).toInstance( @@ -174,7 +181,6 @@ public class CliPeon extends GuiceRunnable .to(LocalTaskActionClientFactory.class).in(LazySingleton.class); // all of these bindings are so that we can run the peon in local mode binder.bind(TaskStorage.class).to(HeapMemoryTaskStorage.class).in(LazySingleton.class); - binder.bind(TaskQueue.class).in(LazySingleton.class); binder.bind(TaskActionToolbox.class).in(LazySingleton.class); binder.bind(IndexerDBCoordinator.class).in(LazySingleton.class); taskActionBinder.addBinding("remote") diff --git a/services/src/main/java/io/druid/cli/CliRealtime.java b/services/src/main/java/io/druid/cli/CliRealtime.java index e01a1f62bac..bfc05eb748c 100644 --- a/services/src/main/java/io/druid/cli/CliRealtime.java +++ b/services/src/main/java/io/druid/cli/CliRealtime.java @@ -30,7 +30,7 @@ import java.util.List; */ @Command( name = "realtime", - description = "Runs a realtime node, see http://druid.io/docs/0.6.26/Realtime.html for a description" + description = "Runs a realtime node, see http://druid.io/docs/0.6.46/Realtime.html for a description" ) public class CliRealtime extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliRealtimeExample.java b/services/src/main/java/io/druid/cli/CliRealtimeExample.java index 659810cffe6..dd2605140c2 100644 --- a/services/src/main/java/io/druid/cli/CliRealtimeExample.java +++ b/services/src/main/java/io/druid/cli/CliRealtimeExample.java @@ -42,7 +42,7 @@ import java.util.concurrent.Executor; */ @Command( name = "realtime", - description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.26/Realtime.html for a description" + description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.46/Realtime.html for a description" ) public class CliRealtimeExample extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java b/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java index bf5fcd14cc8..be7a59ea2a8 100644 --- a/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java @@ -26,9 +26,7 
@@ import io.druid.server.http.RedirectFilter; import io.druid.server.initialization.JettyServerInitializer; import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.server.handler.DefaultHandler; import org.eclipse.jetty.server.handler.HandlerList; -import org.eclipse.jetty.server.handler.ResourceHandler; import org.eclipse.jetty.servlet.DefaultServlet; import org.eclipse.jetty.servlet.FilterHolder; import org.eclipse.jetty.servlet.ServletContextHandler; @@ -36,25 +34,30 @@ import org.eclipse.jetty.servlet.ServletHolder; import org.eclipse.jetty.servlets.GzipFilter; /** -*/ + */ class CoordinatorJettyServerInitializer implements JettyServerInitializer { @Override public void initialize(Server server, Injector injector) { - ResourceHandler resourceHandler = new ResourceHandler(); - resourceHandler.setResourceBase(DruidCoordinator.class.getClassLoader().getResource("static").toExternalForm()); - final ServletContextHandler root = new ServletContextHandler(ServletContextHandler.SESSIONS); - root.setContextPath("/"); + + ServletHolder holderPwd = new ServletHolder("default", DefaultServlet.class); + + root.addServlet(holderPwd, "/"); + root.setResourceBase(DruidCoordinator.class.getClassLoader().getResource("static").toExternalForm()); + root.addFilter(new FilterHolder(injector.getInstance(RedirectFilter.class)), "/*", null); + root.addFilter(GzipFilter.class, "/*", null); + + // Can't use '/*' here because of Guice and Jetty static content conflicts + // The coordinator really needs a standarized api path + root.addFilter(GuiceFilter.class, "/status/*", null); + root.addFilter(GuiceFilter.class, "/info/*", null); + root.addFilter(GuiceFilter.class, "/coordinator/*", null); HandlerList handlerList = new HandlerList(); - handlerList.setHandlers(new Handler[]{resourceHandler, root, new DefaultHandler()}); - server.setHandler(handlerList); + handlerList.setHandlers(new Handler[]{root}); - root.addServlet(new ServletHolder(new DefaultServlet()), "/*"); - root.addFilter(GzipFilter.class, "/*", null); - root.addFilter(new FilterHolder(injector.getInstance(RedirectFilter.class)), "/*", null); - root.addFilter(GuiceFilter.class, "/*", null); + server.setHandler(handlerList); } } diff --git a/services/src/main/java/io/druid/cli/Main.java b/services/src/main/java/io/druid/cli/Main.java index 551acea0706..8fc06ffcdc5 100644 --- a/services/src/main/java/io/druid/cli/Main.java +++ b/services/src/main/java/io/druid/cli/Main.java @@ -27,7 +27,10 @@ import io.druid.cli.convert.ConvertProperties; import io.druid.cli.validate.DruidJsonValidator; import io.druid.initialization.Initialization; import io.druid.server.initialization.ExtensionsConfig; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import java.util.Collection; import java.util.List; /** @@ -41,7 +44,7 @@ public class Main builder.withDescription("Druid command-line runner.") .withDefaultCommand(Help.class) - .withCommands(Help.class); + .withCommands(Help.class, Version.class); builder.withGroup("server") .withDescription("Run one of the Druid server types.") @@ -73,7 +76,7 @@ public class Main final Injector injector = Initialization.makeStartupInjector(); final ExtensionsConfig config = injector.getInstance(ExtensionsConfig.class); - final List extensionCommands = Initialization.getFromExtensions(config, CliCommandCreator.class); + final Collection extensionCommands = Initialization.getFromExtensions(config, CliCommandCreator.class); for (CliCommandCreator creator : 
extensionCommands) {
       creator.addCommands(builder);
diff --git a/services/src/main/java/io/druid/cli/Version.java b/services/src/main/java/io/druid/cli/Version.java
new file mode 100644
index 00000000000..45c258de803
--- /dev/null
+++ b/services/src/main/java/io/druid/cli/Version.java
@@ -0,0 +1,38 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+package io.druid.cli;
+
+import io.airlift.command.Command;
+import io.druid.initialization.DruidModule;
+import io.druid.initialization.Initialization;
+import io.druid.server.StatusResource;
+
+@Command(
+    name = "version",
+    description = "Returns Druid version information"
+)
+public class Version implements Runnable
+{
+  @Override
+  public void run()
+  {
+    System.out.println(new StatusResource.Status(Initialization.getLoadedModules(DruidModule.class)));
+  }
+}
diff --git a/services/src/main/java/io/druid/cli/convert/ConvertProperties.java b/services/src/main/java/io/druid/cli/convert/ConvertProperties.java
index 3cc867700d0..fdbf2c9f2f0 100644
--- a/services/src/main/java/io/druid/cli/convert/ConvertProperties.java
+++ b/services/src/main/java/io/druid/cli/convert/ConvertProperties.java
@@ -178,7 +178,7 @@ public class ConvertProperties implements Runnable
     }
 
     updatedProps.setProperty(
-        "druid.monitoring.monitors", "[\"io.druid.server.metrics.ServerMonitor\", \"com.metamx.metrics.SysMonitor\"]"
+        "druid.monitoring.monitors", "[\"com.metamx.metrics.SysMonitor\"]"
     );
 
     BufferedWriter out = null;