New extension loading mechanism

1) Remove the Maven client so extensions are no longer downloaded at runtime.
2) Provide a way to load Druid extensions and Hadoop dependencies through the file system.
3) Refactor pull-deps so that it can download extensions into extension directories.
4) Add documentation on how to use the new extension loading mechanism.
5) Change how the Druid tarball is generated. All extensions plus hadoop-client 2.3.0
are now packaged within the Druid tarball.
Bingkun Guo 2015-07-07 22:51:44 -05:00
parent b7c68ec449
commit 4914925d65
27 changed files with 1323 additions and 561 deletions

View File

@ -38,24 +38,69 @@
<artifactId>druid-services</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>extensions-distribution</artifactId>
<version>${project.parent.version}</version>
<classifier>extensions-repo</classifier>
<type>zip</type>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<phase>install</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>java</executable>
<arguments>
<argument>-classpath</argument>
<classpath/>
<argument>-Ddruid.extensions.loadList=[]</argument>
<argument>io.druid.cli.Main</argument>
<argument>tools</argument>
<argument>pull-deps</argument>
<argument>--clean</argument>
<argument>--defaultVersion</argument>
<argument>${project.parent.version}</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-examples</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-azure-extensions</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-cassandra-storage</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-hdfs-storage</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-histogram</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-kafka-eight</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-kafka-eight-simple-consumer</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-kafka-extraction-namespace</argument>
<argument>-c</argument>
<argument>io.druid.extensions:mysql-metadata-storage</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-namespace-lookup</argument>
<argument>-c</argument>
<argument>io.druid.extensions:postgresql-metadata-storage</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-rabbitmq</argument>
<argument>-c</argument>
<argument>io.druid.extensions:druid-s3-extensions</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>distro-assembly</id>
<phase>package</phase>
<phase>install</phase>
<goals>
<goal>single</goal>
</goals>
@ -67,6 +112,20 @@
</descriptors>
</configuration>
</execution>
<execution>
<id>mysql-distro-assembly</id>
<phase>install</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<finalName>mysql-metdata-storage</finalName>
<tarLongFileMode>posix</tarLongFileMode>
<descriptors>
<descriptor>src/assembly/mysql_assembly.xml</descriptor>
</descriptors>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
@ -81,6 +140,20 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
<filesets>
<fileset>
<directory>${project.basedir}/druid_extensions</directory>
</fileset>
<fileset>
<directory>${project.basedir}/hadoop_druid_dependencies</directory>
</fileset>
</filesets>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -24,6 +24,23 @@
<format>tar.gz</format>
</formats>
<fileSets>
<fileSet>
<directory>druid_extensions</directory>
<includes>
<include>*/*</include>
</includes>
<excludes>
<exclude>mysql-metadata-storage/**</exclude>
</excludes>
<outputDirectory>druid_extensions</outputDirectory>
</fileSet>
<fileSet>
<directory>hadoop_druid_dependencies</directory>
<includes>
<include>*/*/*</include>
</includes>
<outputDirectory>hadoop_druid_dependencies</outputDirectory>
</fileSet>
<fileSet>
<directory>../examples/config</directory>
<includes>

View File

@ -0,0 +1,35 @@
<?xml version="1.0"?>
<!--
~ Druid - a distributed column store.
~ Copyright 2012 - 2015 Metamarkets Group Inc.
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>bin</id>
<formats>
<format>tar.gz</format>
</formats>
<fileSets>
<fileSet>
<directory>druid_extensions/mysql-metadata-storage</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>./</outputDirectory>
</fileSet>
</fileSets>
</assembly>

View File

@ -21,10 +21,9 @@ Many of Druid's external dependencies can be plugged in as modules. Extensions c
|Property|Description|Default|
|--------|-----------|-------|
|`druid.extensions.remoteRepositories`|This is a JSON Array list of remote repositories to load dependencies from. If this is not set to '[]', Druid will try to download extensions at the specified remote repository.|["http://repo1.maven.org/maven2/", "https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"]|
|`druid.extensions.localRepository`|. The way maven gets dependencies is that it downloads them to a "local repository" on your local disk and then collects the paths to each of the jars. This specifies the directory to consider the "local repository". If this is set, remoteRepositories is not required.|`~/.m2/repository`|
|`druid.extensions.coordinates`|This is a JSON array of "groupId:artifactId[:version]" maven coordinates. For artifacts without version specified, Druid will append the default version. Notice: extensions explicitly specified in this property will have precedence over ones included in the classpath when Druid loads extensions. If there are duplicate extensions, Druid will only load ones explicitly specified here|[]|
|`druid.extensions.defaultVersion`|Version to use for extension artifacts without version information.|`druid-server` artifact version.|
|`druid.extensions.directory`|The root extension directory where users can put extension-related files. Druid will load extensions stored under this directory.|`druid_extensions` (This is a relative path to Druid's working directory)|
|`druid.extensions.hadoopDependenciesDir`|The root Hadoop dependencies directory where users can put Hadoop-related dependency files. Druid will load the dependencies based on the Hadoop coordinate specified in the Hadoop index task.|`hadoop_druid_dependencies` (This is a relative path to Druid's working directory)|
|`druid.extensions.loadList`|A JSON array of extensions that Druid should load from the extension directories. If it is not specified, its value will be `null` and Druid will load all the extensions under `druid.extensions.directory`. If its value is the empty list `[]`, no extensions will be loaded at all.|null|
|`druid.extensions.searchCurrentClassloader`|This is a boolean flag that determines if Druid will search the main classloader for extensions. It defaults to true but can be turned off if you have reason to not automatically add all modules on the classpath.|true|
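For illustration, here is a minimal sketch of a `common.runtime.properties` snippet using the file-system based extension properties above (the extension names and directories are examples, not requirements):

```properties
# Root directory holding one sub-directory per extension (the default value)
druid.extensions.directory=druid_extensions
# Root directory holding Hadoop dependencies, one sub-directory per artifact and version (the default value)
druid.extensions.hadoopDependenciesDir=hadoop_druid_dependencies
# Only load these two extensions; omit this property to load everything under the extensions directory
druid.extensions.loadList=["druid-kafka-eight","mysql-metadata-storage"]
```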
### Zookeeper

View File

@ -15,8 +15,13 @@ The following metadata storage engines are supported:
* MySQL (io.druid.extensions:mysql-metadata-storage)
* PostgreSQL (io.druid.extensions:postgresql-metadata-storage)
To choose a metadata storage, set the `druid.extensions` configuration to
include the extension for the metadata storage you plan to use.
To choose a metadata storage:
1. Make sure Druid can pick up the extension files from either the classpath or the
extensions directory; see [Including Extensions](../operations/including-extensions.html) for more information.
2. Set the `druid.extensions` configuration to include the extension for the
metadata storage you plan to use. See below.
## Setting up MySQL
@ -55,13 +60,18 @@ include the extension for the metadata storage you plan to use.
with the hostname of the database.
```properties
druid.extensions.coordinates=[\"io.druid.extensions:mysql-metadata-storage"]
druid.extensions.loadList=["mysql-metadata-storage"]
druid.metadata.storage.type=mysql
druid.metadata.storage.connector.connectURI=jdbc:mysql://<host>/druid_test
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=diurd
```
Note: the metadata storage extension is not packaged within the main Druid tarball; it is
packaged in a separate tarball that can be downloaded from [here](http://druid.io/downloads.html).
You can also get it using [pull-deps](../pull-deps.html), or build it
from source; see [Build from Source](../development/build.html).
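For example, a hedged sketch of fetching the MySQL metadata storage extension with `pull-deps` (the classpath and version below are placeholders; `--no-default-hadoop` simply skips downloading the default Hadoop dependency):

```
java -classpath "config/_common:lib/*" io.druid.cli.Main tools pull-deps --no-default-hadoop -c io.druid.extensions:mysql-metadata-storage:<version>
```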
## Setting up PostgreSQL
1. Install PostgreSQL
@ -97,7 +107,7 @@ include the extension for the metadata storage you plan to use.
with the hostname of the database.
```properties
druid.extensions.coordinates=[\"io.druid.extensions:postgresql-metadata-storage"]
druid.extensions.loadList=["postgresql-metadata-storage"]
druid.metadata.storage.type=postgresql
druid.metadata.storage.connector.connectURI=jdbc:postgresql://<host>/druid_test
druid.metadata.storage.connector.user=druid

View File

@ -16,11 +16,14 @@ To do so, run these commands:
```
git clone git@github.com:druid-io/druid.git
cd druid
mvn clean package
mvn clean install
```
This will compile the project and create the Druid binary distribution tar under
`services/target/druid-VERSION-bin.tar.gz`.
`distribution/target/druid-VERSION-bin.tar.gz`.
This will also create a tarball that contains the `mysql-metadata-storage` extension under
`distribution/target/mysql-metdata-storage-bin.tar.gz`. If you want Druid to load `mysql-metadata-storage`, you can first untar `druid-VERSION-bin.tar.gz`, then go to ```druid-<version>/druid_extensions```, and untar `mysql-metdata-storage-bin.tar.gz` there. Now just specify `mysql-metadata-storage` in `druid.extensions.loadList` so that Druid will pick it up. See [Including Extensions](../operations/including-extensions.html) for more information.
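A rough sketch of those steps, assuming you run them from the source root and the binary tarball unpacks into a `druid-<version>` directory:

```
tar -xzf distribution/target/druid-<version>-bin.tar.gz
cd druid-<version>/druid_extensions
tar -xzf ../../distribution/target/mysql-metdata-storage-bin.tar.gz
```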
You can find the example executables in the examples/bin directory:

View File

@ -413,7 +413,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
### Running the Task
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `metadataUpdateSpec`. The Indexing Service takes care of setting these fields internally.
To run the task:

View File

@ -123,7 +123,7 @@ The indexSpec is optional and default parameters will be used if not specified.
|dimensionCompression|compression format for dimension columns (currently only affects single-value dimensions, multi-value dimensions are always uncompressed)|`"uncompressed"`, `"lz4"`, `"lzf"`|`"lz4"`|no|
|metricCompression|compression format for metric columns, defaults to LZ4|`"lz4"`, `"lzf"`|`"lz4"`|no|
### Index Hadoop Task
### Hadoop Index Task
The Hadoop Index Task is used to index larger data sets that require the parallelization and processing power of a Hadoop cluster.
@ -138,14 +138,17 @@ The Hadoop Index Task is used to index larger data sets that require the paralle
|--------|-----------|---------|
|type|The task type, this should always be "index_hadoop".|yes|
|spec|A Hadoop Index Spec. See [Batch Ingestion](../ingestion/batch-ingestion.html)|yes|
|hadoopCoordinates|The Maven \<groupId\>:\<artifactId\>:\<version\> of Hadoop to use. The default is "org.apache.hadoop:hadoop-client:2.3.0".|no|
|hadoopDependencyCoordinates|A JSON array of Hadoop dependency coordinates that Druid will use; this property overrides the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies in the location specified by `druid.extensions.hadoopDependenciesDir`.|no|
|classpathPrefix|Classpath that will be pre-appended for the peon process.|no|
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `metadataUpdateSpec`. The Indexing Service takes care of setting these fields internally.
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopDruidIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
Note: Before using the Hadoop Index Task, please make sure to include the Hadoop dependencies so that Druid knows where to pick them up at runtime; see [Include Hadoop Dependencies](../operations/other-hadoop.html).
Druid uses hadoop-client 2.3.0 as the default Hadoop version. You can get it from the released Druid tarball (under the folder ```hadoop_druid_dependencies```) or use [pull-deps](../pull-deps.html).
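For example, a hedged sketch of fetching the default Hadoop dependency with pull-deps (the classpath shown is illustrative; without `--no-default-hadoop` or an explicit `-h`, pull-deps downloads org.apache.hadoop:hadoop-client:2.3.0 into the directory configured by `druid.extensions.hadoopDependenciesDir`):

```
java -classpath "config/_common:lib/*" io.druid.cli.Main tools pull-deps
```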
#### Using your own Hadoop distribution
Druid is compiled against Apache hadoop-client 2.3.0. However, if you happen to use a different flavor of hadoop that is API compatible with hadoop-client 2.3.0, you should only have to change the hadoopCoordinates property to point to the maven artifact used by your distribution. For non-API compatible versions, please see [here](../operations/other-hadoop.html).
Druid is compiled against Apache hadoop-client 2.3.0. However, if you happen to use a different flavor of Hadoop that is API compatible with hadoop-client 2.3.0, you should first make sure Druid knows where to pick it up, and then you should only have to change the `hadoopDependencyCoordinates` property to point to the list of Maven artifacts used by your distribution. For non-API compatible versions and more information, please see [here](../operations/other-hadoop.html).
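As a hedged illustration, the relevant part of a Hadoop index task JSON could look like the fragment below (the coordinates shown are only an example, and the rest of the spec is elided):

```
{
  "type": "index_hadoop",
  "hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.4.0"],
  "spec": { ... }
}
```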
#### Resolving dependency conflicts running HadoopIndexTask

View File

@ -13,22 +13,58 @@ Druid extensions can be specified in the `common.runtime.properties`. There are
If you add your extension jar to the classpath at runtime, Druid will load it into the system. This mechanism is relatively easy to reason about, but it also means that you have to ensure that all dependency jars on the classpath are compatible. That is, Druid makes no provisions while using this method to maintain class loader isolation so you must make sure that the jars on your classpath are mutually compatible.
### Specify maven coordinates
### Add to the extension directory
Druid has the ability to automatically load extension jars from maven at runtime. With this mechanism, Druid also loads up the dependencies of the extension jar into an isolated class loader. That means that your extension can depend on a different version of a library that Druid also uses and both can co-exist.
If you don't want to fiddle with the classpath, you can create an extension directory and tell Druid to load extensions from there.
### I want classloader isolation, but I don't want my production machines downloading their own dependencies. What should I do?
To let Druid load your extensions, follow the steps below.
If you want to take advantage of the maven-based classloader isolation but you are also rightly frightened by the prospect of each of your production machines downloading their own dependencies on deploy, this section is for you.
1) Specify `druid.extensions.directory` (the root directory for normal Druid extensions). If you don't specify it, Druid will use its default value; see [Configuration](../configuration/index.html).
The trick to doing this is
2) Prepare extension directories under the root extension directory. Under the root extension directory, create a sub-directory for each extension you might want to load, and put the extension-related files inside it. (If you don't want to manually set up the extension directories, Druid also provides a [pull-deps](../pull-deps.html) tool that can generate these directories automatically.)
1) Specify a local directory for `druid.extensions.localRepository`
Example:
2) Run the `tools pull-deps` command to pull all the specified dependencies down into your local repository
Suppose you specify `druid.extensions.directory=/usr/local/druid/druid_extensions`, and want Druid to load the extensions ```druid-examples```, ```druid-kafka-eight``` and ```mysql-metadata-storage```.
3) Bundle up the local repository along with your other Druid stuff into whatever you use for a deployable artifact
Then ```druid_extensions``` should look like this:
4) Run Your druid processes with `druid.extensions.remoteRepositories=[]` and a local repository set to wherever your bundled "local" repository is located
```
druid_extensions/
├── druid-examples
│   ├── commons-beanutils-1.8.3.jar
│   ├── commons-digester-1.8.jar
│   ├── commons-logging-1.1.1.jar
│   ├── commons-validator-1.4.0.jar
│   ├── druid-examples-0.8.0-rc1.jar
│   ├── twitter4j-async-3.0.3.jar
│   ├── twitter4j-core-3.0.3.jar
│   └── twitter4j-stream-3.0.3.jar
├── druid-kafka-eight
│   ├── druid-kafka-eight-0.7.3.jar
│   ├── jline-0.9.94.jar
│   ├── jopt-simple-3.2.jar
│   ├── kafka-clients-0.8.2.1.jar
│   ├── kafka_2.10-0.8.2.1.jar
│   ├── log4j-1.2.16.jar
│   ├── lz4-1.3.0.jar
│   ├── metrics-core-2.2.0.jar
│   ├── netty-3.7.0.Final.jar
│   ├── scala-library-2.10.4.jar
│   ├── slf4j-log4j12-1.6.1.jar
│   ├── snappy-java-1.1.1.6.jar
│   ├── zkclient-0.3.jar
│   └── zookeeper-3.4.6.jar
└── mysql-metadata-storage
├── jdbi-2.32.jar
├── mysql-connector-java-5.1.34.jar
└── mysql-metadata-storage-0.8.0-rc1.jar
```
The Druid processes will then only load up jars from the local repository and will not try to go out onto the internet to find the maven dependencies.
As you can see, under ```druid_extensions``` there are three sub-directories, ```druid-examples```, ```druid-kafka-eight``` and ```mysql-metadata-storage```; each sub-directory denotes an extension that Druid might load.
3) Tell Druid which extensions to load. Now that you have prepared your extension directories, if you want Druid to load a specific list of extensions under the root extension directory, you need to specify `druid.extensions.loadList`. Using the example above, if you want Druid to load ```druid-kafka-eight``` and ```mysql-metadata-storage```, specify `druid.extensions.loadList=["druid-kafka-eight", "mysql-metadata-storage"]`, as in the sketch below.
If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system.
If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory.
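Putting the example together, a minimal sketch of the relevant properties (using the root directory assumed above):

```properties
druid.extensions.directory=/usr/local/druid/druid_extensions
druid.extensions.loadList=["druid-kafka-eight", "mysql-metadata-storage"]
```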

View File

@ -1,21 +1,72 @@
---
layout: doc_page
---
Working with different versions of Hadoop may require a bit of extra work for the time being. We will make changes to support different Hadoop versions in the near future. If you have problems outside of these instructions, please feel free to contact us in IRC or on the [forum](https://groups.google.com/forum/#!forum/druid-development).
# Work with different versions of Hadoop
Working with Hadoop 2.x
-----------------------
The default version of Hadoop bundled with Druid is 2.3. This should work out of the box.
## Include Hadoop dependencies
To override the default Hadoop version, both the Hadoop Index Task and the standalone Hadoop indexer support the parameter `hadoopDependencyCoordinates`. You can pass another set of Hadoop coordinates through this parameter (e.g. You can specify coordinates for Hadoop 2.4.0 as `["org.apache.hadoop:hadoop-client:2.4.0"]`).
There are two different ways to let Druid pick up your Hadoop version; choose the one that fits your needs.
### Add your Hadoop dependencies to the Hadoop dependencies directory
You can create a Hadoop dependency directory and tell Druid to load your Hadoop jars from there.
To make this work, follow the steps below
(1) Specify `druid.extensions.hadoopDependenciesDir` (the root directory for Hadoop-related dependencies). If you don't specify it, Druid will use its default value; see [Configuration](../configuration/index.html).
(2) Set up Hadoop dependency directories under the root Hadoop dependency directory. Under the root directory, create a sub-directory for each Hadoop dependency. Inside each sub-directory, create a sub-sub-directory whose name is the version of Hadoop it contains, and put the Hadoop jars inside that sub-sub-directory. This file structure is almost the same as for normal Druid extensions described in [Including-Extensions](../including-extensions.html), except that there is an extra layer of folders that specifies the version of Hadoop. (If you don't want to manually set up this directory, Druid also provides a [pull-deps](../pull-deps.html) tool that can generate these directories automatically.)
Example:
Suppose you specify `druid.extensions.hadoopDependenciesDir=/usr/local/druid/hadoop_druid_dependencies`, and you want to prepare both `hadoop-client` 2.3.0 and 2.4.0 for Druid,
Then you can either use [pull-deps](../pull-deps.html) or manually set up the Hadoop dependency directories such that ```hadoop_druid_dependencies``` looks like this:
```
hadoop_druid_dependencies/
└── hadoop-client
├── 2.3.0
│   ├── activation-1.1.jar
│   ├── avro-1.7.4.jar
│   ├── commons-beanutils-1.7.0.jar
│   ├── commons-beanutils-core-1.8.0.jar
│   ├── commons-cli-1.2.jar
│   ├── commons-codec-1.4.jar
..... lots of jars
└── 2.4.0
├── activation-1.1.jar
├── avro-1.7.4.jar
├── commons-beanutils-1.7.0.jar
├── commons-beanutils-core-1.8.0.jar
├── commons-cli-1.2.jar
├── commons-codec-1.4.jar
..... lots of jars
```
As you can see, under ```hadoop-client``` there are two sub-directories, each denoting a version of ```hadoop-client```. At runtime, Druid will look for these directories and load the appropriate ```hadoop-client``` based on the `hadoopDependencyCoordinates` passed to the [Hadoop Index Task](../misc/tasks.html).
### Append your Hadoop jars to the Druid classpath
If you don't like the approach above, just want to use one specific Hadoop version, and don't need Druid to work with different Hadoop versions, then you can:
(1) Set `druid.indexer.task.defaultHadoopCoordinates=[]`. `druid.indexer.task.defaultHadoopCoordinates` specifies the default Hadoop coordinates that Druid uses. Its default value is `["org.apache.hadoop:hadoop-client:2.3.0"]`. By setting it to an empty list, Druid will not load any other Hadoop dependencies except the ones specified in the classpath.
(2) Append your Hadoop jars to the classpath, and Druid will load them into the system. This mechanism is relatively easy to reason about, but it also means that you have to ensure that all dependency jars on the classpath are compatible; Druid makes no provisions with this method to maintain class loader isolation, so you must make sure that the jars on your classpath are mutually compatible. A rough sketch of a startup command is shown below.
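As a hedged sketch (the Hadoop jar and configuration locations are illustrative, and `druid.indexer.task.defaultHadoopCoordinates=[]` is assumed to be set in `common.runtime.properties`), starting the indexing service could then look like:

```
java -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath config/_common:config/overlord:lib/*:/path/to/hadoop/jars/*:/path/to/hadoop/conf io.druid.cli.Main server overlord
```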
## Working with Hadoop 2.x
The default version of Hadoop bundled with Druid is 2.3.
To override the default Hadoop version, both the Hadoop Index Task and the standalone Hadoop indexer support the parameter `hadoopDependencyCoordinates` (see [Hadoop Index Task](../misc/tasks.html)). You can pass another set of Hadoop coordinates through this parameter (e.g. you can specify coordinates for Hadoop 2.4.0 as `["org.apache.hadoop:hadoop-client:2.4.0"]`), which will override the default Hadoop coordinates Druid uses.
The Hadoop Index Task takes this parameter as part of the task JSON and the standalone Hadoop indexer takes this parameter as a command line argument.
If you are still having problems, include all relevant hadoop jars at the beginning of the classpath of your indexing or historical nodes.
Working with CDH
----------------
Members of the community have reported dependency conflicts between the version of Jackson used in CDH and Druid. Currently, our best workaround is to edit Druid's pom.xml dependencies to match the version of Jackson in your hadoop version and recompile Druid.
## Working with CDH
Members of the community have reported dependency conflicts between the version of Jackson used in CDH and Druid. Currently, our best workaround is to edit Druid's pom.xml dependencies to match the version of Jackson in your Hadoop version and recompile Druid.
For more about building Druid, please see [Building Druid](../development/build.html).
@ -29,7 +80,7 @@ Another workaround solution is to build a custom fat jar of Druid using [sbt](ht
You can always add more building targets or remove the ones you don't need.
(4) In the same directory creat a new directory named 'project'.
(4) In the same directory create a new directory named 'project'.
(5) Put the druid source code into 'druid_build/project'.
@ -42,10 +93,10 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0")
(8) In the 'druid_build/target/scala-2.10' folder, you will find the fat jar you just build.
(9) Make sure the jars you've uploaded has been completely removed. The hdfs directory is by default '/tmp/druid-indexing/classpath'.
(9) Make sure the jars you've uploaded have been completely removed. The HDFS directory is by default '/tmp/druid-indexing/classpath'.
(10) Include the fat jar in the classpath when you start the indexing service. Make sure you've removed 'lib/*' from your classpath because now the fat jar includes all you need.
Working with Hadoop 1.x and older
---------------------------------
## Working with Hadoop 1.x and older
We recommend recompiling Druid with your particular version of Hadoop by changing the dependencies in Druid's pom.xml files. Make sure to also either override the default `hadoopDependencyCoordinates` in the code or pass your Hadoop version in as part of indexing.

View File

@ -0,0 +1,94 @@
---
layout: doc_page
---
# pull-deps Tool
`pull-deps` is a tool that can pull down dependencies to the local repository and lay them out into the extension directory as needed.
`pull-deps` has several command line options; they are as follows:
`-c` or `--coordinate` (Can be specified multiple times)
Extension coordinate to pull down, specified as a Maven coordinate, e.g. io.druid.extensions:mysql-metadata-storage
`-h` or `--hadoop-coordinate` (Can be specified multiple times)
Hadoop dependency to pull down, specified as a Maven coordinate, e.g. org.apache.hadoop:hadoop-client:2.4.0
`--no-default-hadoop`
Don't pull down the default Hadoop coordinate, i.e., org.apache.hadoop:hadoop-client:2.3.0. If the `-h` option is supplied, the default Hadoop coordinate will not be downloaded.
`--clean`
Remove existing extension and Hadoop dependency directories before pulling down dependencies.
`-l` or `--localRepository`
A local repository that Maven will use to put downloaded files. pull-deps will then lay these files out into the extension directory as needed.
`-r` or `--remoteRepositories`
A JSON Array list of remote repositories to load dependencies from.
`-d` or `--defaultVersion`
Version to use for extension coordinates that don't have version information. For example, if the extension coordinate is `io.druid.extensions:mysql-metadata-storage` and the default version is `0.8.0`, then this coordinate will be treated as `io.druid.extensions:mysql-metadata-storage:0.8.0`
To run `pull-deps`, you should
1) Specify `druid.extensions.directory` and `druid.extensions.hadoopDependenciesDir`; these two properties tell `pull-deps` where to put extensions. If you don't specify them, default values will be used; see [Configuration](../configuration/index.html).
2) Tell `pull-deps` what to download using the `-c` or `-h` options, each of which is followed by a Maven coordinate.
Example:
Suppose you want to download ```druid-examples```, ```mysql-metadata-storage``` and ```hadoop-client``` (both 2.3.0 and 2.4.0) at specific versions. You can run `pull-deps` with `-c io.druid.extensions:druid-examples:0.8.0`, `-c io.druid.extensions:mysql-metadata-storage:0.8.0`, `-h org.apache.hadoop:hadoop-client:2.3.0` and `-h org.apache.hadoop:hadoop-client:2.4.0`. An example command would be:
```java -classpath "/my/druid/library/*" io.druid.cli.Main tools pull-deps --clean -c io.druid.extensions:mysql-metadata-storage:0.8.0 -c io.druid.extensions:druid-examples:0.8.0 -h org.apache.hadoop:hadoop-client:2.3.0 -h org.apache.hadoop:hadoop-client:2.4.0```
Because `--clean` is supplied, this command will first remove the directories specified by `druid.extensions.directory` and `druid.extensions.hadoopDependenciesDir`, then recreate them and start downloading the extensions there. After the download finishes, if you go to the extension directories you specified, you will see:
```
tree druid_extensions
druid_extensions
├── druid-examples
│   ├── commons-beanutils-1.8.3.jar
│   ├── commons-digester-1.8.jar
│   ├── commons-logging-1.1.1.jar
│   ├── commons-validator-1.4.0.jar
│   ├── druid-examples-0.8.0.jar
│   ├── twitter4j-async-3.0.3.jar
│   ├── twitter4j-core-3.0.3.jar
│   └── twitter4j-stream-3.0.3.jar
└── mysql-metadata-storage
├── jdbi-2.32.jar
├── mysql-connector-java-5.1.34.jar
└── mysql-metadata-storage-0.8.0.jar
```
```
tree hadoop_druid_dependencies
hadoop_druid_dependencies/
└── hadoop-client
├── 2.3.0
│   ├── activation-1.1.jar
│   ├── avro-1.7.4.jar
│   ├── commons-beanutils-1.7.0.jar
│   ├── commons-beanutils-core-1.8.0.jar
│   ├── commons-cli-1.2.jar
│   ├── commons-codec-1.4.jar
..... lots of jars
└── 2.4.0
├── activation-1.1.jar
├── avro-1.7.4.jar
├── commons-beanutils-1.7.0.jar
├── commons-beanutils-core-1.8.0.jar
├── commons-cli-1.2.jar
├── commons-codec-1.4.jar
..... lots of jars
```
Note that if you specify `--defaultVersion`, you don't have to put version information in the coordinate. For example, if you want both `druid-examples` and `mysql-metadata-storage` to use version `0.8.0`, you can change the command above to
```java -classpath "/my/druid/library/*" io.druid.cli.Main tools pull-deps --defaultVersion 0.8.0 --clean -c io.druid.extensions:mysql-metadata-storage -c io.druid.extensions:druid-examples -h org.apache.hadoop:hadoop-client:2.3.0 -h org.apache.hadoop:hadoop-client:2.4.0```

View File

@ -4,23 +4,7 @@ layout: doc_page
What to Do When You Have a Firewall
-----------------------------------
When you are behind a firewall, the Maven Druid dependencies will not be accessible, as well as the IRC wikipedia channels that feed realtime data into Druid. To workaround those two challenges, you will need to:
1. Make the Maven Druid dependencies available offline
2. Make the Wikipedia example GeoLite DB dependency available offline
## Making Maven Druid Dependencies Available Offline
1. Extract Druid to a machine that has internet access; e.g. `/Users/foo/druid-<version>`
2. Create a repository directory to download the dependencies to; e.g. `/Users/foo/druid-<version>\repo`
3. Create property `druid.extensions.localRepository=`*`path to repo directory`* in the *`Druid Directory`*`\config\_common/common.runtime.properties` file; e.g. `druid.extensions.localRepository=/Users/foo/druid-<version>/repo`
4. From within Druid directory, run the `pull-deps` command to download all Druid dependencies to the repository specified in the `common.runtime.properties` file:
```
java -classpath "config\_common;lib\*" io.druid.cli.Main tools pull-deps
```
5. Once all dependencies have been downloaded successfully, replicate the `repo` directory to the machine behind the firewall; e.g. `/opt/druid-<version>/repo`
6. Create property `druid.extensions.localRepository=`*`path to repo directory`* in the *`Druid Directory`*`/config/_common/common.runtime.properties` file; e.g. `druid.extensions.localRepository=/opt/druid-<version>/repo`
When you are behind a firewall, if the IRC Wikipedia channels that feed realtime data into Druid are not accessible, then there is nothing you can do. If the IRC channels are accessible, but downloading the GeoLite DB from MaxMind is firewalled, you can work around this by making the GeoLite DB dependency available offline, see below.
## Making the Wikipedia Example GeoLite DB Dependency Available Offline
1. Download GeoLite2 City DB from http://dev.maxmind.com/geoip/geoip2/geolite2/

View File

@ -74,6 +74,8 @@ You should see a bunch of files:
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories
* druid_extensions (This folder contains all the extensions that could be loaded by Druid. Note that the extension `mysql-metadata-storage` is packaged in a separate tarball that can be downloaded from [here](http://druid.io/downloads.html). See [Including Extensions](../operations/including-extensions.html) for more information about loading extensions.)
* hadoop_druid_dependencies (This folder contains hadoop-client:2.3.0; see [Different Hadoop Versions](../operations/other-hadoop.html) for more information about how Druid picks up Hadoop dependencies.)
## External Dependencies

View File

@ -66,7 +66,7 @@ Note: If Zookeeper isn't running, you'll have to start it again as described in
To start the Indexing Service:
```bash
java -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath config/_common:config/overlord:lib/*:<hadoop_config_path> io.druid.cli.Main server overlord
java -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath config/_common:config/overlord:lib/* io.druid.cli.Main server overlord
```
To start the Coordinator Node:
@ -247,7 +247,7 @@ Most common data ingestion problems are around timestamp formats and other malfo
Druid is designed for large data volumes, and most real-world data sets require batch indexing be done through a Hadoop job.
For this tutorial, we used [Hadoop 2.3.0](https://archive.apache.org/dist/hadoop/core/hadoop-2.3.0/). There are many pages on the Internet showing how to set up a single-node (standalone) Hadoop cluster, which is all that's needed for this example.
For this tutorial, we used [Hadoop 2.3.0](https://archive.apache.org/dist/hadoop/core/hadoop-2.3.0/), which is included under ```hadoop_druid_dependencies```. There are many pages on the Internet showing how to set up a single-node (standalone) Hadoop cluster, which is all that's needed for this example. For more information about how Druid picks up your Hadoop version, see [here](../operations/other-hadoop.html).
Before indexing the data, make sure you have a valid Hadoop cluster running. To build our Druid segment, we are going to submit a [Hadoop index task](../misc/tasks.html) to the indexing service. The grammar for the Hadoop index task is very similar to the index task of the last tutorial. The tutorial Hadoop index task should be located at:

View File

@ -28,7 +28,6 @@ cd ${CURR_DIR}
# start process
JAVA_ARGS="${JAVA_ARGS} -Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8"
JAVA_ARGS="${JAVA_ARGS} -Ddruid.extensions.localRepository=${MAVEN_DIR}"
DRUID_CP="${SCRIPT_DIR}/config/_common"
DRUID_CP="${DRUID_CP}:${SCRIPT_DIR}/config/$SERVER_TYPE"

View File

@ -37,8 +37,6 @@ fi
# start process
JAVA_ARGS="-Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8"
JAVA_ARGS="${JAVA_ARGS} -Ddruid.realtime.specFile=${SPEC_FILE}"
JAVA_ARGS="${JAVA_ARGS} -Ddruid.extensions.localRepository=${MAVEN_DIR}"
JAVA_ARGS="${JAVA_ARGS} -Ddruid.extensions.remoteRepositories=[]"
JAVA_ARGS="${JAVA_ARGS} -Ddruid.publish.type=noop"
DRUID_CP=${EXAMPLE_LOC}

View File

@ -15,10 +15,14 @@
# limitations under the License.
#
# Extensions (no deep storage model is listed - using local fs for deep storage - not recommended for production)
# Also, for production to use mysql add, "io.druid.extensions:mysql-metadata-storage"
druid.extensions.coordinates=["io.druid.extensions:druid-examples","io.druid.extensions:druid-kafka-eight"]
druid.extensions.localRepository=extensions-repo
# Extensions specified in the load list will be loaded by Druid (no deep storage model is listed - using local fs
# for deep storage - not recommended for production)
# Also, to use MySQL in production, add "mysql-metadata-storage" to the load list
# If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system.
# If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory.
# More info: http://druid.io/docs/latest/operations/including-extensions.html
druid.extensions.loadList=["druid-examples","druid-kafka-eight"]
# Zookeeper
druid.zk.service.host=localhost

View File

@ -1,146 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Druid - a distributed column store.
~ Copyright 2012 - 2015 Metamarkets Group Inc.
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>extensions-distribution</artifactId>
<name>extensions-distribution</name>
<description>extensions-distribution</description>
<parent>
<artifactId>druid</artifactId>
<groupId>io.druid</groupId>
<version>0.9.0-SNAPSHOT</version>
</parent>
<dependencies>
<!-- extensions to include in the extensions-repo -->
<!-- must be marked as optional to avoid interfering with distribution packaging -->
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-examples</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-namespace-lookup</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-cassandra-storage</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-kafka-eight</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-azure-extensions</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>mysql-metadata-storage</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-hdfs-storage</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>postgresql-metadata-storage</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-kafka-extraction-namespace</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-rabbitmq</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-histogram</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>distro-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<finalName>extensions-repo</finalName>
<descriptors>
<descriptor>src/assembly/assembly.xml</descriptor>
</descriptors>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -1,35 +0,0 @@
<?xml version="1.0"?>
<!--
~ Licensed to Metamarkets Group Inc. (Metamarkets) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. Metamarkets licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>extensions-repo</id>
<formats>
<format>zip</format>
</formats>
<repositories>
<repository>
<includes>
<include>io.druid.extensions:*</include>
</includes>
</repository>
</repositories>
</assembly>

View File

@ -29,7 +29,6 @@ import io.druid.guice.ExtensionsConfig;
import io.druid.guice.GuiceInjectors;
import io.druid.indexing.common.TaskToolbox;
import io.druid.initialization.Initialization;
import io.tesla.aether.internal.DefaultTeslaAether;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
@ -76,14 +75,10 @@ public abstract class HadoopTask extends AbstractTask
? hadoopDependencyCoordinates
: toolbox.getConfig().getDefaultHadoopCoordinates();
final DefaultTeslaAether aetherClient = Initialization.getAetherClient(extensionsConfig);
final List<URL> extensionURLs = Lists.newArrayList();
for (String coordinate : extensionsConfig.getCoordinates()) {
final ClassLoader coordinateLoader = Initialization.getClassLoaderForCoordinates(
aetherClient, coordinate, extensionsConfig.getDefaultVersion()
);
extensionURLs.addAll(Arrays.asList(((URLClassLoader) coordinateLoader).getURLs()));
for (final File extension : Initialization.getExtensionFilesToLoad(extensionsConfig)) {
final ClassLoader extensionLoader = Initialization.getClassLoaderForExtension(extension);
extensionURLs.addAll(Arrays.asList(((URLClassLoader) extensionLoader).getURLs()));
}
final List<URL> nonHadoopURLs = Lists.newArrayList();
@ -91,11 +86,14 @@ public abstract class HadoopTask extends AbstractTask
final List<URL> driverURLs = Lists.newArrayList();
driverURLs.addAll(nonHadoopURLs);
// put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
for (String hadoopDependencyCoordinate : finalHadoopDependencyCoordinates) {
final ClassLoader hadoopLoader = Initialization.getClassLoaderForCoordinates(
aetherClient, hadoopDependencyCoordinate, extensionsConfig.getDefaultVersion()
);
for (final File hadoopDependency :
Initialization.getHadoopDependencyFilesToLoad(
finalHadoopDependencyCoordinates,
extensionsConfig
)) {
final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency);
driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
}

View File

@ -105,7 +105,6 @@
<module>extensions/namespace-lookup</module>
<module>extensions/kafka-extraction-namespace</module>
<!-- distribution packaging -->
<module>extensions-distribution</module>
<module>distribution</module>
</modules>

View File

@ -18,7 +18,6 @@
package io.druid.guice;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import javax.validation.constraints.NotNull;
import java.util.List;
@ -27,54 +26,37 @@ import java.util.List;
*/
public class ExtensionsConfig
{
public static final String PACKAGE_VERSION = ExtensionsConfig.class.getPackage().getImplementationVersion();
@JsonProperty
@NotNull
private boolean searchCurrentClassloader = true;
@JsonProperty
@NotNull
private List<String> coordinates = ImmutableList.of();
// default version to use for extensions without version info
@JsonProperty
private String defaultVersion;
private String directory = "druid_extensions";
@JsonProperty
@NotNull
private String localRepository = String.format("%s/%s", System.getProperty("user.home"), ".m2/repository");
private String hadoopDependenciesDir = "hadoop_druid_dependencies";
@JsonProperty
@NotNull
private List<String> remoteRepositories = ImmutableList.of(
"https://repo1.maven.org/maven2/",
"https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"
);
private List<String> loadList;
public boolean searchCurrentClassloader()
{
return searchCurrentClassloader;
}
public List<String> getCoordinates()
public String getDirectory()
{
return coordinates;
return directory;
}
public String getDefaultVersion()
public String getHadoopDependenciesDir()
{
return defaultVersion != null ? defaultVersion : PACKAGE_VERSION;
return hadoopDependenciesDir;
}
public String getLocalRepository()
public List<String> getLoadList()
{
return localRepository;
}
public List<String> getRemoteRepositories()
{
return remoteRepositories;
return loadList;
}
@Override
@ -82,10 +64,9 @@ public class ExtensionsConfig
{
return "ExtensionsConfig{" +
"searchCurrentClassloader=" + searchCurrentClassloader +
", coordinates=" + coordinates +
", defaultVersion='" + getDefaultVersion() + '\'' +
", localRepository='" + localRepository + '\'' +
", remoteRepositories=" + remoteRepositories +
", directory='" + directory + '\'' +
", hadoopDependenciesDir='" + hadoopDependenciesDir + '\'' +
", loadList=" + loadList +
'}';
}
}

View File

@ -28,7 +28,6 @@ import com.google.inject.Key;
import com.google.inject.Module;
import com.google.inject.util.Modules;
import com.metamx.common.ISE;
import com.metamx.common.StringUtils;
import com.metamx.common.logger.Logger;
import io.druid.curator.CuratorModule;
import io.druid.curator.discovery.DiscoveryModule;
@ -57,27 +56,11 @@ import io.druid.metadata.storage.derby.DerbyMetadataStorageDruidModule;
import io.druid.server.initialization.EmitterModule;
import io.druid.server.initialization.jetty.JettyServerModule;
import io.druid.server.metrics.MetricsModule;
import io.tesla.aether.Repository;
import io.tesla.aether.TeslaAether;
import io.tesla.aether.internal.DefaultTeslaAether;
import org.eclipse.aether.artifact.Artifact;
import org.apache.commons.io.FileUtils;
import org.eclipse.aether.artifact.DefaultArtifact;
import org.eclipse.aether.collection.CollectRequest;
import org.eclipse.aether.graph.Dependency;
import org.eclipse.aether.graph.DependencyFilter;
import org.eclipse.aether.graph.DependencyNode;
import org.eclipse.aether.resolution.DependencyRequest;
import org.eclipse.aether.resolution.DependencyResolutionException;
import org.eclipse.aether.util.artifact.JavaScopes;
import org.eclipse.aether.util.filter.DependencyFilterUtils;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Collection;
@ -94,10 +77,6 @@ public class Initialization
private static final Logger log = new Logger(Initialization.class);
private static final Map<String, URLClassLoader> loadersMap = Maps.newHashMap();
private static final Set<String> exclusions = Sets.newHashSet(
"io.druid",
"com.metamx.druid"
);
private final static Map<Class, Set> extensionsMap = Maps.<Class, Set>newHashMap();
/**
@ -132,9 +111,9 @@ public class Initialization
}
/**
* Look for extension modules for the given class from both classpath and druid.extensions.coordinates.
* Extensions explicitly specified in druid.extensions.coordinates will be loaded first, if there is a duplicate
* extension from classpath, it will be ignored.
* Look for extension modules for the given class from both the classpath and the extensions directory. A user should never
* put the same extension in both the classpath and the extensions directory; if he/she does that, the one that is in the
* classpath will be loaded and the other will be ignored.
*
* @param config Extensions configuration
* @param clazz The class of extension module (e.g., DruidModule)
@ -143,25 +122,39 @@ public class Initialization
*/
public synchronized static <T> Collection<T> getFromExtensions(ExtensionsConfig config, Class<T> clazz)
{
final TeslaAether aether = getAetherClient(config);
final Set<T> retVal = Sets.newHashSet();
final Set<String> extensionNames = Sets.newHashSet();
final Set<String> loadedExtensionNames = Sets.newHashSet();
for (String coordinate : config.getCoordinates()) {
log.info("Loading extension[%s] for class[%s]", coordinate, clazz.getName());
if (config.searchCurrentClassloader()) {
for (T module : ServiceLoader.load(clazz, Thread.currentThread().getContextClassLoader())) {
final String moduleName = module.getClass().getCanonicalName();
if (moduleName == null) {
log.warn(
"Extension module [%s] was ignored because it doesn't have a canonical name, is it a local or anonymous class?",
module.getClass().getName()
);
} else if (!loadedExtensionNames.contains(moduleName)) {
log.info("Adding classpath extension module [%s] for class [%s]", moduleName, clazz.getName());
loadedExtensionNames.add(moduleName);
retVal.add(module);
}
}
}
for (File extension : getExtensionFilesToLoad(config)) {
log.info("Loading extension [%s] for class [%s]", extension.getName(), clazz.getName());
try {
URLClassLoader loader = getClassLoaderForCoordinates(aether, coordinate, config.getDefaultVersion());
final URLClassLoader loader = getClassLoaderForExtension(extension);
for (T module : ServiceLoader.load(clazz, loader)) {
String moduleName = module.getClass().getCanonicalName();
final String moduleName = module.getClass().getCanonicalName();
if (moduleName == null) {
log.warn(
"Extension module [%s] was ignored because it doesn't have a canonical name, is it a local or anonymous class?",
module.getClass().getName()
);
} else if (!extensionNames.contains(moduleName)) {
log.info("Adding remote extension module[%s] for class[%s]", moduleName, clazz.getName());
extensionNames.add(moduleName);
} else if (!loadedExtensionNames.contains(moduleName)) {
log.info("Adding local file system extension module [%s] for class [%s]", moduleName, clazz.getName());
loadedExtensionNames.add(moduleName);
retVal.add(module);
}
}
@ -171,199 +164,111 @@ public class Initialization
}
}
if (config.searchCurrentClassloader()) {
for (T module : ServiceLoader.load(clazz, Initialization.class.getClassLoader())) {
String moduleName = module.getClass().getCanonicalName();
if (moduleName == null) {
log.warn(
"Extension module [%s] was ignored because it doesn't have a canonical name, is it a local or anonymous class?",
module.getClass().getName()
);
} else if (!extensionNames.contains(moduleName)) {
log.info("Adding local extension module[%s] for class[%s]", moduleName, clazz.getName());
extensionNames.add(moduleName);
retVal.add(module);
}
}
}
// update the map with currently loaded modules
extensionsMap.put(clazz, retVal);
return retVal;
}
public static URLClassLoader getClassLoaderForCoordinates(
TeslaAether aether,
String coordinate,
String defaultVersion
)
throws DependencyResolutionException, MalformedURLException
/**
* Find all the extension files that should be loaded by druid.
* <p/>
* If user explicitly specifies druid.extensions.loadList, then it will look for those extensions under root
* extensions directory. If one of them is not found, druid will fail loudly.
* <p/>
* If the user doesn't specify druid.extensions.loadList, druid will load all the extensions
* under the root extensions directory.
*
* @param config ExtensionsConfig configured by druid.extensions.xxx
*
* @return an array of druid extension files that will be loaded by druid process
*/
public static File[] getExtensionFilesToLoad(ExtensionsConfig config)
{
URLClassLoader loader = loadersMap.get(coordinate);
if (loader == null) {
final CollectRequest collectRequest = new CollectRequest();
DefaultArtifact versionedArtifact;
try {
// this will throw an exception if no version is specified
versionedArtifact = new DefaultArtifact(coordinate);
}
catch (IllegalArgumentException e) {
// try appending the default version so we can specify artifacts without versions
if (defaultVersion != null) {
versionedArtifact = new DefaultArtifact(coordinate + ":" + defaultVersion);
} else {
throw e;
final File rootExtensionsDir = new File(config.getDirectory());
if (rootExtensionsDir.exists() && !rootExtensionsDir.isDirectory()) {
throw new ISE("Root extensions directory [%s] is not a directory!?", rootExtensionsDir);
}
File[] extensionsToLoad;
final List<String> toLoad = config.getLoadList();
if (toLoad == null) {
extensionsToLoad = rootExtensionsDir.listFiles();
} else {
int i = 0;
extensionsToLoad = new File[toLoad.size()];
for (final String extensionName : toLoad) {
final File extensionDir = new File(rootExtensionsDir, extensionName);
if (!extensionDir.isDirectory()) {
throw new ISE(
String.format(
"Extension [%s] specified in \"druid.extensions.loadList\" didn't exist!?",
extensionDir.getAbsolutePath()
)
);
}
}
collectRequest.setRoot(new Dependency(versionedArtifact, JavaScopes.RUNTIME));
DependencyRequest dependencyRequest = new DependencyRequest(
collectRequest,
DependencyFilterUtils.andFilter(
DependencyFilterUtils.classpathFilter(JavaScopes.RUNTIME),
new DependencyFilter()
{
@Override
public boolean accept(DependencyNode node, List<DependencyNode> parents)
{
if (accept(node.getArtifact())) {
return false;
}
for (DependencyNode parent : parents) {
if (accept(parent.getArtifact())) {
return false;
}
}
return true;
}
private boolean accept(final Artifact artifact)
{
return exclusions.contains(artifact.getGroupId());
}
}
)
);
try {
final List<Artifact> artifacts = aether.resolveArtifacts(dependencyRequest);
List<URL> urls = Lists.newArrayListWithExpectedSize(artifacts.size());
for (Artifact artifact : artifacts) {
if (!exclusions.contains(artifact.getGroupId())) {
urls.add(artifact.getFile().toURI().toURL());
} else {
log.debug("Skipped Artifact[%s]", artifact);
}
}
for (URL url : urls) {
log.info("Added URL[%s]", url);
}
loader = new URLClassLoader(urls.toArray(new URL[urls.size()]), Initialization.class.getClassLoader());
loadersMap.put(coordinate, loader);
}
catch (Exception e) {
log.error(e, "Unable to resolve artifacts for [%s].", dependencyRequest);
throw Throwables.propagate(e);
extensionsToLoad[i++] = extensionDir;
}
}
return loader;
return extensionsToLoad == null ? new File[]{} : extensionsToLoad;
}
public static DefaultTeslaAether getAetherClient(ExtensionsConfig config)
/**
* Find all the hadoop dependencies that should be loaded by druid
*
* @param hadoopDependencyCoordinates e.g.["org.apache.hadoop:hadoop-client:2.3.0"]
* @param extensionsConfig ExtensionsConfig configured by druid.extensions.xxx
*
* @return an array of hadoop dependency files that will be loaded by druid process
*/
public static File[] getHadoopDependencyFilesToLoad(
List<String> hadoopDependencyCoordinates,
ExtensionsConfig extensionsConfig
)
{
    final File rootHadoopDependenciesDir = new File(extensionsConfig.getHadoopDependenciesDir());
    if (rootHadoopDependenciesDir.exists() && !rootHadoopDependenciesDir.isDirectory()) {
      throw new ISE("Root Hadoop dependencies directory [%s] is not a directory!?", rootHadoopDependenciesDir);
    }
    final File[] hadoopDependenciesToLoad = new File[hadoopDependencyCoordinates.size()];
    int i = 0;
    for (final String coordinate : hadoopDependencyCoordinates) {
      final DefaultArtifact artifact = new DefaultArtifact(coordinate);
      final File hadoopDependencyDir = new File(rootHadoopDependenciesDir, artifact.getArtifactId());
      final File versionDir = new File(hadoopDependencyDir, artifact.getVersion());
      // find the hadoop dependency with the version specified in coordinate
      if (!hadoopDependencyDir.isDirectory() || !versionDir.isDirectory()) {
        throw new ISE(
            String.format("Hadoop dependency [%s] didn't exist!?", versionDir.getAbsolutePath())
        );
      }
      hadoopDependenciesToLoad[i++] = versionDir;
    }
return hadoopDependenciesToLoad;
}
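  // Usage sketch (illustrative only): the expected on-disk layout and call, assuming hadoop-client 2.3.0
  // has already been laid out by pull-deps under the hadoop dependencies directory.
  //
  //   <druid.extensions.hadoopDependenciesDir>/hadoop-client/2.3.0/*.jar
  //
  //   final File[] hadoopDirs = Initialization.getHadoopDependencyFilesToLoad(
  //       ImmutableList.of("org.apache.hadoop:hadoop-client:2.3.0"),
  //       extensionsConfig
  //   );
  //   // hadoopDirs[0] is the 2.3.0 version directory; its jars are put on the class path by the caller.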
/**
* @param extension The File instance of the extension we want to load
*
* @return a URLClassLoader that loads all the jars on which the extension is dependent
*
* @throws MalformedURLException
*/
public static URLClassLoader getClassLoaderForExtension(File extension) throws MalformedURLException
{
URLClassLoader loader = loadersMap.get(extension.getName());
if (loader == null) {
final Collection<File> jars = FileUtils.listFiles(extension, new String[]{"jar"}, false);
final URL[] urls = new URL[jars.size()];
int i = 0;
for (File jar : jars) {
final URL url = jar.toURI().toURL();
log.info("added URL[%s]", url);
urls[i++] = url;
}
loader = new URLClassLoader(urls, Initialization.class.getClassLoader());
loadersMap.put(extension.getName(), loader);
}
return loader;
}
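  // Usage sketch (illustrative only): how the two helpers compose; this mirrors the CliHadoopIndexer
  // change further down in this commit.
  //
  //   for (File extension : Initialization.getExtensionFilesToLoad(extensionsConfig)) {
  //     final URLClassLoader loader = Initialization.getClassLoaderForExtension(extension);
  //     // loader now exposes every *.jar directly under that extension's directory
  //   }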
public static Injector makeInjectorWithModules(final Injector baseInjector, Iterable<? extends Module> modules)
View File
@ -26,6 +26,7 @@ import com.google.common.collect.Sets;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.metamx.common.ISE;
import io.druid.guice.ExtensionsConfig;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.JsonConfigProvider;
@ -33,18 +34,28 @@ import io.druid.guice.annotations.Self;
import io.druid.server.DruidNode;
import org.junit.Assert;
import org.junit.FixMethodOrder;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class InitializationTest
{
@Rule
public final TemporaryFolder temporaryFolder = new TemporaryFolder();
@Test
public void test01InitialModulesEmpty() throws Exception
{
@ -102,22 +113,7 @@ public class InitializationTest
{
Initialization.getLoadersMap().put("xyz", (URLClassLoader) Initialization.class.getClassLoader());
Collection<DruidModule> modules = Initialization.getFromExtensions(new ExtensionsConfig(), DruidModule.class);
Set<String> loadedModuleNames = Sets.newHashSet();
for (DruidModule module : modules) {
@ -149,6 +145,32 @@ public class InitializationTest
Assert.assertNotNull(injector);
}
@Test
public void test06GetClassLoaderForExtension() throws IOException
{
final File some_extension_dir = temporaryFolder.newFolder();
final File a_jar = new File(some_extension_dir, "a.jar");
final File b_jar = new File(some_extension_dir, "b.jar");
final File c_jar = new File(some_extension_dir, "c.jar");
a_jar.createNewFile();
b_jar.createNewFile();
c_jar.createNewFile();
final URLClassLoader loader = Initialization.getClassLoaderForExtension(some_extension_dir);
final URL[] expectedURLs = new URL[]{a_jar.toURI().toURL(), b_jar.toURI().toURL(), c_jar.toURI().toURL()};
final URL[] actualURLs = loader.getURLs();
Arrays.sort(
actualURLs, new Comparator<URL>()
{
@Override
public int compare(URL o1, URL o2)
{
return o1.getPath().compareTo(o2.getPath());
}
}
);
Assert.assertArrayEquals(expectedURLs, actualURLs);
}
@Test
public void testGetLoadedModules()
{
@ -162,6 +184,199 @@ public class InitializationTest
Assert.assertEquals("Set from loaded modules #2 should be same!", modules, loadedModules2);
}
@Test
public void testGetExtensionFilesToLoad_non_exist_extensions_dir()
{
Assert.assertArrayEquals(
"Non-exist root extensionsDir should return emply array of File",
new File[]{},
Initialization.getExtensionFilesToLoad(new ExtensionsConfig())
);
}
@Test(expected = ISE.class)
public void testGetExtensionFilesToLoad_wrong_type_extensions_dir() throws IOException
{
final File extensionsDir = temporaryFolder.newFile();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public String getDirectory()
{
return extensionsDir.getAbsolutePath();
}
};
Initialization.getExtensionFilesToLoad(config);
}
@Test
public void testGetExtensionFilesToLoad_empty_extensions_dir() throws IOException
{
final File extensionsDir = temporaryFolder.newFolder();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public String getDirectory()
{
return extensionsDir.getAbsolutePath();
}
};
Assert.assertArrayEquals(
"Empty root extensionsDir should return emply array of File",
new File[]{},
Initialization.getExtensionFilesToLoad(new ExtensionsConfig())
);
}
/**
 * If druid.extensions.loadList is not specified, Initialization.getExtensionFilesToLoad is supposed to return all the
* extension folders under root extensions directory.
*/
@Test
public void testGetExtensionFilesToLoad_null_load_list() throws IOException
{
final File extensionsDir = temporaryFolder.newFolder();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public String getDirectory()
{
return extensionsDir.getAbsolutePath();
}
};
final File mysql_metadata_storage = new File(extensionsDir, "mysql-metadata-storage");
final File druid_kafka_eight = new File(extensionsDir, "druid-kafka-eight");
mysql_metadata_storage.mkdir();
druid_kafka_eight.mkdir();
final File[] expectedFileList = new File[]{druid_kafka_eight, mysql_metadata_storage};
final File[] actualFileList = Initialization.getExtensionFilesToLoad(config);
Arrays.sort(actualFileList);
Assert.assertArrayEquals(expectedFileList, actualFileList);
}
/**
 * If druid.extensions.loadList is specified, Initialization.getExtensionFilesToLoad is supposed to return all the extension
 * folders that appear in the load list.
*/
@Test
public void testGetExtensionFilesToLoad_with_load_list() throws IOException
{
final File extensionsDir = temporaryFolder.newFolder();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public List<String> getLoadList()
{
return Arrays.asList("mysql-metadata-storage", "druid-kafka-eight");
}
@Override
public String getDirectory()
{
return extensionsDir.getAbsolutePath();
}
};
final File mysql_metadata_storage = new File(extensionsDir, "mysql-metadata-storage");
final File druid_kafka_eight = new File(extensionsDir, "druid-kafka-eight");
final File random_extension = new File(extensionsDir, "random-extensions");
mysql_metadata_storage.mkdir();
druid_kafka_eight.mkdir();
random_extension.mkdir();
final File[] expectedFileList = new File[]{druid_kafka_eight, mysql_metadata_storage};
final File[] actualFileList = Initialization.getExtensionFilesToLoad(config);
Arrays.sort(actualFileList);
Assert.assertArrayEquals(expectedFileList, actualFileList);
}
/**
 * If druid.extensions.loadList is specified but contains an extension that is not present under the root extensions directory,
* Initialization.getExtensionFilesToLoad is supposed to throw ISE.
*/
@Test(expected = ISE.class)
public void testGetExtensionFilesToLoad_with_non_exist_item_in_load_list() throws IOException
{
final File extensionsDir = temporaryFolder.newFolder();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public List<String> getLoadList()
{
return Arrays.asList("mysql-metadata-storage", "druid-kafka-eight");
}
@Override
public String getDirectory()
{
return extensionsDir.getAbsolutePath();
}
};
final File druid_kafka_eight = new File(extensionsDir, "druid-kafka-eight");
final File random_extension = new File(extensionsDir, "random-extensions");
druid_kafka_eight.mkdir();
random_extension.mkdir();
Initialization.getExtensionFilesToLoad(config);
}
@Test(expected = ISE.class)
public void testGetHadoopDependencyFilesToLoad_wrong_type_root_hadoop_dependencies_dir() throws IOException
{
final File rootHadoopDependenciesDir = temporaryFolder.newFile();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public String getHadoopDependenciesDir()
{
return rootHadoopDependenciesDir.getAbsolutePath();
}
};
Initialization.getHadoopDependencyFilesToLoad(ImmutableList.<String>of(), config);
}
@Test(expected = ISE.class)
public void testGetHadoopDependencyFilesToLoad_non_exist_version_dir() throws IOException
{
final File rootHadoopDependenciesDir = temporaryFolder.newFolder();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public String getHadoopDependenciesDir()
{
return rootHadoopDependenciesDir.getAbsolutePath();
}
};
final File hadoopClient = new File(rootHadoopDependenciesDir, "hadoop-client");
hadoopClient.mkdir();
Initialization.getHadoopDependencyFilesToLoad(ImmutableList.of("org.apache.hadoop:hadoop-client:2.3.0"), config);
}
@Test
public void testGetHadoopDependencyFilesToLoad_with_hadoop_coordinates() throws IOException
{
final File rootHadoopDependenciesDir = temporaryFolder.newFolder();
final ExtensionsConfig config = new ExtensionsConfig()
{
@Override
public String getHadoopDependenciesDir()
{
return rootHadoopDependenciesDir.getAbsolutePath();
}
};
final File hadoopClient = new File(rootHadoopDependenciesDir, "hadoop-client");
final File versionDir = new File(hadoopClient, "2.3.0");
hadoopClient.mkdir();
versionDir.mkdir();
final File[] expectedFileList = new File[]{versionDir};
final File[] actualFileList = Initialization.getHadoopDependencyFilesToLoad(
ImmutableList.of(
"org.apache.hadoop:hadoop-client:2.3.0"
), config
);
Assert.assertArrayEquals(expectedFileList, actualFileList);
}
public static class TestDruidModule implements DruidModule
{
@Override
View File
@ -26,7 +26,6 @@ import io.airlift.airline.Command;
import io.airlift.airline.Option;
import io.druid.guice.ExtensionsConfig;
import io.druid.initialization.Initialization;
import io.tesla.aether.internal.DefaultTeslaAether;
import java.io.File;
import java.lang.reflect.Method;
@ -76,14 +75,10 @@ public class CliHadoopIndexer implements Runnable
allCoordinates.add(DEFAULT_HADOOP_COORDINATES);
}
final List<URL> extensionURLs = Lists.newArrayList();
for (final File extension : Initialization.getExtensionFilesToLoad(extensionsConfig)) {
final ClassLoader extensionLoader = Initialization.getClassLoaderForExtension(extension);
extensionURLs.addAll(Arrays.asList(((URLClassLoader) extensionLoader).getURLs()));
}
final List<URL> nonHadoopURLs = Lists.newArrayList();
@ -92,10 +87,8 @@ public class CliHadoopIndexer implements Runnable
final List<URL> driverURLs = Lists.newArrayList();
driverURLs.addAll(nonHadoopURLs);
// put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
for (File hadoopDependency : Initialization.getHadoopDependencyFilesToLoad(allCoordinates, extensionsConfig)) {
final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency);
driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
}
View File
@ -18,58 +18,380 @@
package io.druid.cli;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.StringUtils;
import com.metamx.common.logger.Logger;
import io.airlift.airline.Command;
import io.airlift.airline.Option;
import io.druid.guice.ExtensionsConfig;
import io.druid.indexing.common.config.TaskConfig;
import io.druid.initialization.Initialization;
import io.tesla.aether.Repository;
import io.tesla.aether.TeslaAether;
import io.tesla.aether.internal.DefaultTeslaAether;
import org.apache.commons.io.FileUtils;
import org.eclipse.aether.artifact.Artifact;
import org.eclipse.aether.artifact.DefaultArtifact;
import org.eclipse.aether.collection.CollectRequest;
import org.eclipse.aether.graph.Dependency;
import org.eclipse.aether.graph.DependencyFilter;
import org.eclipse.aether.graph.DependencyNode;
import org.eclipse.aether.resolution.DependencyRequest;
import org.eclipse.aether.util.artifact.JavaScopes;
import org.eclipse.aether.util.filter.DependencyFilterUtils;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Set;
@Command(
name = "pull-deps",
description = "Pull down dependencies to the local repository specified by druid.extensions.localRepository"
description = "Pull down dependencies to the local repository specified by druid.extensions.localRepository, extensions directory specified by druid.extensions.extensionsDir and hadoop depenencies directory specified by druid.extensions.hadoopDependenciesDir"
)
public class PullDependencies implements Runnable
{
@Option(name = {"-c", "--coordinate"},
title = "coordinate",
description = "extra dependencies to pull down (e.g. hadoop coordinates)",
required = false)
public List<String> coordinates;
private static final Logger log = new Logger(PullDependencies.class);
@Option(name = "--no-default-hadoop",
description = "don't pull down the default HadoopIndexTask dependencies",
required = false)
public boolean noDefaultHadoop;
private static final Set<String> exclusions = Sets.newHashSet(
"io.druid",
"com.metamx.druid"
);
private TeslaAether aether;
@Inject
public ExtensionsConfig extensionsConfig;
@Option(
name = {"-c", "--coordinate"},
title = "coordinate",
description = "Extension coordinate to pull down, followed by a maven coordinate, e.g. io.druid.extensions:mysql-metadata-storage",
required = false)
public List<String> coordinates = Lists.newArrayList();
@Option(
name = {"-h", "--hadoop-coordinate"},
title = "hadoop coordinate",
description = "Hadoop dependency to pull down, followed by a maven coordinate, e.g. org.apache.hadoop:hadoop-client:2.4.0",
required = false)
public List<String> hadoopCoordinates = Lists.newArrayList();
@Option(
name = "--no-default-hadoop",
description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client:2.3.0. If `-h` option is supplied, then default hadoop coordinate will not be downloaded.",
required = false)
public boolean noDefaultHadoop = false;
@Option(
name = "--clean",
title = "Remove exisiting extension and hadoop dependencies directories before pulling down dependencies.",
required = false)
public boolean clean = false;
@Option(
name = {"-l", "--localRepository"},
title = "A local repostiry that Maven will use to put downloaded files. Then pull-deps will lay these files out into the extensions directory as needed.",
required = false
)
public String localRepository = String.format("%s/%s", System.getProperty("user.home"), ".m2/repository");
@Option(
name = {"-r", "--remoteRepositories"},
title = "A JSON Array list of remote repositories to load dependencies from.",
required = false
)
List<String> remoteRepositories = ImmutableList.of(
"https://repo1.maven.org/maven2/",
"https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"
);
@Option(
name = {"-d", "--defaultVersion"},
title = "Version to use for extension artifacts without version information.",
required = false
)
public String defaultVersion = PullDependencies.class.getPackage().getImplementationVersion();
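  // Sample invocation built from the options above (a sketch; the classpath, main-class wiring and
  // version number are assumptions, not part of this change):
  //
  //   java -classpath "lib/*" io.druid.cli.Main tools pull-deps \
  //       --clean \
  //       --defaultVersion 0.8.0 \
  //       -c io.druid.extensions:mysql-metadata-storage \
  //       -h org.apache.hadoop:hadoop-client:2.4.0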
public PullDependencies()
{
}
// Used for testing only
PullDependencies(TeslaAether aether, ExtensionsConfig extensionsConfig)
{
this.aether = aether;
this.extensionsConfig = extensionsConfig;
}
@Override
public void run()
{
if (aether == null) {
aether = getAetherClient();
}
final File extensionsDir = new File(extensionsConfig.getDirectory());
final File hadoopDependenciesDir = new File(extensionsConfig.getHadoopDependenciesDir());
if (clean) {
try {
FileUtils.deleteDirectory(extensionsDir);
FileUtils.deleteDirectory(hadoopDependenciesDir);
}
catch (IOException e) {
log.error("Unable to clear extension directory at [%s]", extensionsConfig.getDirectory());
throw Throwables.propagate(e);
}
}
createRootExtensionsDirectory(extensionsDir);
createRootExtensionsDirectory(hadoopDependenciesDir);
try {
log.info("Start downloading dependencies for extension coordinates: [%s]", coordinates);
for (final String coordinate : coordinates) {
final Artifact versionedArtifact = getArtifact(coordinate);
File currExtensionDir = new File(extensionsDir, versionedArtifact.getArtifactId());
createExtensionDirectory(coordinate, currExtensionDir);
downloadExtension(versionedArtifact, currExtensionDir);
}
log.info("Finish downloading dependencies for extension coordinates: [%s]", coordinates);
if (!noDefaultHadoop && hadoopCoordinates.isEmpty()) {
hadoopCoordinates.addAll(TaskConfig.DEFAULT_DEFAULT_HADOOP_COORDINATES);
}
log.info("Start downloading dependencies for hadoop extension coordinates: [%s]", hadoopCoordinates);
for (final String hadoopCoordinate : hadoopCoordinates) {
final Artifact versionedArtifact = getArtifact(hadoopCoordinate);
File currExtensionDir = new File(hadoopDependenciesDir, versionedArtifact.getArtifactId());
createExtensionDirectory(hadoopCoordinate, currExtensionDir);
// add a version folder for hadoop dependency directory
currExtensionDir = new File(currExtensionDir, versionedArtifact.getVersion());
createExtensionDirectory(hadoopCoordinate, currExtensionDir);
downloadExtension(versionedArtifact, currExtensionDir);
}
log.info("Finish downloading dependencies for hadoop extension coordinates: [%s]", hadoopCoordinates);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
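  // Resulting layout after a successful run (a sketch; artifact names are examples):
  //
  //   <druid.extensions.directory>/
  //     mysql-metadata-storage/            <- one directory per extension coordinate (artifactId)
  //       <extension jar + transitive runtime jars>
  //   <druid.extensions.hadoopDependenciesDir>/
  //     hadoop-client/
  //       2.3.0/                           <- one directory per hadoop dependency version
  //         <hadoop-client jars>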
private Artifact getArtifact(String coordinate)
{
DefaultArtifact versionedArtifact;
try {
// this will throw an exception if no version is specified
versionedArtifact = new DefaultArtifact(coordinate);
}
catch (IllegalArgumentException e) {
// try appending the default version so we can specify artifacts without versions
if (defaultVersion != null) {
versionedArtifact = new DefaultArtifact(coordinate + ":" + defaultVersion);
} else {
throw e;
}
}
return versionedArtifact;
}
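  // For example (sketch): with defaultVersion assumed to be 0.8.0, getArtifact("io.druid.extensions:mysql-metadata-storage")
  // resolves to "io.druid.extensions:mysql-metadata-storage:0.8.0", while a fully qualified coordinate
  // such as "org.apache.hadoop:hadoop-client:2.4.0" is used as-is.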
/**
* Download the extension given its maven coordinate
*
* @param versionedArtifact The maven artifact of the extension
* @param toLocation The location where this extension will be downloaded to
*/
private void downloadExtension(Artifact versionedArtifact, File toLocation)
{
final CollectRequest collectRequest = new CollectRequest();
collectRequest.setRoot(new Dependency(versionedArtifact, JavaScopes.RUNTIME));
final DependencyRequest dependencyRequest = new DependencyRequest(
collectRequest,
DependencyFilterUtils.andFilter(
DependencyFilterUtils.classpathFilter(JavaScopes.RUNTIME),
new DependencyFilter()
{
@Override
public boolean accept(DependencyNode node, List<DependencyNode> parents)
{
if (accept(node.getArtifact())) {
return false;
}
for (DependencyNode parent : parents) {
if (accept(parent.getArtifact())) {
return false;
}
}
return true;
}
private boolean accept(final Artifact artifact)
{
return exclusions.contains(artifact.getGroupId());
}
}
)
);
try {
log.info("Start downloading extension [%s]", versionedArtifact);
final List<Artifact> artifacts = aether.resolveArtifacts(dependencyRequest);
for (Artifact artifact : artifacts) {
if (!exclusions.contains(artifact.getGroupId())) {
log.info("Adding file [%s] at [%s]", artifact.getFile().getName(), toLocation.getAbsolutePath());
FileUtils.copyFileToDirectory(artifact.getFile(), toLocation);
} else {
log.debug("Skipped Artifact[%s]", artifact);
}
}
}
catch (Exception e) {
log.error(e, "Unable to resolve artifacts for [%s].", dependencyRequest);
throw Throwables.propagate(e);
}
log.info("Finish downloading extension [%s]", versionedArtifact);
}
private DefaultTeslaAether getAetherClient()
{
/*
DefaultTeslaAether logs a bunch of stuff to System.out, which is annoying. We choose to disable that
unless debug logging is turned on. "Disabling" it, however, is kinda bass-ackwards. We copy out a reference
to the current System.out, and set System.out to a noop output stream. Then, after DefaultTeslaAether has pulled
the reference, we swap things back.
This has implications for other things that are running in parallel to this. Namely, if anything else also grabs
a reference to System.out or tries to log to it while we have things adjusted like this, then they will also log
to nothingness. Fortunately, the code that calls this is single-threaded and hopefully shouldn't be running
alongside anything else that's grabbing System.out. But who knows.
*/
List<String> remoteUriList = remoteRepositories;
List<Repository> remoteRepositories = Lists.newArrayList();
for (String uri : remoteUriList) {
try {
URI u = new URI(uri);
Repository r = new Repository(uri);
if (u.getUserInfo() != null) {
String[] auth = u.getUserInfo().split(":", 2);
if (auth.length == 2) {
r.setUsername(auth[0]);
r.setPassword(auth[1]);
} else {
log.warn(
"Invalid credentials in repository URI, expecting [<user>:<password>], got [%s] for [%s]",
u.getUserInfo(),
uri
);
}
}
remoteRepositories.add(r);
}
catch (URISyntaxException e) {
throw Throwables.propagate(e);
}
}
if (log.isTraceEnabled() || log.isDebugEnabled()) {
return new DefaultTeslaAether(
localRepository,
remoteRepositories.toArray(new Repository[remoteRepositories.size()])
);
}
PrintStream oldOut = System.out;
try {
System.setOut(
new PrintStream(
new OutputStream()
{
@Override
public void write(int b) throws IOException
{
}
@Override
public void write(byte[] b) throws IOException
{
}
@Override
public void write(byte[] b, int off, int len) throws IOException
{
}
}
, false, StringUtils.UTF8_STRING
)
);
return new DefaultTeslaAether(
localRepository,
remoteRepositories.toArray(new Repository[remoteRepositories.size()])
);
}
catch (UnsupportedEncodingException e) {
// should never happen
throw new IllegalStateException(e);
}
finally {
System.setOut(oldOut);
}
}
private void createRootExtensionsDirectory(File atLocation)
{
if (!atLocation.mkdirs()) {
throw new ISE(
String.format(
"Unable to create extensions directory at [%s]",
atLocation.getAbsolutePath()
)
);
}
}
/**
* Create the extension directory for a specific maven coordinate.
* The name of this directory should be the artifactId in the coordinate
*/
private void createExtensionDirectory(String coordinate, File atLocation)
{
if (atLocation.isDirectory()) {
log.info("Directory [%s] already exists, skipping creating a directory", atLocation.getAbsolutePath());
return;
}
if (!atLocation.mkdir()) {
throw new ISE(
String.format(
"Unable to create directory at [%s] for coordinate [%s]",
atLocation.getAbsolutePath(),
coordinate
)
);
}
}
}
View File
@ -0,0 +1,222 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.cli;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.metamx.common.ISE;
import io.druid.guice.ExtensionsConfig;
import io.tesla.aether.internal.DefaultTeslaAether;
import org.eclipse.aether.artifact.Artifact;
import org.eclipse.aether.artifact.DefaultArtifact;
import org.eclipse.aether.resolution.DependencyRequest;
import org.eclipse.aether.resolution.DependencyResolutionException;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
/**
*/
public class PullDependenciesTest
{
private static final String EXTENSION_A_COORDINATE = "groupX:extension_A:123";
private static final String EXTENSION_B_COORDINATE = "groupY:extension_B:456";
private static final String HADOOP_CLIENT_2_3_0_COORDINATE = "org.apache.hadoop:hadoop-client:2.3.0";
private static final String HADOOP_CLIENT_2_4_0_COORDINATE = "org.apache.hadoop:hadoop-client:2.4.0";
@Rule
public final TemporaryFolder temporaryFolder = new TemporaryFolder();
private File localRepo; // a mock local repository that stores jars
private final Artifact extension_A = new DefaultArtifact(EXTENSION_A_COORDINATE);
private final Artifact extension_B = new DefaultArtifact(EXTENSION_B_COORDINATE);
private final Artifact hadoop_client_2_3_0 = new DefaultArtifact(HADOOP_CLIENT_2_3_0_COORDINATE);
private final Artifact hadoop_client_2_4_0 = new DefaultArtifact(HADOOP_CLIENT_2_4_0_COORDINATE);
private PullDependencies pullDependencies;
private File rootExtensionsDir;
private File rootHadoopDependenciesDir;
private HashMap<Artifact, List<String>> extensionToJars; // map Artifact to its associated jars' names
@Before
public void setUp() throws Exception
{
localRepo = temporaryFolder.newFolder();
extensionToJars = new HashMap<>();
extensionToJars.put(extension_A, ImmutableList.of("a.jar", "b.jar", "c.jar"));
extensionToJars.put(extension_B, ImmutableList.of("d.jar", "e.jar"));
extensionToJars.put(hadoop_client_2_3_0, ImmutableList.of("f.jar", "g.jar"));
extensionToJars.put(hadoop_client_2_4_0, ImmutableList.of("h.jar", "i.jar"));
rootExtensionsDir = new File(temporaryFolder.getRoot(), "druid_extensions");
rootHadoopDependenciesDir = new File(temporaryFolder.getRoot(), "druid_hadoop_dependencies");
pullDependencies = new PullDependencies(
new DefaultTeslaAether()
{
@Override
public List<Artifact> resolveArtifacts(DependencyRequest request) throws DependencyResolutionException
{
return getArtifactsForExtension(request.getCollectRequest().getRoot().getArtifact());
}
},
new ExtensionsConfig()
{
@Override
public String getDirectory()
{
return rootExtensionsDir.getAbsolutePath();
}
@Override
public String getHadoopDependenciesDir()
{
return rootHadoopDependenciesDir.getAbsolutePath();
}
}
);
pullDependencies.coordinates = ImmutableList.of(EXTENSION_A_COORDINATE, EXTENSION_B_COORDINATE);
pullDependencies.hadoopCoordinates = ImmutableList.of(
HADOOP_CLIENT_2_3_0_COORDINATE,
HADOOP_CLIENT_2_4_0_COORDINATE
);
}
private List<Artifact> getArtifactsForExtension(Artifact artifact)
{
final List<String> jarNames = extensionToJars.get(artifact);
final List<Artifact> artifacts = Lists.newArrayList();
for (String jarName : jarNames) {
final File jarFile = new File(localRepo, jarName);
try {
jarFile.createNewFile();
}
catch (IOException e) {
Throwables.propagate(e);
}
artifacts.add(new DefaultArtifact(null, jarName, null, "jar", "1.0", null, jarFile));
}
return artifacts;
}
private File[] getExpectedJarFiles(Artifact artifact)
{
final String artifactId = artifact.getArtifactId();
final List<String> jarNames = extensionToJars.get(artifact);
final File[] expectedJars = new File[jarNames.size()];
if (artifactId.equals("hadoop-client")) {
final String version = artifact.getVersion();
for (int i = 0; i < jarNames.size(); ++i) {
expectedJars[i] = new File(
String.format(
"%s/%s/%s/%s",
rootHadoopDependenciesDir,
artifactId,
version,
jarNames.get(i)
)
);
}
} else {
for (int i = 0; i < jarNames.size(); ++i) {
expectedJars[i] = new File(String.format("%s/%s/%s", rootExtensionsDir, artifactId, jarNames.get(i)));
}
}
return expectedJars;
}
/**
* If --clean is not specified and something already exists at druid.extensions.directory, ISE should be thrown
*/
@Test(expected = ISE.class)
public void testPullDependencies_root_extension_dir_exists()
{
rootExtensionsDir.mkdir();
pullDependencies.run();
}
/**
* If --clean is not specified and something already exists at druid.extensions.hadoopDependenciesDir,
* ISE should be thrown
*/
@Test(expected = ISE.class)
public void testPullDependencies_root_hadoop_dependencies_dir_exists()
{
rootHadoopDependenciesDir.mkdir();
pullDependencies.run();
}
@Test
public void testPullDependencies()
{
rootExtensionsDir.mkdir();
rootHadoopDependenciesDir.mkdir();
// Because --clean is specified, pull-deps will first remove existing root extensions and hadoop dependencies
pullDependencies.clean = true;
pullDependencies.run();
final File[] actualExtensions = rootExtensionsDir.listFiles();
Arrays.sort(actualExtensions);
Assert.assertEquals(2, actualExtensions.length);
Assert.assertEquals(extension_A.getArtifactId(), actualExtensions[0].getName());
Assert.assertEquals(extension_B.getArtifactId(), actualExtensions[1].getName());
final File[] jarsUnderExtensionA = actualExtensions[0].listFiles();
Arrays.sort(jarsUnderExtensionA);
Assert.assertArrayEquals(getExpectedJarFiles(extension_A), jarsUnderExtensionA);
final File[] jarsUnderExtensionB = actualExtensions[1].listFiles();
Arrays.sort(jarsUnderExtensionB);
Assert.assertArrayEquals(getExpectedJarFiles(extension_B), jarsUnderExtensionB);
final File[] actualHadoopDependencies = rootHadoopDependenciesDir.listFiles();
Arrays.sort(actualHadoopDependencies);
Assert.assertEquals(1, actualHadoopDependencies.length);
Assert.assertEquals(hadoop_client_2_3_0.getArtifactId(), actualHadoopDependencies[0].getName());
final File[] versionDirsUnderHadoopClient = actualHadoopDependencies[0].listFiles();
Assert.assertEquals(2, versionDirsUnderHadoopClient.length);
Arrays.sort(versionDirsUnderHadoopClient);
Assert.assertEquals(hadoop_client_2_3_0.getVersion(), versionDirsUnderHadoopClient[0].getName());
Assert.assertEquals(hadoop_client_2_4_0.getVersion(), versionDirsUnderHadoopClient[1].getName());
final File[] jarsUnder2_3_0 = versionDirsUnderHadoopClient[0].listFiles();
Arrays.sort(jarsUnder2_3_0);
Assert.assertArrayEquals(getExpectedJarFiles(hadoop_client_2_3_0), jarsUnder2_3_0);
final File[] jarsUnder2_4_0 = versionDirsUnderHadoopClient[1].listFiles();
Arrays.sort(jarsUnder2_4_0);
Assert.assertArrayEquals(getExpectedJarFiles(hadoop_client_2_4_0), jarsUnder2_4_0);
}
}