diff --git a/.travis.yml b/.travis.yml
index bcf50159b79..de38a02fb58 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -139,6 +139,28 @@ jobs:
" && false; }
+ - name: "analyze hadoop 3 dependencies"
+ script: |-
+ MAVEN_OPTS='-Xmx3000m' ${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true -Phadoop3 || { echo "
+
+ The dependency analysis has found a dependency that is either:
+
+ 1) Used and undeclared: These are available as a transitive dependency but should be explicitly
+ added to the POM to ensure the dependency version. The XML to add the dependencies to the POM is
+ shown above.
+
+ 2) Unused and declared: These are not needed and removing them from the POM will speed up the build
+ and reduce the artifact size. The dependencies to remove are shown above.
+
+ If there are false positive dependency analysis warnings, they can be suppressed:
+ https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html#usedDependencies
+ https://maven.apache.org/plugins/maven-dependency-plugin/examples/exclude-dependencies-from-dependency-analysis.html
+
+ For more information, refer to:
+ https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html
+
+ " && false; }
+
- name: "intellij inspections"
script: >
./check_test_suite.py && travis_terminate 0 || docker run --rm
diff --git a/distribution/bin/check-licenses.py b/distribution/bin/check-licenses.py
index 50151b2a4a6..8af9e59226e 100755
--- a/distribution/bin/check-licenses.py
+++ b/distribution/bin/check-licenses.py
@@ -233,6 +233,7 @@ def build_compatible_license_names():
compatible_licenses['BSD-2-Clause License'] = 'BSD-2-Clause License'
compatible_licenses['BSD-2-Clause'] = 'BSD-2-Clause License'
+ compatible_licenses['BSD 2-Clause license'] = 'BSD 2-Clause License'
compatible_licenses['BSD-3-Clause License'] = 'BSD-3-Clause License'
compatible_licenses['New BSD license'] = 'BSD-3-Clause License'
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 6b43e321246..ad6fd650270 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -288,6 +288,186 @@
+
+ dist-hadoop3
+
+ false
+
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+
+
+ generate-readme
+ initialize
+
+ exec
+
+
+ ${project.basedir}/bin/build-textfile-readme.sh
+
+ ${project.basedir}/../
+ ${project.parent.version}
+
+
+
+
+ generate-binary-license
+ initialize
+
+ exec
+
+
+ ${project.basedir}/bin/generate-binary-license.py
+
+ ${project.parent.basedir}/licenses/APACHE2
+ ${project.parent.basedir}/licenses.yaml
+ ${project.parent.basedir}/LICENSE.BINARY
+
+
+
+
+ generate-binary-notice
+ initialize
+
+ exec
+
+
+ ${project.basedir}/bin/generate-binary-notice.py
+
+ ${project.parent.basedir}/NOTICE
+ ${project.parent.basedir}/licenses.yaml
+ ${project.parent.basedir}/NOTICE.BINARY
+
+
+
+
+ pull-deps
+ package
+
+ exec
+
+
+ java
+
+ -classpath
+
+ -Ddruid.extensions.loadList=[]
+ -Ddruid.extensions.directory=${project.build.directory}/extensions
+
+
+ -Ddruid.extensions.hadoopDependenciesDir=${project.build.directory}/hadoop-dependencies
+
+ -Dhadoop3.enabled=true
+ org.apache.druid.cli.Main
+ tools
+ pull-deps
+ --clean
+ --defaultVersion
+ ${project.parent.version}
+ -l
+ ${settings.localRepository}
+ -h
+ org.apache.hadoop:hadoop-client-api:${hadoop.compile.version}
+ -h
+ org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version}
+ -c
+ org.apache.druid.extensions:druid-avro-extensions
+ -c
+ org.apache.druid.extensions:druid-azure-extensions
+ -c
+ org.apache.druid.extensions:druid-bloom-filter
+ -c
+ org.apache.druid.extensions:druid-datasketches
+ -c
+ org.apache.druid.extensions:druid-hdfs-storage
+ -c
+ org.apache.druid.extensions:druid-histogram
+ -c
+ org.apache.druid.extensions:druid-kafka-extraction-namespace
+ -c
+ org.apache.druid.extensions:druid-kafka-indexing-service
+ -c
+ org.apache.druid.extensions:druid-kinesis-indexing-service
+ -c
+ org.apache.druid.extensions:druid-lookups-cached-global
+ -c
+ org.apache.druid.extensions:druid-lookups-cached-single
+ -c
+ org.apache.druid.extensions:druid-protobuf-extensions
+ -c
+ org.apache.druid.extensions:mysql-metadata-storage
+ -c
+ org.apache.druid.extensions:druid-orc-extensions
+ -c
+ org.apache.druid.extensions:druid-parquet-extensions
+ -c
+ org.apache.druid.extensions:postgresql-metadata-storage
+ -c
+ org.apache.druid.extensions:druid-kerberos
+ -c
+ org.apache.druid.extensions:druid-s3-extensions
+ -c
+ org.apache.druid.extensions:druid-aws-rds-extensions
+ -c
+ org.apache.druid.extensions:druid-ec2-extensions
+ -c
+ org.apache.druid.extensions:druid-google-extensions
+ -c
+ org.apache.druid.extensions:druid-stats
+ -c
+ org.apache.druid.extensions:simple-client-sslcontext
+ -c
+ org.apache.druid.extensions:druid-basic-security
+ -c
+ org.apache.druid.extensions:druid-pac4j
+ -c
+ org.apache.druid.extensions:druid-ranger-security
+ -c
+ org.apache.druid.extensions:druid-kubernetes-extensions
+ ${druid.distribution.pulldeps.opts}
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+ distro-assembly
+ package
+
+ single
+
+
+ apache-druid-${project.parent.version}
+ posix
+
+ src/assembly/assembly.xml
+
+
+
+
+
+
+ org.codehaus.mojo
+ license-maven-plugin
+
+
+ download-licenses
+
+ download-licenses
+
+
+
+
+
+
+
apache-release
diff --git a/docs/development/build.md b/docs/development/build.md
index b59dddcbf67..1b2dc475fb2 100644
--- a/docs/development/build.md
+++ b/docs/development/build.md
@@ -71,6 +71,25 @@ Putting these together, if you wish to build the source and binary distributions
```bash
mvn clean install -Papache-release,dist,rat -DskipTests
```
+
+### Building the Hadoop 3 distribution
+
+By default, Druid ships Hadoop 2.x.x jars along with the distribution. The exact version can be found in the
+main [pom](https://github.com/apache/druid/blob/master/pom.xml). To build Druid with Hadoop 3.x.x jars, the hadoop3 profile
+needs to be activated.
+
+To generate a build with Hadoop 3 dependencies, run:
+
+```bash
+mvn clean install -Phadoop3
+```
+
+To generate a distribution with Hadoop 3 dependencies, run:
+
+```bash
+mvn clean install -Papache-release,dist-hadoop3,rat,hadoop3 -DskipTests
+```
+
#### Potential issues
##### Missing `pyyaml`
diff --git a/examples/quickstart/tutorial/hadoop3/docker/Dockerfile b/examples/quickstart/tutorial/hadoop3/docker/Dockerfile
new file mode 100644
index 00000000000..482c72b698e
--- /dev/null
+++ b/examples/quickstart/tutorial/hadoop3/docker/Dockerfile
@@ -0,0 +1,133 @@
+# Based on the SequenceIQ hadoop-docker project hosted at
+# https://github.com/sequenceiq/hadoop-docker, and modified at
+# the Apache Software Foundation (ASF).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Creates a pseudo-distributed Hadoop 3.3.1 setup with Java 8
+FROM centos:7
+
+USER root
+
+# install dev tools
+RUN yum clean all \
+ && rpm --rebuilddb \
+ && yum install -y curl which tar sudo openssh-server openssh-clients rsync yum-plugin-ovl\
+ && yum clean all \
+ && yum update -y libselinux \
+ && yum update -y nss \
+ && yum clean all
+# update libselinux. see https://github.com/sequenceiq/hadoop-docker/issues/14
+# update nss. see https://unix.stackexchange.com/questions/280548/curl-doesnt-connect-to-https-while-wget-does-nss-error-12286
+
+# passwordless ssh
+RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
+RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
+RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
+RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
+
+# zulu java 8
+COPY ../../hadoop/docker/setup-zulu-repo.sh /root/setup-zulu-repo.sh
+RUN /root/setup-zulu-repo.sh
+RUN yum install -y zulu-8
+
+ENV JAVA_HOME /usr/lib/jvm/zulu-8
+ENV PATH $PATH:$JAVA_HOME/bin
+
+# hadoop
+ARG APACHE_ARCHIVE_MIRROR_HOST=https://archive.apache.org
+RUN curl -s ${APACHE_ARCHIVE_MIRROR_HOST}/dist/hadoop/core/hadoop-3.3.1/hadoop-3.3.1.tar.gz | tar -xz -C /usr/local/
+RUN cd /usr/local && ln -s ./hadoop-3.3.1 hadoop
+
+ENV HADOOP_HOME /usr/local/hadoop
+ENV HADOOP_COMMON_HOME /usr/local/hadoop
+ENV HADOOP_HDFS_HOME /usr/local/hadoop
+ENV HADOOP_MAPRED_HOME /usr/local/hadoop
+ENV HADOOP_YARN_HOME /usr/local/hadoop
+ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
+ENV YARN_CONF_DIR $HADOOP_HOME/etc/hadoop
+
+# In Hadoop 3 the example hadoop-env.sh is nearly empty, so we can simply append our settings
+RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu-8' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export HADOOP_HOME=/usr/local/hadoop' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export HDFS_NAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export HDFS_DATANODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export HDFS_SECONDARYNAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export YARN_RESOURCEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+RUN sed -i '$ a export YARN_NODEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+
+RUN cat $HADOOP_HOME/etc/hadoop/hadoop-env.sh
+
+RUN mkdir $HADOOP_HOME/input
+RUN cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input
+
+# pseudo distributed
+ADD ../../hadoop/docker/core-site.xml.template $HADOOP_HOME/etc/hadoop/core-site.xml.template
+RUN sed s/HOSTNAME/localhost/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml
+ADD ../../hadoop/docker/hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+ADD ../../hadoop/docker/mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
+ADD ../../hadoop/docker/yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
+
+RUN $HADOOP_HOME/bin/hdfs namenode -format
+
+ADD ../../hadoop/docker/ssh_config /root/.ssh/config
+RUN chmod 600 /root/.ssh/config
+RUN chown root:root /root/.ssh/config
+
+# # installing supervisord
+# RUN yum install -y python-setuptools
+# RUN easy_install pip
+# RUN curl https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py -o - | python
+# RUN pip install supervisor
+#
+# ADD supervisord.conf /etc/supervisord.conf
+
+ADD bootstrap.sh /etc/bootstrap.sh
+RUN chown root:root /etc/bootstrap.sh
+RUN chmod 700 /etc/bootstrap.sh
+
+ENV BOOTSTRAP /etc/bootstrap.sh
+
+# working around a docker.io build error
+RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh
+RUN chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh
+RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh
+
+# Copy additional .jars to classpath
+RUN cp /usr/local/hadoop/share/hadoop/tools/lib/*.jar /usr/local/hadoop/share/hadoop/common/lib/
+
+# fix the 254 error code
+RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config
+RUN echo "UsePAM no" >> /etc/ssh/sshd_config
+RUN echo "Port 2122" >> /etc/ssh/sshd_config
+
+# script for plain sshd start
+RUN echo -e \
+ '#!/bin/bash\n/usr/sbin/sshd\ntimeout 10 bash -c "until printf \"\" 2>>/dev/null >>/dev/tcp/127.0.0.1/2122; do sleep 0.5; done"' > \
+ /usr/local/bin/start_sshd && \
+ chmod a+x /usr/local/bin/start_sshd
+
+RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh
+RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh
+
+CMD ["/etc/bootstrap.sh", "-d"]
+
+# Hdfs ports
+EXPOSE 8020 9000 9820 9864 9865 9866 9867 9868 9869 9870 9871 50010 50020 50070 50075 50090
+# Mapred ports
+EXPOSE 10020 19888
+#Yarn ports
+EXPOSE 8030 8031 8032 8033 8040 8042 8088
+#Other ports
+EXPOSE 2122 49707
\ No newline at end of file
diff --git a/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh b/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh
new file mode 100644
index 00000000000..d1fa493d4ea
--- /dev/null
+++ b/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+: ${HADOOP_HOME:=/usr/local/hadoop}
+
+$HADOOP_HOME/etc/hadoop/hadoop-env.sh
+
+rm /tmp/*.pid
+# installing libraries if any - (resource urls added comma separated to the ACP system variable)
+cd $HADOOP_HOME/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
+
+# altering the core-site configuration
+sed s/HOSTNAME/$HOSTNAME/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml
+
+
+start_sshd
+$HADOOP_HOME/sbin/start-dfs.sh
+$HADOOP_HOME/sbin/start-yarn.sh
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
+
+if [[ $1 == "-d" ]]; then
+ while true; do sleep 1000; done
+fi
+
+if [[ $1 == "-bash" ]]; then
+ /bin/bash
+fi
diff --git a/examples/quickstart/tutorial/wikipedia-index-hadoop3.json b/examples/quickstart/tutorial/wikipedia-index-hadoop3.json
new file mode 100644
index 00000000000..28db64e3268
--- /dev/null
+++ b/examples/quickstart/tutorial/wikipedia-index-hadoop3.json
@@ -0,0 +1,80 @@
+{
+ "type" : "index_hadoop",
+ "spec" : {
+ "dataSchema" : {
+ "dataSource" : "wikipedia",
+ "parser" : {
+ "type" : "hadoopyString",
+ "parseSpec" : {
+ "format" : "json",
+ "dimensionsSpec" : {
+ "dimensions" : [
+ "channel",
+ "cityName",
+ "comment",
+ "countryIsoCode",
+ "countryName",
+ "isAnonymous",
+ "isMinor",
+ "isNew",
+ "isRobot",
+ "isUnpatrolled",
+ "metroCode",
+ "namespace",
+ "page",
+ "regionIsoCode",
+ "regionName",
+ "user",
+ { "name": "added", "type": "long" },
+ { "name": "deleted", "type": "long" },
+ { "name": "delta", "type": "long" }
+ ]
+ },
+ "timestampSpec" : {
+ "format" : "auto",
+ "column" : "time"
+ }
+ }
+ },
+ "metricsSpec" : [],
+ "granularitySpec" : {
+ "type" : "uniform",
+ "segmentGranularity" : "day",
+ "queryGranularity" : "none",
+ "intervals" : ["2015-09-12/2015-09-13"],
+ "rollup" : false
+ }
+ },
+ "ioConfig" : {
+ "type" : "hadoop",
+ "inputSpec" : {
+ "type" : "static",
+ "paths" : "/quickstart/wikiticker-2015-09-12-sampled.json.gz"
+ }
+ },
+ "tuningConfig" : {
+ "type" : "hadoop",
+ "partitionsSpec" : {
+ "type" : "hashed",
+ "targetPartitionSize" : 5000000
+ },
+ "forceExtendableShardSpecs" : true,
+ "jobProperties" : {
+ "fs.default.name" : "hdfs://druid-hadoop-demo:9000",
+ "fs.defaultFS" : "hdfs://druid-hadoop-demo:9000",
+ "dfs.datanode.address" : "druid-hadoop-demo",
+ "dfs.client.use.datanode.hostname" : "true",
+ "dfs.datanode.use.datanode.hostname" : "true",
+ "yarn.resourcemanager.hostname" : "druid-hadoop-demo",
+ "yarn.nodemanager.vmem-check-enabled" : "false",
+ "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+ "mapreduce.job.user.classpath.first" : "true",
+ "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+ "mapreduce.map.memory.mb" : 1024,
+ "mapreduce.reduce.memory.mb" : 1024,
+ "mapreduce.job.classloader" : "true"
+ }
+ }
+ },
+ "hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client-api:3.3.1","org.apache.hadoop:hadoop-client-runtime:3.3.1"]
+}
diff --git a/extensions-contrib/thrift-extensions/pom.xml b/extensions-contrib/thrift-extensions/pom.xml
index 9b83cc3dd43..68af02e4061 100644
--- a/extensions-contrib/thrift-extensions/pom.xml
+++ b/extensions-contrib/thrift-extensions/pom.xml
@@ -71,11 +71,6 @@
${project.parent.version}
provided
-
- org.apache.hadoop
- hadoop-client
- provided
-
com.twitter.elephantbird
elephant-bird-core
@@ -115,11 +110,6 @@
guice
provided
-
- org.apache.hadoop
- hadoop-common
- provided
-
com.fasterxml.jackson.core
jackson-databind
@@ -142,7 +132,56 @@
test
-
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.avro
+ avro
+
+
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.compile.version}
+ test
+
+
+
+
diff --git a/extensions-core/avro-extensions/pom.xml b/extensions-core/avro-extensions/pom.xml
index 292f6377234..403c0944317 100644
--- a/extensions-core/avro-extensions/pom.xml
+++ b/extensions-core/avro-extensions/pom.xml
@@ -192,10 +192,6 @@
-
- org.apache.hadoop
- hadoop-client
-
com.google.code.findbugs
jsr305
@@ -211,28 +207,6 @@
jackson-annotations
provided
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- provided
-
-
- org.slf4j
- slf4j-api
-
-
-
-
- org.apache.hadoop
- hadoop-common
- provided
-
-
- org.slf4j
- slf4j-api
-
-
-
com.google.inject
guice
@@ -287,7 +261,78 @@
test
-
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ provided
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ provided
+
+
+ com.sun.jersey
+ jersey-core
+ test
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.compile.version}
+ test
+
+
+
+
diff --git a/extensions-core/druid-kerberos/pom.xml b/extensions-core/druid-kerberos/pom.xml
index af0d3b7201b..351f1f697ee 100644
--- a/extensions-core/druid-kerberos/pom.xml
+++ b/extensions-core/druid-kerberos/pom.xml
@@ -283,9 +283,12 @@
jetty-client
provided
+
org.apache.hadoop
hadoop-auth
+ ${hadoop.compile.version}
com.nimbusds
diff --git a/extensions-core/druid-ranger-security/pom.xml b/extensions-core/druid-ranger-security/pom.xml
index d4c3089782e..8022c1cb93f 100644
--- a/extensions-core/druid-ranger-security/pom.xml
+++ b/extensions-core/druid-ranger-security/pom.xml
@@ -128,11 +128,23 @@
ranger-plugins-common
${apache.ranger.version}
compile
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
org.apache.ranger
ranger-plugins-audit
${apache.ranger.version}
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
compile
@@ -141,223 +153,6 @@
${apache.ranger.gson.version}
compile
-
- org.apache.hadoop
- hadoop-client
- runtime
-
-
- commons-cli
- commons-cli
-
-
- log4j
- log4j
-
-
- commons-codec
- commons-codec
-
-
- commons-logging
- commons-logging
-
-
- commons-io
- commons-io
-
-
- commons-lang
- commons-lang
-
-
- org.apache.httpcomponents
- httpclient
-
-
- org.apache.httpcomponents
- httpcore
-
-
- org.apache.zookeeper
- zookeeper
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
- javax.ws.rs
- jsr311-api
-
-
- com.google.code.findbugs
- jsr305
-
-
- org.mortbay.jetty
- jetty-util
-
-
- org.apache.hadoop
- hadoop-annotations
-
-
- javax.activation
- activation
-
-
- com.google.protobuf
- protobuf-java
-
-
- com.sun.jersey
- jersey-core
-
-
- org.apache.curator
- curator-client
-
-
- org.apache.curator
- curator-framework
-
-
- org.apache.curator
- curator-recipes
-
-
- org.apache.commons
- commons-math3
-
-
- com.google.guava
- guava
-
-
-
- commons-beanutils
- commons-beanutils-core
-
-
-
-
- org.apache.hadoop
- hadoop-common
- ${hadoop.compile.version}
- compile
-
-
- commons-cli
- commons-cli
-
-
- log4j
- log4j
-
-
- commons-codec
- commons-codec
-
-
- commons-logging
- commons-logging
-
-
- commons-io
- commons-io
-
-
- commons-lang
- commons-lang
-
-
- org.apache.httpcomponents
- httpclient
-
-
- org.apache.httpcomponents
- httpcore
-
-
- org.apache.zookeeper
- zookeeper
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
- javax.ws.rs
- jsr311-api
-
-
- com.google.code.findbugs
- jsr305
-
-
- org.mortbay.jetty
- jetty-util
-
-
- com.google.protobuf
- protobuf-java
-
-
- com.sun.jersey
- jersey-core
-
-
- org.apache.curator
- curator-client
-
-
- org.apache.commons
- commons-math3
-
-
- com.google.guava
- guava
-
-
- org.apache.avro
- avro
-
-
- net.java.dev.jets3t
- jets3t
-
-
- com.sun.jersey
- jersey-json
-
-
- com.jcraft
- jsch
-
-
- org.mortbay.jetty
- jetty
-
-
- com.sun.jersey
- jersey-server
-
-
-
- commons-beanutils
- commons-beanutils-core
-
-
-
@@ -378,6 +173,260 @@
test
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.compile.version}
+ runtime
+
+
+ org.apache.avro
+ avro
+
+
+ commons-cli
+ commons-cli
+
+
+ log4j
+ log4j
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-logging
+ commons-logging
+
+
+ commons-io
+ commons-io
+
+
+ commons-lang
+ commons-lang
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ javax.ws.rs
+ jsr311-api
+
+
+ com.google.code.findbugs
+ jsr305
+
+
+ org.mortbay.jetty
+ jetty-util
+
+
+ org.apache.hadoop
+ hadoop-annotations
+
+
+ javax.activation
+ activation
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+ com.sun.jersey
+ jersey-core
+
+
+ org.apache.curator
+ curator-client
+
+
+ org.apache.curator
+ curator-framework
+
+
+ org.apache.curator
+ curator-recipes
+
+
+ org.apache.commons
+ commons-math3
+
+
+ com.google.guava
+ guava
+
+
+
+ commons-beanutils
+ commons-beanutils-core
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ compile
+
+
+ commons-cli
+ commons-cli
+
+
+ log4j
+ log4j
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-logging
+ commons-logging
+
+
+ commons-io
+ commons-io
+
+
+ commons-lang
+ commons-lang
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ javax.ws.rs
+ jsr311-api
+
+
+ com.google.code.findbugs
+ jsr305
+
+
+ org.mortbay.jetty
+ jetty-util
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+ com.sun.jersey
+ jersey-core
+
+
+ org.apache.curator
+ curator-client
+
+
+ org.apache.commons
+ commons-math3
+
+
+ com.google.guava
+ guava
+
+
+ org.apache.avro
+ avro
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+ com.sun.jersey
+ jersey-json
+
+
+ com.jcraft
+ jsch
+
+
+ org.mortbay.jetty
+ jetty
+
+
+ com.sun.jersey
+ jersey-server
+
+
+
+ commons-beanutils
+ commons-beanutils-core
+
+
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ compile
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.compile.version}
+ test
+
+
+
+
diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml
index 908a1707a4a..4985c5e3bea 100644
--- a/extensions-core/hdfs-storage/pom.xml
+++ b/extensions-core/hdfs-storage/pom.xml
@@ -41,113 +41,230 @@
provided
- org.apache.hadoop
- hadoop-client
- runtime
-
-
- commons-cli
- commons-cli
-
-
- log4j
- log4j
-
-
- commons-codec
- commons-codec
-
-
- commons-logging
- commons-logging
-
-
- commons-io
- commons-io
-
-
- commons-lang
- commons-lang
-
-
- org.apache.httpcomponents
- httpclient
-
-
- org.apache.httpcomponents
- httpcore
-
-
- org.apache.zookeeper
- zookeeper
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
- javax.ws.rs
- jsr311-api
-
-
- com.google.code.findbugs
- jsr305
-
-
- org.mortbay.jetty
- jetty-util
-
-
- org.apache.hadoop
- hadoop-annotations
-
-
- javax.activation
- activation
-
-
- com.google.protobuf
- protobuf-java
-
-
- com.sun.jersey
- jersey-core
-
-
- org.apache.curator
- curator-client
-
-
- org.apache.curator
- curator-framework
-
-
- org.apache.curator
- curator-recipes
-
-
- org.apache.commons
- commons-math3
-
-
- com.google.guava
- guava
-
-
-
- commons-beanutils
- commons-beanutils-core
-
-
+ org.apache.hadoop
+ hadoop-aws
+ ${hadoop.compile.version}
+ runtime
+
+
+ commons-io
+ commons-io
+ provided
+
+
+ com.google.code.findbugs
+ jsr305
+ provided
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ provided
+
+
+ joda-time
+ joda-time
+ provided
+
+
+ com.google.inject
+ guice
+ provided
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ provided
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ provided
+
+
+ com.google.inject.extensions
+ guice-multibindings
+ provided
+
+
+ commons-lang
+ commons-lang
+ provided
+
+
+ com.google.guava
+ guava
+ provided
+
+
+ javax.validation
+ validation-api
+ provided
+
+
+
+
+ junit
+ junit
+ test
+
+
+ com.google.protobuf
+ protobuf-java
+ test
+
+
+ org.apache.druid
+ druid-server
+ ${project.parent.version}
+ test
+
+
+ org.apache.druid
+ druid-processing
+ ${project.parent.version}
+ test-jar
+ test
+
+
+ org.apache.druid
+ druid-indexing-hadoop
+ ${project.parent.version}
+ test
+
+
+ org.apache.druid
+ druid-processing
+ ${project.parent.version}
+ test
+
+
+ org.apache.druid
+ druid-core
+ ${project.parent.version}
+ tests
+ test
+
+
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.compile.version}
+ runtime
+
+
+ org.apache.avro
+ avro
+
+
+ commons-cli
+ commons-cli
+
+
+ log4j
+ log4j
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-logging
+ commons-logging
+
+
+ commons-io
+ commons-io
+
+
+ commons-lang
+ commons-lang
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ javax.ws.rs
+ jsr311-api
+
+
+ com.google.code.findbugs
+ jsr305
+
+
+ org.mortbay.jetty
+ jetty-util
+
+
+ org.apache.hadoop
+ hadoop-annotations
+
+
+ javax.activation
+ activation
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+ com.sun.jersey
+ jersey-core
+
+
+ org.apache.curator
+ curator-client
+
+
+ org.apache.curator
+ curator-framework
+
+
+ org.apache.curator
+ curator-recipes
+
+
+ org.apache.commons
+ commons-math3
+
+
+ com.google.guava
+ guava
+
+
+
+ commons-beanutils
+ commons-beanutils-core
+
+
org.apache.hadoop
hadoop-common
- ${hadoop.compile.version}
compile
@@ -259,161 +376,97 @@
org.apache.hadoop
- hadoop-aws
+ hadoop-mapreduce-client-core
+ compile
+
+
+ javax.servlet
+ servlet-api
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.google.inject.extensions
+ guice-servlet
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+ io.netty
+ netty
+
+
+ log4j
+ log4j
+
+
+ org.apache.avro
+ avro
+
+
+ org.apache.hadoop
+ hadoop-annotations
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs-client
+ runtime
+
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ compile
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
${hadoop.compile.version}
runtime
- commons-io
- commons-io
- provided
-
-
- com.google.code.findbugs
- jsr305
- provided
-
-
- com.fasterxml.jackson.core
- jackson-annotations
- provided
-
-
- joda-time
- joda-time
- provided
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- compile
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- com.google.inject.extensions
- guice-servlet
-
-
- com.google.protobuf
- protobuf-java
-
-
- io.netty
- netty
-
-
- log4j
- log4j
-
-
- org.apache.avro
- avro
-
-
- org.apache.hadoop
- hadoop-annotations
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
-
-
- com.google.inject
- guice
- provided
-
-
- com.fasterxml.jackson.core
- jackson-databind
- provided
-
-
- org.apache.hadoop
- hadoop-hdfs-client
- runtime
-
-
- com.fasterxml.jackson.core
- jackson-core
- provided
-
-
- com.google.inject.extensions
- guice-multibindings
- provided
-
-
- commons-lang
- commons-lang
- provided
-
-
- com.google.guava
- guava
- provided
-
-
- javax.validation
- validation-api
- provided
-
-
-
-
- junit
- junit
+ org.apache.hadoop
+ hadoop-client-minicluster
+ ${hadoop.compile.version}
test
- com.google.protobuf
- protobuf-java
+ log4j
+ log4j
+ 1.2.17
test
-
- org.apache.druid
- druid-server
- ${project.parent.version}
- test
-
-
- org.apache.druid
- druid-processing
- ${project.parent.version}
- test-jar
- test
-
-
- org.apache.druid
- druid-indexing-hadoop
- ${project.parent.version}
- test
-
-
- org.apache.druid
- druid-processing
- ${project.parent.version}
- test
-
-
- org.apache.druid
- druid-core
- ${project.parent.version}
- tests
- test
-
-
-
+
+
+
diff --git a/extensions-core/orc-extensions/pom.xml b/extensions-core/orc-extensions/pom.xml
index 4e363ef7f0b..9f929390afd 100644
--- a/extensions-core/orc-extensions/pom.xml
+++ b/extensions-core/orc-extensions/pom.xml
@@ -159,249 +159,6 @@
jackson-annotations
provided
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- compile
-
-
- aopalliance
- aopalliance
-
-
- org.apache.avro
- avro
-
-
- org.apache.commons
- commons-compress
-
-
- com.google.guava
- guava
-
-
- com.google.inject
- guice
-
-
- com.google.inject.extensions
- guice-servlet
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- javax.inject
- javax
-
-
- io.netty
- netty
-
-
- org.slf4j
- slf4j-log4j12
-
-
- org.slf4j
- slf4j-api
-
-
- com.google.protobuf
- protobuf-java
-
-
-
-
- org.apache.hadoop
- hadoop-hdfs-client
- runtime
-
-
-
- org.apache.hadoop
- hadoop-common
- compile
-
-
- org.apache.yetus
- audience-annotations
-
-
- org.apache.directory.server
- apacheds-kerberos-codec
-
-
- org.apache.avro
- avro
-
-
- commons-beanutils
- commons-beanutils-core
-
-
- commons-cli
- commons-cli
-
-
- commons-codec
- commons-codec
-
-
- org.apache.commons
- commons-compress
-
-
- commons-io
- commons-io
-
-
- commons-lang
- commons-lang
-
-
- commons-collections
- commons-collections
-
-
- commons-logging
- commons-logging
-
-
- org.apache.commons
- commons-math3
-
-
- commons-net
- commons-net
-
-
- org.apache.curator
- curator-client
-
-
- org.apache.curator
- curator-recipes
-
-
- org.apache.curator
- curator-framework
-
-
- com.google.code.gson
- gson
-
-
- com.google.guava
- guava
-
-
- org.apache.httpcomponents
- httpclient
-
-
- org.apache.httpcomponents
- httpcore
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- com.sun.jersey
- jersey-core
-
-
- com.sun.jersey
- jersey-server
-
-
- com.sun.jersey
- jersey-json
-
-
- org.mortbay.jetty
- jetty-util
-
-
- org.mortbay.jetty
- jetty-sslengine
-
-
- org.mortbay.jetty
- jetty
-
-
- net.java.dev.jets3t
- jets3t
-
-
- com.google.code.findbugs
- jsr305
-
-
- javax.ws.rs
- jsr311-api
-
-
- javax.servlet.jsp
- jsp-api
-
-
- com.jcraft
- jsch
-
-
- log4j
- log4j
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
- xmlenc
- xmlenc
-
-
- org.apache.zookeeper
- zookeeper
-
-
- com.nimbusds
- nimbus-jose-jwt
-
-
-
com.google.inject
guice
@@ -465,4 +222,290 @@
test
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ compile
+
+
+ aopalliance
+ aopalliance
+
+
+ org.apache.avro
+ avro
+
+
+ org.apache.commons
+ commons-compress
+
+
+ com.google.guava
+ guava
+
+
+ com.google.inject
+ guice
+
+
+ javax.servlet
+ servlet-api
+
+
+ com.google.inject.extensions
+ guice-servlet
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ javax.inject
+ javax
+
+
+ io.netty
+ netty
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ org.slf4j
+ slf4j-api
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs-client
+ runtime
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.compile.version}
+ compile
+
+
+ org.apache.yetus
+ audience-annotations
+
+
+ org.apache.directory.server
+ apacheds-kerberos-codec
+
+
+ org.apache.avro
+ avro
+
+
+ commons-beanutils
+ commons-beanutils-core
+
+
+ commons-cli
+ commons-cli
+
+
+ commons-codec
+ commons-codec
+
+
+ org.apache.commons
+ commons-compress
+
+
+ commons-io
+ commons-io
+
+
+ commons-lang
+ commons-lang
+
+
+ commons-collections
+ commons-collections
+
+
+ commons-logging
+ commons-logging
+
+
+ org.apache.commons
+ commons-math3
+
+
+ commons-net
+ commons-net
+
+
+ org.apache.curator
+ curator-client
+
+
+ org.apache.curator
+ curator-recipes
+
+
+ org.apache.curator
+ curator-framework
+
+
+ com.google.code.gson
+ gson
+
+
+ com.google.guava
+ guava
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.sun.jersey
+ jersey-core
+
+
+ com.sun.jersey
+ jersey-server
+
+
+ com.sun.jersey
+ jersey-json
+
+
+ org.mortbay.jetty
+ jetty-util
+
+
+ org.mortbay.jetty
+ jetty-sslengine
+
+
+ org.mortbay.jetty
+ jetty
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+ com.google.code.findbugs
+ jsr305
+
+
+ javax.ws.rs
+ jsr311-api
+
+
+ javax.servlet.jsp
+ jsp-api
+
+
+ com.jcraft
+ jsch
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ xmlenc
+ xmlenc
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ com.nimbusds
+ nimbus-jose-jwt
+
+
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ compile
+
+
+ com.google.protobuf
+ protobuf-java
+ test
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.compile.version}
+ test
+
+
+
+
diff --git a/extensions-core/parquet-extensions/pom.xml b/extensions-core/parquet-extensions/pom.xml
index c2bbcf05851..66ef705906f 100644
--- a/extensions-core/parquet-extensions/pom.xml
+++ b/extensions-core/parquet-extensions/pom.xml
@@ -115,248 +115,6 @@
${project.parent.version}
provided
-
-
- org.apache.hadoop
- hadoop-hdfs-client
- runtime
-
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- ${hadoop.compile.version}
- compile
-
-
- aopalliance
- aopalliance
-
-
- org.apache.commons
- commons-compress
-
-
- com.google.guava
- guava
-
-
- com.google.inject
- guice
-
-
- com.google.inject.extensions
- guice-servlet
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- javax.inject
- javax
-
-
- io.netty
- netty
-
-
- slf4j-log4j12
- org.slf4j
-
-
- org.slf4j
- slf4j-api
-
-
- protobuf-java
- com.google.protobuf
-
-
-
-
- org.apache.hadoop
- hadoop-common
- ${hadoop.compile.version}
- compile
-
-
- org.apache.yetus
- audience-annotations
-
-
- commons-codec
- commons-codec
-
-
- org.apache.commons
- commons-compress
-
-
- commons-io
- commons-io
-
-
- commons-lang
- commons-lang
-
-
- org.apache.commons
- commons-math3
-
-
- commons-net
- commons-net
-
-
- org.apache.curator
- curator-client
-
-
- org.apache.curator
- curator-framework
-
-
- org.apache.curator
- curator-recipes
-
-
- com.google.guava
- guava
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- com.sun.jersey
- jersey-core
-
-
- com.sun.jersey
- jersey-server
-
-
- javax.servlet.jsp
- jsp-api
-
-
- com.google.code.findbugs
- jsr305
-
-
- javax.ws.rs
- jsr311-api
-
-
- org.apache.zookeeper
- zookeeper
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
- com.sun.jersey
- jersey-json
-
-
- log4j
- log4j
-
-
- org.mortbay.jetty
- jetty-sslengine
-
-
- org.mortbay.jetty
- jetty-util
-
-
- net.java.dev.jets3t
- jets3t
-
-
- org.mortbay.jetty
- jetty
-
-
- com.google.code.gson
- gson
-
-
- xmlenc
- xmlenc
-
-
- org.apache.httpcomponents
- httpclient
-
-
- com.jcraft
- jsch
-
-
- com.google.protobuf
- protobuf-java
-
-
- commons-collections
- commons-collections
-
-
- commons-logging
- commons-logging
-
-
- commons-cli
- commons-cli
-
-
- commons-digester
- commons-digester
-
-
- commons-beanutils
- commons-beanutils-core
-
-
- org.apache.directory.server
- apacheds-kerberos-codec
-
-
- com.nimbusds
- nimbus-jose-jwt
-
-
-
com.google.code.findbugs
jsr305
@@ -408,5 +166,277 @@
test
-
+
+
+ hadoop2
+
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs-client
+ runtime
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ compile
+
+
+ aopalliance
+ aopalliance
+
+
+ org.apache.commons
+ commons-compress
+
+
+ com.google.guava
+ guava
+
+
+ com.google.inject
+ guice
+
+
+ com.google.inject.extensions
+ guice-servlet
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ javax.inject
+ javax
+
+
+ io.netty
+ netty
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+ org.slf4j
+ slf4j-api
+
+
+ protobuf-java
+ com.google.protobuf
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ compile
+
+
+ org.apache.yetus
+ audience-annotations
+
+
+ commons-codec
+ commons-codec
+
+
+ org.apache.commons
+ commons-compress
+
+
+ commons-io
+ commons-io
+
+
+ commons-lang
+ commons-lang
+
+
+ org.apache.commons
+ commons-math3
+
+
+ commons-net
+ commons-net
+
+
+ org.apache.curator
+ curator-client
+
+
+ org.apache.curator
+ curator-framework
+
+
+ org.apache.curator
+ curator-recipes
+
+
+ com.google.guava
+ guava
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.sun.jersey
+ jersey-core
+
+
+ com.sun.jersey
+ jersey-server
+
+
+ javax.servlet.jsp
+ jsp-api
+
+
+ com.google.code.findbugs
+ jsr305
+
+
+ javax.ws.rs
+ jsr311-api
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ com.sun.jersey
+ jersey-json
+
+
+ log4j
+ log4j
+
+
+ org.mortbay.jetty
+ jetty-sslengine
+
+
+ org.mortbay.jetty
+ jetty-util
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+ org.mortbay.jetty
+ jetty
+
+
+ com.google.code.gson
+ gson
+
+
+ xmlenc
+ xmlenc
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+ com.jcraft
+ jsch
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+ commons-collections
+ commons-collections
+
+
+ commons-logging
+ commons-logging
+
+
+ commons-cli
+ commons-cli
+
+
+ commons-digester
+ commons-digester
+
+
+ commons-beanutils
+ commons-beanutils-core
+
+
+ org.apache.directory.server
+ apacheds-kerberos-codec
+
+
+ com.nimbusds
+ nimbus-jose-jwt
+
+
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.compile.version}
+ test
+
+
+
+
diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml
index 77f3aed0e36..56168474e28 100644
--- a/indexing-hadoop/pom.xml
+++ b/indexing-hadoop/pom.xml
@@ -67,11 +67,6 @@
com.google.guava
guava
-
- org.apache.hadoop
- hadoop-client
- provided
-
com.fasterxml.jackson.core
jackson-core
@@ -84,16 +79,6 @@
com.google.code.findbugs
jsr305
-
- org.apache.hadoop
- hadoop-common
- provided
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- provided
-
joda-time
joda-time
@@ -141,13 +126,6 @@
hamcrest-all
test
-
- org.apache.hadoop
- hadoop-common
- ${hadoop.compile.version}
- tests
- test
-
org.apache.druid
druid-core
@@ -200,7 +178,98 @@
test
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.avro
+ avro
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ provided
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.compile.version}
+ tests
+ test
+
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-client-minicluster
+ ${hadoop.compile.version}
+ test
+
+
+ log4j
+ log4j
+ 1.2.17
+ test
+
+
+
+
diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml
index 59f1e45b0bb..42c6baaf78f 100644
--- a/indexing-service/pom.xml
+++ b/indexing-service/pom.xml
@@ -42,11 +42,6 @@
druid-server
${project.parent.version}
-
- org.apache.hadoop
- hadoop-client
- provided
-
org.apache.druid
druid-indexing-hadoop
@@ -114,16 +109,6 @@
javax.inject
javax.inject
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- provided
-
-
- org.apache.hadoop
- hadoop-common
- provided
-
io.netty
netty
@@ -297,6 +282,75 @@
+
+
+ hadoop2
+
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ provided
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ provided
+
+
+
+ org.apache.hadoop:hadoop-client:${hadoop.compile.version}
+
+
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.compile.version}
+ provided
+
+
+
+
+ org.apache.hadoop:hadoop-client-api:${hadoop.compile.version},org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version}
+
+
+
+
+
@@ -310,6 +364,20 @@
+
+ maven-resources-plugin
+ org.apache.maven.plugins
+
+ ${project.build.outputDirectory}
+
+
+ src/main/resources
+ hadoop.indexer.libs.version
+ true
+
+
+
+
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
index a41488642c4..184e7a9425c 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
@@ -22,13 +22,17 @@ package org.apache.druid.indexing.common.config;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.EnumUtils;
+import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.StorageLocationConfig;
import org.joda.time.Period;
import javax.annotation.Nullable;
import java.io.File;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.List;
@@ -42,10 +46,23 @@ import java.util.List;
public class TaskConfig
{
private static final Logger log = new Logger(TaskConfig.class);
+ private static final String HADOOP_LIB_VERSIONS = "hadoop.indexer.libs.version";
+ public static final List DEFAULT_DEFAULT_HADOOP_COORDINATES;
- public static final List DEFAULT_DEFAULT_HADOOP_COORDINATES = ImmutableList.of(
- "org.apache.hadoop:hadoop-client:2.8.5"
- );
+ static {
+ try {
+ DEFAULT_DEFAULT_HADOOP_COORDINATES =
+ ImmutableList.copyOf(Lists.newArrayList(IOUtils.toString(
+ TaskConfig.class.getResourceAsStream("/"
+ + HADOOP_LIB_VERSIONS),
+ StandardCharsets.UTF_8
+ ).split(",")));
+
+ }
+ catch (Exception e) {
+ throw new ISE(e, "Unable to read file %s from classpath ", HADOOP_LIB_VERSIONS);
+ }
+ }
// This enum controls processing mode of batch ingestion "segment creation" phase (i.e. appenderator logic)
public enum BatchProcessingMode
diff --git a/indexing-service/src/main/resources/hadoop.indexer.libs.version b/indexing-service/src/main/resources/hadoop.indexer.libs.version
new file mode 100644
index 00000000000..61e072272ac
--- /dev/null
+++ b/indexing-service/src/main/resources/hadoop.indexer.libs.version
@@ -0,0 +1 @@
+${hadoop-task-libs}
\ No newline at end of file
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java
index 47091696aa1..c41ee058d31 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java
@@ -30,7 +30,6 @@ import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.utils.JvmUtils;
-import org.apache.hadoop.util.ApplicationClassLoader;
import org.easymock.EasyMock;
import org.joda.time.Interval;
import org.junit.Assert;
@@ -146,7 +145,7 @@ public class HadoopTaskTest
// This is a check against the current HadoopTask which creates a single URLClassLoader with null parent
Assert.assertNull(classLoader.getParent());
}
- Assert.assertFalse(classLoader instanceof ApplicationClassLoader);
+ Assert.assertFalse(classLoader.getClass().getSimpleName().equals("ApplicationClassLoader"));
Assert.assertTrue(classLoader instanceof URLClassLoader);
final ClassLoader appLoader = HadoopDruidIndexerConfig.class.getClassLoader();
diff --git a/integration-tests/.gitignore b/integration-tests/.gitignore
index aa0b0953609..e11916a44f2 100644
--- a/integration-tests/.gitignore
+++ b/integration-tests/.gitignore
@@ -3,4 +3,5 @@ docker/docker_ip
docker/tls/root.key
docker/tls/root.pem
docker/tls/untrusted_root.key
-docker/tls/untrusted_root.pem
\ No newline at end of file
+docker/tls/untrusted_root.pem
+gen-scripts/
\ No newline at end of file
diff --git a/integration-tests/build_run_cluster.sh b/integration-tests/build_run_cluster.sh
index 97176e5ae86..928fdfc0b78 100755
--- a/integration-tests/build_run_cluster.sh
+++ b/integration-tests/build_run_cluster.sh
@@ -20,6 +20,12 @@ echo $DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH
export DIR=$(cd $(dirname $0) && pwd)
export HADOOP_DOCKER_DIR=$DIR/../examples/quickstart/tutorial/hadoop/docker
+
+if [ -n "${HADOOP_VERSION}" ] && [ "${HADOOP_VERSION:0:1}" == "3" ]; then
+ export HADOOP_DOCKER_DIR=$DIR/../examples/quickstart/tutorial/hadoop3/docker
+fi
+
+
export DOCKERDIR=$DIR/docker
export SHARED_DIR=${HOME}/shared
@@ -27,7 +33,7 @@ export SHARED_DIR=${HOME}/shared
echo ${DOCKER_IP:=127.0.0.1} > $DOCKERDIR/docker_ip
if !($DRUID_INTEGRATION_TEST_SKIP_BUILD_DOCKER); then
- bash ./script/copy_resources.sh
+ bash ./gen-scripts/copy_resources.sh
bash ./script/docker_build_containers.sh
fi
diff --git a/integration-tests/docker/docker-compose.base.yml b/integration-tests/docker/docker-compose.base.yml
index 11d7962d993..82870d02f02 100644
--- a/integration-tests/docker/docker-compose.base.yml
+++ b/integration-tests/docker/docker-compose.base.yml
@@ -317,8 +317,10 @@ services:
- ./environment-configs/router-custom-check-tls
### optional supporting infra
+
druid-it-hadoop:
- image: druid-it/hadoop:2.8.5
+ ## Placeholder tag: the actual Hadoop version is selected when the image is built (see docker_build_containers.sh)
+ image: druid-it/hadoop:9.9.9
container_name: druid-it-hadoop
ports:
- 2049:2049
diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure
index 6564b7d83c4..2382cd120c1 100644
--- a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure
+++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure
@@ -28,4 +28,13 @@ druid_azure_container=
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
-druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
+#
+# Please replace with corresponding libs
+# Sample hadoop 2 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
+#
+# Sample hadoop 3 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"]
+#
+
+druid_indexer_task_defaultHadoopCoordinates=
diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs
index 26ce1343a59..bfc2552d07c 100644
--- a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs
+++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs
@@ -31,4 +31,13 @@ druid_azure_account=
druid_azure_key=
druid_azure_container=
-druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
+#
+# Please replace with corresponding libs
+# Sample hadoop 2 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
+#
+# Sample hadoop 3 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"]
+#
+
+druid_indexer_task_defaultHadoopCoordinates=
diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs
index b16500ad9d6..785e376595b 100644
--- a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs
+++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs
@@ -31,4 +31,13 @@ AWS_REGION=
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
-druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
+#
+# Please replace with corresponding libs
+# Sample hadoop 2 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
+#
+# Sample hadoop 3 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-aws:3.2.2"]
+#
+
+druid_indexer_task_defaultHadoopCoordinates=
diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3
index 60dd85673a6..7daf16a63b1 100644
--- a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3
+++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3
@@ -31,5 +31,13 @@ druid_storage_baseKey=
AWS_REGION=
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
+#
+# Please replace with corresponding libs
+# Sample hadoop 2 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
+#
+# Sample hadoop 3 config
+# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-aws:3.2.2"]
+#
-druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
+druid_indexer_task_defaultHadoopCoordinates=
diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml
index ed6b90f5fb4..9985a022c91 100644
--- a/integration-tests/pom.xml
+++ b/integration-tests/pom.xml
@@ -38,6 +38,11 @@
+
+ "org.apache.hadoop:hadoop-client:${hadoop.compile.version}", "org.apache.hadoop:hadoop-azure:${hadoop.compile.version}"
+ org.apache.hadoop.fs.s3native.NativeS3FileSystem
+
+
com.amazonaws
@@ -155,6 +160,12 @@
druid-hdfs-storage
${project.parent.version}
runtime
+
+
+ com.amazonaws
+ aws-java-sdk-bundle
+
+
org.apache.druid.extensions
@@ -466,10 +477,88 @@
true
-
+
+ maven-resources-plugin
+ org.apache.maven.plugins
+
+ ${project.build.outputDirectory}
+
+
+ script
+ copy_resources_template.sh
+ true
+
+
+ src/test/resources/hadoop/
+ *template.json
+ true
+
+
+ src/test/resources
+ false
+
+
+ src/main/resources
+ false
+
+
+
+
+
+ com.coderplus.maven.plugins
+ copy-rename-maven-plugin
+ 1.0
+
+
+ copy-file-azure
+ process-resources
+
+ copy
+
+
+ ${project.build.outputDirectory}/wikipedia_hadoop_azure_input_index_task_template.json
+ ${project.build.outputDirectory}/wikipedia_hadoop_azure_input_index_task.sh
+
+
+
+ copy-file-s3
+ process-resources
+
+ copy
+
+
+
+ ${project.build.outputDirectory}/wikipedia_hadoop_s3_input_index_task_template.json
+
+
+ ${project.build.outputDirectory}/wikipedia_hadoop_s3_input_index_task.json
+
+
+
+
+ rename-file
+ process-resources
+
+ rename
+
+
+ ${project.build.outputDirectory}/copy_resources_template.sh
+ gen-scripts/copy_resources.sh
+
+
+
+
+
+
+ hadoop3
+
+ "org.apache.hadoop:hadoop-client-api:${hadoop.compile.version}", "org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version}", "org.apache.hadoop:hadoop-azure:${hadoop.compile.version}"
+ org.apache.hadoop.fs.s3a.S3AFileSystem
+
+
integration-tests
@@ -512,6 +601,7 @@
5.5.1
${apache.kafka.version}
${zookeeper.version}
+ ${hadoop.compile.version}
${project.basedir}/build_run_cluster.sh
diff --git a/integration-tests/script/copy_hadoop_resources.sh b/integration-tests/script/copy_hadoop_resources.sh
index ae378cfab3f..8a442c7c466 100755
--- a/integration-tests/script/copy_hadoop_resources.sh
+++ b/integration-tests/script/copy_hadoop_resources.sh
@@ -35,6 +35,10 @@ else
fi
set -e
+if [ -n "${HADOOP_VERSION}" ] && [ "${HADOOP_VERSION:0:1}" == "3" ]; then
+ docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /user/root"
+ docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -put /usr/local/hadoop/etc/hadoop/ input"
+fi
# Setup hadoop druid dirs
echo "Setting up druid hadoop dirs"
diff --git a/integration-tests/script/copy_resources.sh b/integration-tests/script/copy_resources_template.sh
similarity index 76%
rename from integration-tests/script/copy_resources.sh
rename to integration-tests/script/copy_resources_template.sh
index 8e4d8f3a81d..87e06df21ea 100755
--- a/integration-tests/script/copy_resources.sh
+++ b/integration-tests/script/copy_resources_template.sh
@@ -75,8 +75,15 @@ cp $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/d
# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
then
- java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 -h org.apache.hadoop:hadoop-azure:2.8.5
- curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar
+ ## Both branches resolve to the same ${hadoop.compile.version}; Maven filters this template per build, and at runtime the if selects the branch matching the Hadoop major version in HADOOP_VERSION.
+ ## TODO: remove this if/else once a single dependency set works for both Hadoop 2 and 3.
+ if [ -n "${HADOOP_VERSION}" ] && [ "${HADOOP_VERSION:0:1}" == "3" ]; then
+ java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client-api:${hadoop.compile.version} -h org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version}
+ curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop3-latest.jar
+ else
+ java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version}
+ curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar
+ fi
fi
# one of the integration tests needs the wikiticker sample data
diff --git a/integration-tests/script/docker_build_containers.sh b/integration-tests/script/docker_build_containers.sh
index 50d27a21517..9d586150e93 100755
--- a/integration-tests/script/docker_build_containers.sh
+++ b/integration-tests/script/docker_build_containers.sh
@@ -48,5 +48,5 @@ fi
# Build Hadoop docker if needed
if [ -n "$DRUID_INTEGRATION_TEST_BUILD_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_BUILD_HADOOP_DOCKER" == true ]
then
- docker build -t druid-it/hadoop:2.8.5 --build-arg APACHE_ARCHIVE_MIRROR_HOST $HADOOP_DOCKER_DIR
+ docker build -t druid-it/hadoop:9.9.9 --build-arg APACHE_ARCHIVE_MIRROR_HOST $HADOOP_DOCKER_DIR
fi
diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
index a6710db6e27..6402ca4fcb2 100644
--- a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
+++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
@@ -65,18 +65,19 @@
},
"jobProperties": {
"fs.permissions.umask-mode": "022",
- "fs.default.name" : "hdfs://druid-it-hadoop:9000",
- "fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
- "dfs.datanode.address" : "druid-it-hadoop",
- "dfs.client.use.datanode.hostname" : "true",
- "dfs.datanode.use.datanode.hostname" : "true",
- "yarn.resourcemanager.hostname" : "druid-it-hadoop",
- "yarn.nodemanager.vmem-check-enabled" : "false",
- "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
- "mapreduce.job.user.classpath.first" : "true",
- "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
- "mapreduce.map.memory.mb" : 1024,
- "mapreduce.reduce.memory.mb" : 1024
+ "fs.default.name": "hdfs://druid-it-hadoop:9000",
+ "fs.defaultFS": "hdfs://druid-it-hadoop:9000",
+ "dfs.datanode.address": "druid-it-hadoop",
+ "dfs.client.use.datanode.hostname": "true",
+ "dfs.datanode.use.datanode.hostname": "true",
+ "yarn.resourcemanager.hostname": "druid-it-hadoop",
+ "yarn.nodemanager.vmem-check-enabled": "false",
+ "mapreduce.job.classloader": "true",
+ "mapreduce.map.java.opts": "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+ "mapreduce.job.user.classpath.first": "true",
+ "mapreduce.reduce.java.opts": "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+ "mapreduce.map.memory.mb": 1024,
+ "mapreduce.reduce.memory.mb": 1024
},
"rowFlushBoundary": 10000
}
diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task_template.json
similarity index 95%
rename from integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json
rename to integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task_template.json
index 60bd4954271..fb4318bf470 100644
--- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json
+++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task_template.json
@@ -1,6 +1,6 @@
{
"type": "index_hadoop",
- "hadoopDependencyCoordinates" : ["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"],
+ "hadoopDependencyCoordinates" : [${hadoop.integ.libs}],
"spec": {
"dataSchema": {
"dataSource": "%%DATASOURCE%%",
@@ -94,7 +94,8 @@
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
- "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D",
+ "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
+ "mapreduce.job.classloader": "true",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.map.memory.mb" : 1024,
diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json
index 03a1292105a..8d792e06ca3 100644
--- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json
+++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json
@@ -93,6 +93,7 @@
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
+ "mapreduce.job.classloader": "true",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json
index 76600f57cf8..203b696da81 100644
--- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json
+++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json
@@ -90,6 +90,7 @@
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
+ "mapreduce.job.classloader": "true",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json
index cf44540a6b4..64852b3f0a9 100644
--- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json
+++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json
@@ -61,6 +61,7 @@
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
"dfs.datanode.address" : "druid-it-hadoop",
+ "mapreduce.job.classloader": "true",
"dfs.client.use.datanode.hostname" : "true",
"dfs.datanode.use.datanode.hostname" : "true",
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task_template.json
similarity index 97%
rename from integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json
rename to integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task_template.json
index d18b6da1e4e..858dff1d2a2 100644
--- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json
+++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task_template.json
@@ -94,13 +94,14 @@
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
"yarn.nodemanager.vmem-check-enabled" : "false",
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
+ "mapreduce.job.classloader": "true",
"mapreduce.job.user.classpath.first" : "true",
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
"mapreduce.map.memory.mb" : 1024,
"mapreduce.reduce.memory.mb" : 1024,
"fs.s3.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
"fs.s3.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
- "fs.s3.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
+ "fs.s3.impl" : "${hadoop.s3.impl}",
"fs.s3n.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
"fs.s3n.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
"fs.s3n.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
diff --git a/licenses.yaml b/licenses.yaml
index 8312ebcd814..e26bfc8f8e8 100644
--- a/licenses.yaml
+++ b/licenses.yaml
@@ -630,6 +630,16 @@ notices:
---
+name: Apache Commons Configuration
+license_category: binary
+module: java-core
+license_name: Apache License version 2.0
+version: 2.1.1
+libraries:
+ - org.apache.commons: commons-configuration2
+
+---
+
name: Apache Commons DBCP
license_category: binary
module: java-core
@@ -709,6 +719,15 @@ notices:
---
+name: Apache Commons Text
+license_category: binary
+module: java-core
+license_name: Apache License version 2.0
+version: 1.4
+libraries:
+ - org.apache.commons: commons-text
+---
+
name: Airline
license_category: binary
module: java-core
@@ -1757,6 +1776,20 @@ notices:
---
+name: Apache HttpClient
+license_category: binary
+module: java-core
+license_name: Apache License version 2.0
+version: 4.5.13
+libraries:
+ - org.apache.httpcomponents: httpclient
+notices:
+ - httpclient: |
+ Apache HttpClient
+ Copyright 1999-2017 The Apache Software Foundation
+
+---
+
name: Apache HttpClient
license_category: binary
module: hadoop-client
@@ -1988,6 +2021,8 @@ libraries:
- org.eclipse.jetty: jetty-servlets
- org.eclipse.jetty: jetty-util
- org.eclipse.jetty: jetty-util-ajax
+ - org.eclipse.jetty: jetty-xml
+ - org.eclipse.jetty: jetty-webapp
notice: |
==============================================================
Jetty Web Container
@@ -2130,6 +2165,30 @@ libraries:
---
+name: Kerby
+license_category: binary
+module: java-core
+license_name: Apache License version 2.0
+version: 1.0.1
+libraries:
+ - org.apache.kerby: kerb-admin
+ - org.apache.kerby: kerb-client
+ - org.apache.kerby: kerb-common
+ - org.apache.kerby: kerb-core
+ - org.apache.kerby: kerb-crypto
+ - org.apache.kerby: kerb-identity
+ - org.apache.kerby: kerb-server
+ - org.apache.kerby: kerb-simplekdc
+ - org.apache.kerby: kerb-util
+ - org.apache.kerby: kerby-asn1
+ - org.apache.kerby: kerby-config
+ - org.apache.kerby: kerby-pkix
+ - org.apache.kerby: kerby-util
+ - org.apache.kerby: kerby-xdr
+ - org.apache.kerby: token-provider
+
+---
+
name: SIGAR
license_category: binary
module: java-core
@@ -2827,6 +2886,29 @@ notice: |
---
+name: Apache Hadoop
+license_category: binary
+module: hadoop-client
+license_name: Apache License version 2.0
+version: 3.3.1
+libraries:
+ - org.apache.hadoop: hadoop-annotations
+ - org.apache.hadoop: hadoop-auth
+ - org.apache.hadoop: hadoop-client
+ - org.apache.hadoop: hadoop-common
+ - org.apache.hadoop: hadoop-hdfs-client
+ - org.apache.hadoop: hadoop-mapreduce-client-app
+ - org.apache.hadoop: hadoop-mapreduce-client-common
+ - org.apache.hadoop: hadoop-mapreduce-client-core
+ - org.apache.hadoop: hadoop-mapreduce-client-jobclient
+ - org.apache.hadoop: hadoop-mapreduce-client-shuffle
+ - org.apache.hadoop: hadoop-yarn-api
+ - org.apache.hadoop: hadoop-yarn-client
+ - org.apache.hadoop: hadoop-yarn-common
+ - org.apache.hadoop: hadoop-yarn-server-common
+
+---
+
name: Gson
license_category: binary
module: hadoop-client
@@ -3596,6 +3678,36 @@ libraries:
---
+name: Jersey
+license_category: binary
+module: extensions/druid-avro-extensions
+license_name: CDDL 1.1
+version: 1.19.3
+libraries:
+ - com.sun.jersey: jersey-json
+
+---
+
+name: jaxb
+license_category: binary
+module: extensions/druid-avro-extensions
+license_name: CDDL 1.1
+version: 2.2.3-1
+libraries:
+ - com.sun.xml.bind: jaxb-impl
+
+---
+
+name: commons-cli
+license_category: binary
+module: extensions/druid-avro-extensions
+license_name: Apache License version 2.0
+version: 1.3.1
+libraries:
+ - commons-cli: commons-cli
+
+---
+
name: Apache Hive
license_category: binary
module: extensions/druid-bloom-filter
@@ -4773,6 +4885,16 @@ libraries:
---
+name: org.codehaus.woodstox stax2-api
+license_category: binary
+version: 4.2.1
+module: druid-ranger-security
+license_name: BSD-3-Clause License
+libraries:
+ - org.codehaus.woodstox: stax2-api
+
+---
+
name: org.codehaus.woodstox woodstox-core-asl
license_category: binary
version: 4.4.1
@@ -4869,6 +4991,38 @@ notices:
---
+name: Woodstox
+license_category: binary
+module: java-core
+license_name: Apache License version 2.0
+version: 5.3.0
+libraries:
+ - com.fasterxml.woodstox: woodstox-core
+
+---
+
+name: DNS Java
+license_category: binary
+module: java-core
+license_name: BSD 2-Clause license
+version: 2.1.7
+libraries:
+ - dnsjava: dnsjava
+
+---
+
+name: Hadoop shaded
+license_category: binary
+module: java-core
+license_name: Apache License version 2.0
+version: 1.1.1
+libraries:
+ - org.apache.hadoop.thirdparty: hadoop-shaded-protobuf_3_7
+ - org.apache.hadoop.thirdparty: hadoop-shaded-guava
+
+---
+
+
# Web console modules start
name: "@babel/runtime"
license_category: binary
diff --git a/owasp-dependency-check-suppressions.xml b/owasp-dependency-check-suppressions.xml
index b7da4e5a167..842ebb89a52 100644
--- a/owasp-dependency-check-suppressions.xml
+++ b/owasp-dependency-check-suppressions.xml
@@ -327,6 +327,14 @@
CVE-2018-11765
CVE-2020-9492
+
+
+ ^pkg:maven/org\.apache\.hadoop/hadoop\-.*@.*$
+ CVE-2018-11765
+ CVE-2020-9492
+
CVE-2021-26291
+
+
+
+ CVE-2020-13949
+
@@ -402,4 +417,49 @@
-->
CVE-2021-40531
+
+
+
+ CVE-2020-10740
+ CVE-2020-25644
+ CVE-2020-10718
+
+
+
+
+
+ CVE-2020-8570
+ CVE-2015-8559
+ CVE-2021-20291
+ CVE-2017-17485
+ CVE-2018-5968
+ CVE-2017-15095
+ CVE-2019-16942
+ CVE-2020-25649
+ CVE-2020-35491
+ CVE-2019-16943
+ CVE-2020-35490
+ CVE-2019-20330
+ CVE-2020-10673
+ CVE-2018-11307
+ CVE-2018-7489
+ CVE-2019-17267
+ CVE-2019-17531
+ CVE-2019-16335
+ CVE-2019-14893
+ CVE-2019-14540
+
+
+
+
+
+ CVE-2015-7430
+ CVE-2017-3162
+
diff --git a/pom.xml b/pom.xml
index ee60c3bbf3a..b02322a8ccd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,6 +112,8 @@
1.12.37
2.8.0
0.8.7
+ 5.2.5.Final
+ 4.5.10
3.5.9
2.5.7
@@ -459,7 +461,7 @@
org.hibernate
hibernate-validator
- 5.2.5.Final
+ ${hibernate-validator.version}
javax.validation
@@ -727,60 +729,13 @@
org.apache.httpcomponents
httpclient
- 4.5.10
+ ${httpclient.version}
org.apache.httpcomponents
httpcore
4.4.11
-
- org.apache.hadoop
- hadoop-auth
- ${hadoop.compile.version}
-
-
- org.apache.hadoop
- hadoop-client
- ${hadoop.compile.version}
- provided
-
-
- org.apache.avro
- avro
-
-
-
-
- org.apache.hadoop
- hadoop-common
- ${hadoop.compile.version}
- provided
-
-
- org.apache.hadoop
- hadoop-hdfs-client
- ${hadoop.compile.version}
- provided
-
-
- org.apache.hadoop
- hadoop-yarn-common
- ${hadoop.compile.version}
- provided
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- ${hadoop.compile.version}
- provided
-
-
- javax.servlet
- servlet-api
-
-
-
org.mapdb
mapdb
@@ -1176,16 +1131,46 @@
test
- com.github.stefanbirkner
- system-rules
- 1.19.0
- test
-
-
- io.timeandspace
- cron-scheduler
- 0.1
+ com.github.stefanbirkner
+ system-rules
+ 1.19.0
+ test
+
+ io.timeandspace
+ cron-scheduler
+ 0.1
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-hdfs-client
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ ${hadoop.compile.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ ${hadoop.compile.version}
+ provided
+
+
+ javax.servlet
+ servlet-api
+
+
+
@@ -1558,11 +1543,6 @@
maven-deploy-plugin
2.7
-
- org.apache.maven.plugins
- maven-help-plugin
- 2.1.1
-
org.apache.maven.plugins
maven-install-plugin
@@ -1637,15 +1617,15 @@
${maven.compiler.target}
-
- com.github.eirslett
- frontend-maven-plugin
- 1.11.3
-
- ${node.version}
- ${npm.version}
-
-
+
+ com.github.eirslett
+ frontend-maven-plugin
+ 1.11.3
+
+ ${node.version}
+ ${npm.version}
+
+
@@ -1902,6 +1882,7 @@
.asf.yaml
**/dependency-reduced-pom.xml
.editorconfig
+ **/hadoop.indexer.libs.version
@@ -1963,5 +1944,19 @@
true
+
+ hadoop3
+
+
+ hadoop3.enabled
+ true
+
+
+
+ 3.3.1
+ 5.3.6.Final
+ 4.5.13
+
+
diff --git a/services/pom.xml b/services/pom.xml
index d23f5779dd2..50a2ac0f56b 100644
--- a/services/pom.xml
+++ b/services/pom.xml
@@ -52,6 +52,12 @@
org.apache.druid
druid-indexing-hadoop
${project.parent.version}
+
+
+ javax.servlet
+ servlet-api
+
+
org.apache.druid
diff --git a/services/src/main/java/org/apache/druid/cli/PullDependencies.java b/services/src/main/java/org/apache/druid/cli/PullDependencies.java
index f2ea7f1b684..bbb91f2930d 100644
--- a/services/src/main/java/org/apache/druid/cli/PullDependencies.java
+++ b/services/src/main/java/org/apache/druid/cli/PullDependencies.java
@@ -183,7 +183,7 @@ public class PullDependencies implements Runnable
@Option(
name = "--no-default-hadoop",
- description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client:2.8.5. If `-h` option is supplied, then default hadoop coordinate will not be downloaded."
+ description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client if hadoop2 or org.apache.hadoop:hadoop-client-runtime if hadoop3. If `-h` option is supplied, then default hadoop coordinate will not be downloaded."
)
public boolean noDefaultHadoop = false;
diff --git a/website/.spelling b/website/.spelling
index 9b298b702dc..f1324234ee9 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -510,6 +510,12 @@ Papache-release
Pdist
Ddruid.console.skip
yaml
+Phadoop3
+dist-hadoop3
+hadoop3
+hadoop2
+2.x.x
+3.x.x
- ../docs/development/extensions-contrib/ambari-metrics-emitter.md
ambari-metrics
metricName
@@ -1927,3 +1933,4 @@ PiB
protobuf
Golang
multiValueHandling
+