diff --git a/.travis.yml b/.travis.yml index bcf50159b79..de38a02fb58 100644 --- a/.travis.yml +++ b/.travis.yml @@ -139,6 +139,28 @@ jobs: " && false; } + - name: "analyze hadoop 3 dependencies" + script: |- + MAVEN_OPTS='-Xmx3000m' ${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true -Phadoop3 || { echo " + + The dependency analysis has found a dependency that is either: + + 1) Used and undeclared: These are available as a transitive dependency but should be explicitly + added to the POM to ensure the dependency version. The XML to add the dependencies to the POM is + shown above. + + 2) Unused and declared: These are not needed and removing them from the POM will speed up the build + and reduce the artifact size. The dependencies to remove are shown above. + + If there are false positive dependency analysis warnings, they can be suppressed: + https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html#usedDependencies + https://maven.apache.org/plugins/maven-dependency-plugin/examples/exclude-dependencies-from-dependency-analysis.html + + For more information, refer to: + https://maven.apache.org/plugins/maven-dependency-plugin/analyze-mojo.html + + " && false; } + - name: "intellij inspections" script: > ./check_test_suite.py && travis_terminate 0 || docker run --rm diff --git a/distribution/bin/check-licenses.py b/distribution/bin/check-licenses.py index 50151b2a4a6..8af9e59226e 100755 --- a/distribution/bin/check-licenses.py +++ b/distribution/bin/check-licenses.py @@ -233,6 +233,7 @@ def build_compatible_license_names(): compatible_licenses['BSD-2-Clause License'] = 'BSD-2-Clause License' compatible_licenses['BSD-2-Clause'] = 'BSD-2-Clause License' + compatible_licenses['BSD 2-Clause license'] = 'BSD 2-Clause License' compatible_licenses['BSD-3-Clause License'] = 'BSD-3-Clause License' compatible_licenses['New BSD license'] = 'BSD-3-Clause License' diff --git a/distribution/pom.xml 
b/distribution/pom.xml index 6b43e321246..ad6fd650270 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -288,6 +288,186 @@ + + dist-hadoop3 + + false + + + + + org.codehaus.mojo + exec-maven-plugin + + + generate-readme + initialize + + exec + + + ${project.basedir}/bin/build-textfile-readme.sh + + ${project.basedir}/../ + ${project.parent.version} + + + + + generate-binary-license + initialize + + exec + + + ${project.basedir}/bin/generate-binary-license.py + + ${project.parent.basedir}/licenses/APACHE2 + ${project.parent.basedir}/licenses.yaml + ${project.parent.basedir}/LICENSE.BINARY + + + + + generate-binary-notice + initialize + + exec + + + ${project.basedir}/bin/generate-binary-notice.py + + ${project.parent.basedir}/NOTICE + ${project.parent.basedir}/licenses.yaml + ${project.parent.basedir}/NOTICE.BINARY + + + + + pull-deps + package + + exec + + + java + + -classpath + + -Ddruid.extensions.loadList=[] + -Ddruid.extensions.directory=${project.build.directory}/extensions + + + -Ddruid.extensions.hadoopDependenciesDir=${project.build.directory}/hadoop-dependencies + + -Dhadoop3.enabled=true + org.apache.druid.cli.Main + tools + pull-deps + --clean + --defaultVersion + ${project.parent.version} + -l + ${settings.localRepository} + -h + org.apache.hadoop:hadoop-client-api:${hadoop.compile.version} + -h + org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version} + -c + org.apache.druid.extensions:druid-avro-extensions + -c + org.apache.druid.extensions:druid-azure-extensions + -c + org.apache.druid.extensions:druid-bloom-filter + -c + org.apache.druid.extensions:druid-datasketches + -c + org.apache.druid.extensions:druid-hdfs-storage + -c + org.apache.druid.extensions:druid-histogram + -c + org.apache.druid.extensions:druid-kafka-extraction-namespace + -c + org.apache.druid.extensions:druid-kafka-indexing-service + -c + org.apache.druid.extensions:druid-kinesis-indexing-service + -c + 
org.apache.druid.extensions:druid-lookups-cached-global + -c + org.apache.druid.extensions:druid-lookups-cached-single + -c + org.apache.druid.extensions:druid-protobuf-extensions + -c + org.apache.druid.extensions:mysql-metadata-storage + -c + org.apache.druid.extensions:druid-orc-extensions + -c + org.apache.druid.extensions:druid-parquet-extensions + -c + org.apache.druid.extensions:postgresql-metadata-storage + -c + org.apache.druid.extensions:druid-kerberos + -c + org.apache.druid.extensions:druid-s3-extensions + -c + org.apache.druid.extensions:druid-aws-rds-extensions + -c + org.apache.druid.extensions:druid-ec2-extensions + -c + org.apache.druid.extensions:druid-google-extensions + -c + org.apache.druid.extensions:druid-stats + -c + org.apache.druid.extensions:simple-client-sslcontext + -c + org.apache.druid.extensions:druid-basic-security + -c + org.apache.druid.extensions:druid-pac4j + -c + org.apache.druid.extensions:druid-ranger-security + -c + org.apache.druid.extensions:druid-kubernetes-extensions + ${druid.distribution.pulldeps.opts} + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + distro-assembly + package + + single + + + apache-druid-${project.parent.version} + posix + + src/assembly/assembly.xml + + + + + + + org.codehaus.mojo + license-maven-plugin + + + download-licenses + + download-licenses + + + + + + + apache-release diff --git a/docs/development/build.md b/docs/development/build.md index b59dddcbf67..1b2dc475fb2 100644 --- a/docs/development/build.md +++ b/docs/development/build.md @@ -71,6 +71,25 @@ Putting these together, if you wish to build the source and binary distributions ```bash mvn clean install -Papache-release,dist,rat -DskipTests ``` + +### Building hadoop 3 distribution + +By default, druid ships hadoop 2.x.x jars along with the distribution. Exact version can be found in the +main [pom](https://github.com/apache/druid/blob/master/pom.xml). 
To build druid with hadoop 3.x.x jars, the hadoop3 profile +needs to be activated. + +To generate a build with hadoop 3 dependencies, run: + +```bash +mvn clean install -Phadoop3 +``` + +To generate a distribution with hadoop3 dependencies, run: + +```bash +mvn clean install -Papache-release,dist-hadoop3,rat,hadoop3 -DskipTests +``` + #### Potential issues ##### Missing `pyyaml` diff --git a/examples/quickstart/tutorial/hadoop3/docker/Dockerfile b/examples/quickstart/tutorial/hadoop3/docker/Dockerfile new file mode 100644 index 00000000000..482c72b698e --- /dev/null +++ b/examples/quickstart/tutorial/hadoop3/docker/Dockerfile @@ -0,0 +1,133 @@ +# Based on the SequenceIQ hadoop-docker project hosted at +# https://github.com/sequenceiq/hadoop-docker, and modified at +# the Apache Software Foundation (ASF). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Creates pseudo distributed hadoop 3.3.1 with java 8 +FROM centos:7 + +USER root + +# install dev tools +RUN yum clean all \ + && rpm --rebuilddb \ + && yum install -y curl which tar sudo openssh-server openssh-clients rsync yum-plugin-ovl\ + && yum clean all \ + && yum update -y libselinux \ + && yum update -y nss \ + && yum clean all +# update libselinux. see https://github.com/sequenceiq/hadoop-docker/issues/14 +# update nss. 
see https://unix.stackexchange.com/questions/280548/curl-doesnt-connect-to-https-while-wget-does-nss-error-12286 + +# passwordless ssh +RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key +RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key +RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa +RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + +# zulu java 8 +COPY ../../hadoop/docker/setup-zulu-repo.sh /root/setup-zulu-repo.sh +RUN /root/setup-zulu-repo.sh +RUN yum install -y zulu-8 + +ENV JAVA_HOME /usr/lib/jvm/zulu-8 +ENV PATH $PATH:$JAVA_HOME/bin + +# hadoop +ARG APACHE_ARCHIVE_MIRROR_HOST=https://archive.apache.org +RUN curl -s ${APACHE_ARCHIVE_MIRROR_HOST}/dist/hadoop/core/hadoop-3.3.1/hadoop-3.3.1.tar.gz | tar -xz -C /usr/local/ +RUN cd /usr/local && ln -s ./hadoop-3.3.1 hadoop + +ENV HADOOP_HOME /usr/local/hadoop +ENV HADOOP_COMMON_HOME /usr/local/hadoop +ENV HADOOP_HDFS_HOME /usr/local/hadoop +ENV HADOOP_MAPRED_HOME /usr/local/hadoop +ENV HADOOP_YARN_HOME /usr/local/hadoop +ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop +ENV YARN_CONF_DIR $HADOOP_HOME/etc/hadoop + +# in hadoop 3 the example file is nearly empty so we can just append stuff +RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu-8' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HADOOP_HOME=/usr/local/hadoop' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HDFS_NAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HDFS_DATANODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HDFS_SECONDARYNAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export YARN_RESOURCEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export YARN_NODEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh + +RUN cat $HADOOP_HOME/etc/hadoop/hadoop-env.sh + +RUN 
mkdir $HADOOP_HOME/input +RUN cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input + +# pseudo distributed +ADD ../../hadoop/docker/core-site.xml.template $HADOOP_HOME/etc/hadoop/core-site.xml.template +RUN sed s/HOSTNAME/localhost/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml +ADD ../../hadoop/docker/hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml +ADD ../../hadoop/docker/mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml +ADD ../../hadoop/docker/yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml + +RUN $HADOOP_HOME/bin/hdfs namenode -format + +ADD ../../hadoop/docker/ssh_config /root/.ssh/config +RUN chmod 600 /root/.ssh/config +RUN chown root:root /root/.ssh/config + +# # installing supervisord +# RUN yum install -y python-setuptools +# RUN easy_install pip +# RUN curl https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py -o - | python +# RUN pip install supervisor +# +# ADD supervisord.conf /etc/supervisord.conf + +ADD bootstrap.sh /etc/bootstrap.sh +RUN chown root:root /etc/bootstrap.sh +RUN chmod 700 /etc/bootstrap.sh + +ENV BOOTSTRAP /etc/bootstrap.sh + +# workingaround docker.io build error +RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh +RUN chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh +RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh + +# Copy additional .jars to classpath +RUN cp /usr/local/hadoop/share/hadoop/tools/lib/*.jar /usr/local/hadoop/share/hadoop/common/lib/ + +# fix the 254 error code +RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config +RUN echo "UsePAM no" >> /etc/ssh/sshd_config +RUN echo "Port 2122" >> /etc/ssh/sshd_config + +# script for plain sshd start +RUN echo -e \ + '#!/bin/bash\n/usr/sbin/sshd\ntimeout 10 bash -c "until printf \"\" 2>>/dev/null >>/dev/tcp/127.0.0.1/2122; do sleep 0.5; done"' > \ + /usr/local/bin/start_sshd && \ + chmod a+x /usr/local/bin/start_sshd + +RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && 
$HADOOP_HOME/sbin/start-dfs.sh +RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh + +CMD ["/etc/bootstrap.sh", "-d"] + +# Hdfs ports +EXPOSE 8020 9000 9820 9864 9865 9866 9867 9868 9869 9870 9871 50010 50020 50070 50075 50090 +# Mapred ports +EXPOSE 10020 19888 +#Yarn ports +EXPOSE 8030 8031 8032 8033 8040 8042 8088 +#Other ports +EXPOSE 2122 49707 \ No newline at end of file diff --git a/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh b/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh new file mode 100644 index 00000000000..d1fa493d4ea --- /dev/null +++ b/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +: ${HADOOP_HOME:=/usr/local/hadoop} + +$HADOOP_HOME/etc/hadoop/hadoop-env.sh + +rm /tmp/*.pid +# installing libraries if any - (resource urls added comma separated to the ACP system variable) +cd $HADOOP_HOME/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd - + +# altering the core-site configuration +sed s/HOSTNAME/$HOSTNAME/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml + + +start_sshd +$HADOOP_HOME/sbin/start-dfs.sh +$HADOOP_HOME/sbin/start-yarn.sh +$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver + +if [[ $1 == "-d" ]]; then + while true; do sleep 1000; done +fi + +if [[ $1 == "-bash" ]]; then + /bin/bash +fi diff --git a/examples/quickstart/tutorial/wikipedia-index-hadoop3.json b/examples/quickstart/tutorial/wikipedia-index-hadoop3.json new file mode 100644 index 00000000000..28db64e3268 --- /dev/null +++ b/examples/quickstart/tutorial/wikipedia-index-hadoop3.json @@ -0,0 +1,80 @@ +{ + "type" : "index_hadoop", + "spec" : { + "dataSchema" : { + "dataSource" : "wikipedia", + "parser" : { + "type" : "hadoopyString", + "parseSpec" : { + "format" : "json", + "dimensionsSpec" : { + "dimensions" : [ + "channel", + "cityName", + "comment", + "countryIsoCode", + "countryName", + "isAnonymous", + "isMinor", + "isNew", + "isRobot", + "isUnpatrolled", + "metroCode", + "namespace", + "page", + "regionIsoCode", + "regionName", + "user", + { "name": "added", "type": "long" }, + { "name": "deleted", "type": "long" }, + { "name": "delta", "type": "long" } + ] + }, + "timestampSpec" : { + "format" : "auto", + "column" : "time" + } + } + }, + "metricsSpec" : [], + "granularitySpec" : { + "type" : "uniform", + "segmentGranularity" : "day", + "queryGranularity" : "none", + "intervals" : ["2015-09-12/2015-09-13"], + "rollup" : false + } + }, + "ioConfig" : { + "type" : "hadoop", + "inputSpec" : { + "type" : "static", + "paths" : "/quickstart/wikiticker-2015-09-12-sampled.json.gz" 
+ } + }, + "tuningConfig" : { + "type" : "hadoop", + "partitionsSpec" : { + "type" : "hashed", + "targetPartitionSize" : 5000000 + }, + "forceExtendableShardSpecs" : true, + "jobProperties" : { + "fs.default.name" : "hdfs://druid-hadoop-demo:9000", + "fs.defaultFS" : "hdfs://druid-hadoop-demo:9000", + "dfs.datanode.address" : "druid-hadoop-demo", + "dfs.client.use.datanode.hostname" : "true", + "dfs.datanode.use.datanode.hostname" : "true", + "yarn.resourcemanager.hostname" : "druid-hadoop-demo", + "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.job.user.classpath.first" : "true", + "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb" : 1024, + "mapreduce.reduce.memory.mb" : 1024, + "mapreduce.job.classloader" : "true" + } + } + }, + "hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client-api:3.3.1","org.apache.hadoop:hadoop-client-runtime:3.3.1"] +} diff --git a/extensions-contrib/thrift-extensions/pom.xml b/extensions-contrib/thrift-extensions/pom.xml index 9b83cc3dd43..68af02e4061 100644 --- a/extensions-contrib/thrift-extensions/pom.xml +++ b/extensions-contrib/thrift-extensions/pom.xml @@ -71,11 +71,6 @@ ${project.parent.version} provided - - org.apache.hadoop - hadoop-client - provided - com.twitter.elephantbird elephant-bird-core @@ -115,11 +110,6 @@ guice provided - - org.apache.hadoop - hadoop-common - provided - com.fasterxml.jackson.core jackson-databind @@ -142,7 +132,56 @@ test - + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-client + ${hadoop.compile.version} + provided + + + org.apache.avro + avro + + + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.compile.version} + test + + + + diff --git 
a/extensions-core/avro-extensions/pom.xml b/extensions-core/avro-extensions/pom.xml index 292f6377234..403c0944317 100644 --- a/extensions-core/avro-extensions/pom.xml +++ b/extensions-core/avro-extensions/pom.xml @@ -192,10 +192,6 @@ - - org.apache.hadoop - hadoop-client - com.google.code.findbugs jsr305 @@ -211,28 +207,6 @@ jackson-annotations provided - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - - - org.slf4j - slf4j-api - - - - - org.apache.hadoop - hadoop-common - provided - - - org.slf4j - slf4j-api - - - com.google.inject guice @@ -287,7 +261,78 @@ test - + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-client + ${hadoop.compile.version} + provided + + + org.apache.avro + avro + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + org.slf4j + slf4j-api + + + + + org.apache.hadoop + hadoop-common + provided + + + org.slf4j + slf4j-api + + + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + provided + + + com.sun.jersey + jersey-core + test + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.compile.version} + test + + + + diff --git a/extensions-core/druid-kerberos/pom.xml b/extensions-core/druid-kerberos/pom.xml index af0d3b7201b..351f1f697ee 100644 --- a/extensions-core/druid-kerberos/pom.xml +++ b/extensions-core/druid-kerberos/pom.xml @@ -283,9 +283,12 @@ jetty-client provided + org.apache.hadoop hadoop-auth + ${hadoop.compile.version} com.nimbusds diff --git a/extensions-core/druid-ranger-security/pom.xml b/extensions-core/druid-ranger-security/pom.xml index d4c3089782e..8022c1cb93f 100644 --- a/extensions-core/druid-ranger-security/pom.xml +++ b/extensions-core/druid-ranger-security/pom.xml @@ -128,11 +128,23 @@ ranger-plugins-common ${apache.ranger.version} compile + + + org.apache.hadoop + hadoop-common + + org.apache.ranger ranger-plugins-audit ${apache.ranger.version} + + + org.apache.hadoop + hadoop-common + + compile 
@@ -141,223 +153,6 @@ ${apache.ranger.gson.version} compile - - org.apache.hadoop - hadoop-client - runtime - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - org.apache.hadoop - hadoop-annotations - - - javax.activation - activation - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - org.apache.commons - commons-math3 - - - com.google.guava - guava - - - - commons-beanutils - commons-beanutils-core - - - - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - compile - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.commons - commons-math3 - - - com.google.guava - guava - - - org.apache.avro - avro - - - net.java.dev.jets3t - jets3t - - - com.sun.jersey - jersey-json - - - com.jcraft - jsch - - - org.mortbay.jetty - jetty - - - com.sun.jersey - jersey-server - - - - commons-beanutils - 
commons-beanutils-core - - - @@ -378,6 +173,260 @@ test + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-client + ${hadoop.compile.version} + runtime + + + org.apache.avro + avro + + + commons-cli + commons-cli + + + log4j + log4j + + + commons-codec + commons-codec + + + commons-logging + commons-logging + + + commons-io + commons-io + + + commons-lang + commons-lang + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.zookeeper + zookeeper + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + javax.ws.rs + jsr311-api + + + com.google.code.findbugs + jsr305 + + + org.mortbay.jetty + jetty-util + + + org.apache.hadoop + hadoop-annotations + + + javax.activation + activation + + + com.google.protobuf + protobuf-java + + + com.sun.jersey + jersey-core + + + org.apache.curator + curator-client + + + org.apache.curator + curator-framework + + + org.apache.curator + curator-recipes + + + org.apache.commons + commons-math3 + + + com.google.guava + guava + + + + commons-beanutils + commons-beanutils-core + + + + + org.apache.hadoop + hadoop-common + compile + + + commons-cli + commons-cli + + + log4j + log4j + + + commons-codec + commons-codec + + + commons-logging + commons-logging + + + commons-io + commons-io + + + commons-lang + commons-lang + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.zookeeper + zookeeper + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + javax.ws.rs + jsr311-api + + + com.google.code.findbugs + jsr305 + + + org.mortbay.jetty + jetty-util + + + com.google.protobuf + protobuf-java + + + com.sun.jersey + jersey-core + + + org.apache.curator + curator-client + + + org.apache.commons + commons-math3 + + + com.google.guava + guava + + + org.apache.avro + avro + + + net.java.dev.jets3t + jets3t + + + com.sun.jersey + jersey-json + + + com.jcraft + jsch + + + org.mortbay.jetty + jetty + + + com.sun.jersey + 
jersey-server + + + + commons-beanutils + commons-beanutils-core + + + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + compile + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.compile.version} + test + + + + diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml index 908a1707a4a..4985c5e3bea 100644 --- a/extensions-core/hdfs-storage/pom.xml +++ b/extensions-core/hdfs-storage/pom.xml @@ -41,113 +41,230 @@ provided - org.apache.hadoop - hadoop-client - runtime - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - org.apache.hadoop - hadoop-annotations - - - javax.activation - activation - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - org.apache.commons - commons-math3 - - - com.google.guava - guava - - - - commons-beanutils - commons-beanutils-core - - + org.apache.hadoop + hadoop-aws + ${hadoop.compile.version} + runtime + + + commons-io + commons-io + provided + + + com.google.code.findbugs + jsr305 + provided + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + joda-time + joda-time + provided + + + com.google.inject + guice + provided + + + com.fasterxml.jackson.core + jackson-databind + provided + + + com.fasterxml.jackson.core + jackson-core + provided + + + com.google.inject.extensions + guice-multibindings + provided + + + 
commons-lang + commons-lang + provided + + + com.google.guava + guava + provided + + + javax.validation + validation-api + provided + + + + + junit + junit + test + + + com.google.protobuf + protobuf-java + test + + + org.apache.druid + druid-server + ${project.parent.version} + test + + + org.apache.druid + druid-processing + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-indexing-hadoop + ${project.parent.version} + test + + + org.apache.druid + druid-processing + ${project.parent.version} + test + + + org.apache.druid + druid-core + ${project.parent.version} + tests + test + + + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-client + ${hadoop.compile.version} + runtime + + + org.apache.avro + avro + + + commons-cli + commons-cli + + + log4j + log4j + + + commons-codec + commons-codec + + + commons-logging + commons-logging + + + commons-io + commons-io + + + commons-lang + commons-lang + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.zookeeper + zookeeper + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + javax.ws.rs + jsr311-api + + + com.google.code.findbugs + jsr305 + + + org.mortbay.jetty + jetty-util + + + org.apache.hadoop + hadoop-annotations + + + javax.activation + activation + + + com.google.protobuf + protobuf-java + + + com.sun.jersey + jersey-core + + + org.apache.curator + curator-client + + + org.apache.curator + curator-framework + + + org.apache.curator + curator-recipes + + + org.apache.commons + commons-math3 + + + com.google.guava + guava + + + + commons-beanutils + commons-beanutils-core + + org.apache.hadoop hadoop-common - ${hadoop.compile.version} compile @@ -259,161 +376,97 @@ org.apache.hadoop - hadoop-aws + hadoop-mapreduce-client-core + compile + + + javax.servlet + servlet-api + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.google.inject.extensions + guice-servlet + 
+ + com.google.protobuf + protobuf-java + + + io.netty + netty + + + log4j + log4j + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-hdfs-client + runtime + + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + compile + + + org.apache.hadoop + hadoop-client-runtime ${hadoop.compile.version} runtime - commons-io - commons-io - provided - - - com.google.code.findbugs - jsr305 - provided - - - com.fasterxml.jackson.core - jackson-annotations - provided - - - joda-time - joda-time - provided - - - org.apache.hadoop - hadoop-mapreduce-client-core - compile - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.google.inject.extensions - guice-servlet - - - com.google.protobuf - protobuf-java - - - io.netty - netty - - - log4j - log4j - - - org.apache.avro - avro - - - org.apache.hadoop - hadoop-annotations - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - - - com.google.inject - guice - provided - - - com.fasterxml.jackson.core - jackson-databind - provided - - - org.apache.hadoop - hadoop-hdfs-client - runtime - - - com.fasterxml.jackson.core - jackson-core - provided - - - com.google.inject.extensions - guice-multibindings - provided - - - commons-lang - commons-lang - provided - - - com.google.guava - guava - provided - - - javax.validation - validation-api - provided - - - - - junit - junit + org.apache.hadoop + hadoop-client-minicluster + ${hadoop.compile.version} test - com.google.protobuf - protobuf-java + log4j + log4j + 1.2.17 test - - org.apache.druid - druid-server - ${project.parent.version} - test - - - org.apache.druid - druid-processing - ${project.parent.version} - test-jar - test - - - org.apache.druid - druid-indexing-hadoop - ${project.parent.version} - test - - - org.apache.druid - 
druid-processing - ${project.parent.version} - test - - - org.apache.druid - druid-core - ${project.parent.version} - tests - test - - - + + + diff --git a/extensions-core/orc-extensions/pom.xml b/extensions-core/orc-extensions/pom.xml index 4e363ef7f0b..9f929390afd 100644 --- a/extensions-core/orc-extensions/pom.xml +++ b/extensions-core/orc-extensions/pom.xml @@ -159,249 +159,6 @@ jackson-annotations provided - - org.apache.hadoop - hadoop-mapreduce-client-core - compile - - - aopalliance - aopalliance - - - org.apache.avro - avro - - - org.apache.commons - commons-compress - - - com.google.guava - guava - - - com.google.inject - guice - - - com.google.inject.extensions - guice-servlet - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - javax.inject - javax - - - io.netty - netty - - - org.slf4j - slf4j-log4j12 - - - org.slf4j - slf4j-api - - - com.google.protobuf - protobuf-java - - - - - org.apache.hadoop - hadoop-hdfs-client - runtime - - - - org.apache.hadoop - hadoop-common - compile - - - org.apache.yetus - audience-annotations - - - org.apache.directory.server - apacheds-kerberos-codec - - - org.apache.avro - avro - - - commons-beanutils - commons-beanutils-core - - - commons-cli - commons-cli - - - commons-codec - commons-codec - - - org.apache.commons - commons-compress - - - commons-io - commons-io - - - commons-lang - commons-lang - - - commons-collections - commons-collections - - - commons-logging - commons-logging - - - org.apache.commons - commons-math3 - - - commons-net - commons-net - - - org.apache.curator - curator-client - - - org.apache.curator - curator-recipes - - - org.apache.curator - curator-framework - - - com.google.code.gson - gson - - - com.google.guava - guava - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - com.fasterxml.jackson.core - jackson-annotations - - - 
com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.sun.jersey - jersey-core - - - com.sun.jersey - jersey-server - - - com.sun.jersey - jersey-json - - - org.mortbay.jetty - jetty-util - - - org.mortbay.jetty - jetty-sslengine - - - org.mortbay.jetty - jetty - - - net.java.dev.jets3t - jets3t - - - com.google.code.findbugs - jsr305 - - - javax.ws.rs - jsr311-api - - - javax.servlet.jsp - jsp-api - - - com.jcraft - jsch - - - log4j - log4j - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - xmlenc - xmlenc - - - org.apache.zookeeper - zookeeper - - - com.nimbusds - nimbus-jose-jwt - - - com.google.inject guice @@ -465,4 +222,290 @@ test + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-mapreduce-client-core + compile + + + aopalliance + aopalliance + + + org.apache.avro + avro + + + org.apache.commons + commons-compress + + + com.google.guava + guava + + + com.google.inject + guice + + + javax.servlet + servlet-api + + + com.google.inject.extensions + guice-servlet + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + javax.inject + javax + + + io.netty + netty + + + org.slf4j + slf4j-log4j12 + + + org.slf4j + slf4j-api + + + com.google.protobuf + protobuf-java + + + + + org.apache.hadoop + hadoop-hdfs-client + runtime + + + + org.apache.hadoop + hadoop-common + ${hadoop.compile.version} + compile + + + org.apache.yetus + audience-annotations + + + org.apache.directory.server + apacheds-kerberos-codec + + + org.apache.avro + avro + + + commons-beanutils + commons-beanutils-core + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + org.apache.commons + commons-compress + + + commons-io + commons-io + + + commons-lang + commons-lang + + + commons-collections + commons-collections + + + commons-logging + commons-logging + + + org.apache.commons + commons-math3 + + + 
commons-net + commons-net + + + org.apache.curator + curator-client + + + org.apache.curator + curator-recipes + + + org.apache.curator + curator-framework + + + com.google.code.gson + gson + + + com.google.guava + guava + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-server + + + com.sun.jersey + jersey-json + + + org.mortbay.jetty + jetty-util + + + org.mortbay.jetty + jetty-sslengine + + + org.mortbay.jetty + jetty + + + net.java.dev.jets3t + jets3t + + + com.google.code.findbugs + jsr305 + + + javax.ws.rs + jsr311-api + + + javax.servlet.jsp + jsp-api + + + com.jcraft + jsch + + + log4j + log4j + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + xmlenc + xmlenc + + + org.apache.zookeeper + zookeeper + + + com.nimbusds + nimbus-jose-jwt + + + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + compile + + + com.google.protobuf + protobuf-java + test + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.compile.version} + test + + + + diff --git a/extensions-core/parquet-extensions/pom.xml b/extensions-core/parquet-extensions/pom.xml index c2bbcf05851..66ef705906f 100644 --- a/extensions-core/parquet-extensions/pom.xml +++ b/extensions-core/parquet-extensions/pom.xml @@ -115,248 +115,6 @@ ${project.parent.version} provided - - - org.apache.hadoop - hadoop-hdfs-client - runtime - - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.compile.version} - compile - - - aopalliance - aopalliance - - - org.apache.commons - commons-compress - - - com.google.guava - guava - - - com.google.inject - guice - - - com.google.inject.extensions - guice-servlet - - - com.fasterxml.jackson.core - jackson-annotations - - - 
com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - javax.inject - javax - - - io.netty - netty - - - slf4j-log4j12 - org.slf4j - - - org.slf4j - slf4j-api - - - protobuf-java - com.google.protobuf - - - - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - compile - - - org.apache.yetus - audience-annotations - - - commons-codec - commons-codec - - - org.apache.commons - commons-compress - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.commons - commons-math3 - - - commons-net - commons-net - - - org.apache.curator - curator-client - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - com.google.guava - guava - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.sun.jersey - jersey-core - - - com.sun.jersey - jersey-server - - - javax.servlet.jsp - jsp-api - - - com.google.code.findbugs - jsr305 - - - javax.ws.rs - jsr311-api - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - com.sun.jersey - jersey-json - - - log4j - log4j - - - org.mortbay.jetty - jetty-sslengine - - - org.mortbay.jetty - jetty-util - - - net.java.dev.jets3t - jets3t - - - org.mortbay.jetty - jetty - - - com.google.code.gson - gson - - - xmlenc - xmlenc - - - org.apache.httpcomponents - httpclient - - - com.jcraft - jsch - - - com.google.protobuf - protobuf-java - - - commons-collections - commons-collections - - - commons-logging - commons-logging - - - commons-cli - commons-cli - - - commons-digester - commons-digester - - - commons-beanutils - commons-beanutils-core - - - org.apache.directory.server - apacheds-kerberos-codec - - - com.nimbusds - nimbus-jose-jwt - - - com.google.code.findbugs jsr305 @@ -408,5 +166,277 @@ test - + + + hadoop2 + + true + + + + + org.apache.hadoop + hadoop-hdfs-client + 
runtime + + + + org.apache.hadoop + hadoop-mapreduce-client-core + compile + + + aopalliance + aopalliance + + + org.apache.commons + commons-compress + + + com.google.guava + guava + + + com.google.inject + guice + + + com.google.inject.extensions + guice-servlet + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + javax.inject + javax + + + io.netty + netty + + + slf4j-log4j12 + org.slf4j + + + org.slf4j + slf4j-api + + + protobuf-java + com.google.protobuf + + + + + org.apache.hadoop + hadoop-common + compile + + + org.apache.yetus + audience-annotations + + + commons-codec + commons-codec + + + org.apache.commons + commons-compress + + + commons-io + commons-io + + + commons-lang + commons-lang + + + org.apache.commons + commons-math3 + + + commons-net + commons-net + + + org.apache.curator + curator-client + + + org.apache.curator + curator-framework + + + org.apache.curator + curator-recipes + + + com.google.guava + guava + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-server + + + javax.servlet.jsp + jsp-api + + + com.google.code.findbugs + jsr305 + + + javax.ws.rs + jsr311-api + + + org.apache.zookeeper + zookeeper + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + com.sun.jersey + jersey-json + + + log4j + log4j + + + org.mortbay.jetty + jetty-sslengine + + + org.mortbay.jetty + jetty-util + + + net.java.dev.jets3t + jets3t + + + org.mortbay.jetty + jetty + + + com.google.code.gson + gson + + + xmlenc + xmlenc + + + org.apache.httpcomponents + httpclient + + + com.jcraft + jsch + + + com.google.protobuf + protobuf-java + + + commons-collections + commons-collections + + + commons-logging + commons-logging + + + commons-cli + commons-cli + + + commons-digester + 
commons-digester + + + commons-beanutils + commons-beanutils-core + + + org.apache.directory.server + apacheds-kerberos-codec + + + com.nimbusds + nimbus-jose-jwt + + + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.compile.version} + test + + + + diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 77f3aed0e36..56168474e28 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -67,11 +67,6 @@ com.google.guava guava - - org.apache.hadoop - hadoop-client - provided - com.fasterxml.jackson.core jackson-core @@ -84,16 +79,6 @@ com.google.code.findbugs jsr305 - - org.apache.hadoop - hadoop-common - provided - - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - joda-time joda-time @@ -141,13 +126,6 @@ hamcrest-all test - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - tests - test - org.apache.druid druid-core @@ -200,7 +178,98 @@ test + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-client + ${hadoop.compile.version} + provided + + + org.apache.avro + avro + + + javax.servlet + servlet-api + + + + + + org.apache.hadoop + hadoop-common + provided + + + javax.servlet + servlet-api + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + javax.servlet + servlet-api + + + + + org.apache.hadoop + hadoop-common + ${hadoop.compile.version} + tests + test + + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-client-minicluster + ${hadoop.compile.version} + test + + + log4j + log4j + 1.2.17 + test + + + + diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 59f1e45b0bb..42c6baaf78f 100644 --- a/indexing-service/pom.xml +++ 
b/indexing-service/pom.xml @@ -42,11 +42,6 @@ druid-server ${project.parent.version} - - org.apache.hadoop - hadoop-client - provided - org.apache.druid druid-indexing-hadoop @@ -114,16 +109,6 @@ javax.inject javax.inject - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - - - org.apache.hadoop - hadoop-common - provided - io.netty netty @@ -297,6 +282,75 @@ + + + hadoop2 + + true + + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + javax.servlet + servlet-api + + + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-client + ${hadoop.compile.version} + provided + + + org.apache.avro + avro + + + + + org.apache.hadoop + hadoop-yarn-common + provided + + + + org.apache.hadoop:hadoop-client:${hadoop.compile.version} + + + + hadoop3 + + + hadoop3.enabled + true + + + + + org.apache.hadoop + hadoop-client-api + ${hadoop.compile.version} + provided + + + + + org.apache.hadoop:hadoop-client-api:${hadoop.compile.version},org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version} + + + + + @@ -310,6 +364,20 @@ + + maven-resources-plugin + org.apache.maven.plugins + + ${project.build.outputDirectory} + + + src/main/resources + hadoop.indexer.libs.version + true + + + + diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java index a41488642c4..184e7a9425c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java @@ -22,13 +22,17 @@ package org.apache.druid.indexing.common.config; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.commons.io.IOUtils; import 
org.apache.commons.lang3.EnumUtils; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.loading.StorageLocationConfig; import org.joda.time.Period; import javax.annotation.Nullable; import java.io.File; +import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.Collections; import java.util.List; @@ -42,10 +46,23 @@ import java.util.List; public class TaskConfig { private static final Logger log = new Logger(TaskConfig.class); + private static final String HADOOP_LIB_VERSIONS = "hadoop.indexer.libs.version"; + public static final List DEFAULT_DEFAULT_HADOOP_COORDINATES; - public static final List DEFAULT_DEFAULT_HADOOP_COORDINATES = ImmutableList.of( - "org.apache.hadoop:hadoop-client:2.8.5" - ); + static { + try { + DEFAULT_DEFAULT_HADOOP_COORDINATES = + ImmutableList.copyOf(Lists.newArrayList(IOUtils.toString( + TaskConfig.class.getResourceAsStream("/" + + HADOOP_LIB_VERSIONS), + StandardCharsets.UTF_8 + ).split(","))); + + } + catch (Exception e) { + throw new ISE(e, "Unable to read file %s from classpath ", HADOOP_LIB_VERSIONS); + } + } // This enum controls processing mode of batch ingestion "segment creation" phase (i.e. 
appenderator logic) public enum BatchProcessingMode diff --git a/indexing-service/src/main/resources/hadoop.indexer.libs.version b/indexing-service/src/main/resources/hadoop.indexer.libs.version new file mode 100644 index 00000000000..61e072272ac --- /dev/null +++ b/indexing-service/src/main/resources/hadoop.indexer.libs.version @@ -0,0 +1 @@ +${hadoop-task-libs} \ No newline at end of file diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java index 47091696aa1..c41ee058d31 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/HadoopTaskTest.java @@ -30,7 +30,6 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.timeline.DataSegment; import org.apache.druid.utils.JvmUtils; -import org.apache.hadoop.util.ApplicationClassLoader; import org.easymock.EasyMock; import org.joda.time.Interval; import org.junit.Assert; @@ -146,7 +145,7 @@ public class HadoopTaskTest // This is a check against the current HadoopTask which creates a single URLClassLoader with null parent Assert.assertNull(classLoader.getParent()); } - Assert.assertFalse(classLoader instanceof ApplicationClassLoader); + Assert.assertFalse(classLoader.getClass().getSimpleName().equals("ApplicationClassLoader")); Assert.assertTrue(classLoader instanceof URLClassLoader); final ClassLoader appLoader = HadoopDruidIndexerConfig.class.getClassLoader(); diff --git a/integration-tests/.gitignore b/integration-tests/.gitignore index aa0b0953609..e11916a44f2 100644 --- a/integration-tests/.gitignore +++ b/integration-tests/.gitignore @@ -3,4 +3,5 @@ docker/docker_ip docker/tls/root.key docker/tls/root.pem docker/tls/untrusted_root.key -docker/tls/untrusted_root.pem \ No 
newline at end of file +docker/tls/untrusted_root.pem +gen-scripts/ \ No newline at end of file diff --git a/integration-tests/build_run_cluster.sh b/integration-tests/build_run_cluster.sh index 97176e5ae86..928fdfc0b78 100755 --- a/integration-tests/build_run_cluster.sh +++ b/integration-tests/build_run_cluster.sh @@ -20,6 +20,12 @@ echo $DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH export DIR=$(cd $(dirname $0) && pwd) export HADOOP_DOCKER_DIR=$DIR/../examples/quickstart/tutorial/hadoop/docker + +if [ -n "${HADOOP_VERSION}" ] && [ ${HADOOP_VERSION:0:1} == "3" ]; then + export HADOOP_DOCKER_DIR=$DIR/../examples/quickstart/tutorial/hadoop3/docker +fi + + export DOCKERDIR=$DIR/docker export SHARED_DIR=${HOME}/shared @@ -27,7 +33,7 @@ export SHARED_DIR=${HOME}/shared echo ${DOCKER_IP:=127.0.0.1} > $DOCKERDIR/docker_ip if !($DRUID_INTEGRATION_TEST_SKIP_BUILD_DOCKER); then - bash ./script/copy_resources.sh + bash ./gen-scripts/copy_resources.sh bash ./script/docker_build_containers.sh fi diff --git a/integration-tests/docker/docker-compose.base.yml b/integration-tests/docker/docker-compose.base.yml index 11d7962d993..82870d02f02 100644 --- a/integration-tests/docker/docker-compose.base.yml +++ b/integration-tests/docker/docker-compose.base.yml @@ -317,8 +317,10 @@ services: - ./environment-configs/router-custom-check-tls ### optional supporting infra + druid-it-hadoop: - image: druid-it/hadoop:2.8.5 + ## Giving fake version + image: druid-it/hadoop:9.9.9 container_name: druid-it-hadoop ports: - 2049:2049 diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure index 6564b7d83c4..2382cd120c1 100644 --- a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure @@ -28,4 +28,13 @@ druid_azure_container=
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"] -druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"] +# +# Please replace with corresponding libs +# Sample hadoop 2 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"] +# +# Sample hadoop 3 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"] +# + +druid_indexer_task_defaultHadoopCoordinates= diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs index 26ce1343a59..bfc2552d07c 100644 --- a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs @@ -31,4 +31,13 @@ druid_azure_account= druid_azure_key= druid_azure_container= -druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"] +# +# Please replace with corresponding libs +# Sample hadoop 2 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"] +# +# Sample hadoop 3 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"] +# + +druid_indexer_task_defaultHadoopCoordinates= diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs index b16500ad9d6..785e376595b 100644 ---
a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs @@ -31,4 +31,13 @@ AWS_REGION= druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"] -druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] +# +# Please replace with corresponding libs +# Sample hadoop 2 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] +# +# Sample hadoop 3 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-aws:3.2.2"] +# + +druid_indexer_task_defaultHadoopCoordinates= diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 index 60dd85673a6..7daf16a63b1 100644 --- a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 @@ -31,5 +31,13 @@ druid_storage_baseKey= AWS_REGION= druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"] +# +# Please replace with corresponding libs +# Sample hadoop 2 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] +# +# Sample hadoop 3 config +# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-aws:3.2.2"] +# -druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] +druid_indexer_task_defaultHadoopCoordinates= diff --git a/integration-tests/pom.xml
b/integration-tests/pom.xml index ed6b90f5fb4..9985a022c91 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -38,6 +38,11 @@ + + "org.apache.hadoop:hadoop-client:${hadoop.compile.version}", "org.apache.hadoop:hadoop-azure:${hadoop.compile.version}" + org.apache.hadoop.fs.s3native.NativeS3FileSystem + + com.amazonaws @@ -155,6 +160,12 @@ druid-hdfs-storage ${project.parent.version} runtime + + + com.amazonaws + aws-java-sdk-bundle + + org.apache.druid.extensions @@ -466,10 +477,88 @@ true - + + maven-resources-plugin + org.apache.maven.plugins + + ${project.build.outputDirectory} + + + script + copy_resources_template.sh + true + + + src/test/resources/hadoop/ + *template.json + true + + + src/test/resources + false + + + src/main/resources + false + + + + + + com.coderplus.maven.plugins + copy-rename-maven-plugin + 1.0 + + + copy-file-azure + process-resources + + copy + + + ${project.build.outputDirectory}/wikipedia_hadoop_azure_input_index_task_template.json + ${project.build.outputDirectory}/wikipedia_hadoop_azure_input_index_task.sh + + + + copy-file-s3 + process-resources + + copy + + + + ${project.build.outputDirectory}/wikipedia_hadoop_s3_input_index_task_template.json + + + ${project.build.outputDirectory}/wikipedia_hadoop_s3_input_index_task.json + + + + + rename-file + process-resources + + rename + + + ${project.build.outputDirectory}/copy_resources_template.sh + gen-scripts/copy_resources.sh + + + + + + + hadoop3 + + "org.apache.hadoop:hadoop-client-api:${hadoop.compile.version}", "org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version}", "org.apache.hadoop:hadoop-azure:${hadoop.compile.version}" + org.apache.hadoop.fs.s3a.S3AFileSystem + + integration-tests @@ -512,6 +601,7 @@ 5.5.1 ${apache.kafka.version} ${zookeeper.version} + ${hadoop.compile.version} ${project.basedir}/build_run_cluster.sh diff --git a/integration-tests/script/copy_hadoop_resources.sh b/integration-tests/script/copy_hadoop_resources.sh index 
ae378cfab3f..8a442c7c466 100755 --- a/integration-tests/script/copy_hadoop_resources.sh +++ b/integration-tests/script/copy_hadoop_resources.sh @@ -35,6 +35,10 @@ else fi set -e +if [ -n "${HADOOP_VERSION}" ] && [ ${HADOOP_VERSION:0:1} == "3" ]; then + docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /user/root" + docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -put /usr/local/hadoop/etc/hadoop/ input" +fi # Setup hadoop druid dirs echo "Setting up druid hadoop dirs" diff --git a/integration-tests/script/copy_resources.sh b/integration-tests/script/copy_resources_template.sh similarity index 76% rename from integration-tests/script/copy_resources.sh rename to integration-tests/script/copy_resources_template.sh index 8e4d8f3a81d..87e06df21ea 100755 --- a/integration-tests/script/copy_resources.sh +++ b/integration-tests/script/copy_resources_template.sh @@ -75,8 +75,15 @@ cp $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/d # Pull Hadoop dependency if needed if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ] then - java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 -h org.apache.hadoop:hadoop-azure:2.8.5 - curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar + ## We put same version in both commands but as we have an if, correct code path will always be executed as this is generated script.
+ ## Remove if + if [ -n "${HADOOP_VERSION}" ] && [ ${HADOOP_VERSION:0:1} == "3" ]; then + java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client-api:${hadoop.compile.version} -h org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version} + curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop3-latest.jar + else + java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version} + curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar + fi fi # one of the integration tests needs the wikiticker sample data diff --git a/integration-tests/script/docker_build_containers.sh b/integration-tests/script/docker_build_containers.sh index 50d27a21517..9d586150e93 100755 --- a/integration-tests/script/docker_build_containers.sh +++ b/integration-tests/script/docker_build_containers.sh @@ -48,5 +48,5 @@ fi # Build Hadoop docker if needed if [ -n "$DRUID_INTEGRATION_TEST_BUILD_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_BUILD_HADOOP_DOCKER" == true ] then - docker build -t druid-it/hadoop:2.8.5 --build-arg APACHE_ARCHIVE_MIRROR_HOST $HADOOP_DOCKER_DIR + docker build -t druid-it/hadoop:9.9.9 --build-arg APACHE_ARCHIVE_MIRROR_HOST $HADOOP_DOCKER_DIR fi diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json index a6710db6e27..6402ca4fcb2 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json @@ -65,18 +65,19 @@ }, "jobProperties": { "fs.permissions.umask-mode": "022", - "fs.default.name" : "hdfs://druid-it-hadoop:9000", - "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", - "dfs.datanode.address" : "druid-it-hadoop", - "dfs.client.use.datanode.hostname" : "true", - "dfs.datanode.use.datanode.hostname" : "true", - "yarn.resourcemanager.hostname" : "druid-it-hadoop", - "yarn.nodemanager.vmem-check-enabled" : "false", - "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", - "mapreduce.job.user.classpath.first" : "true", - "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", - "mapreduce.map.memory.mb" : 1024, - "mapreduce.reduce.memory.mb" : 1024 + "fs.default.name": "hdfs://druid-it-hadoop:9000", + "fs.defaultFS": "hdfs://druid-it-hadoop:9000", + "dfs.datanode.address": "druid-it-hadoop", + "dfs.client.use.datanode.hostname": "true", + "dfs.datanode.use.datanode.hostname": "true", + "yarn.resourcemanager.hostname": "druid-it-hadoop", + "yarn.nodemanager.vmem-check-enabled": "false", + "mapreduce.job.classloader": "true", + "mapreduce.map.java.opts": "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.job.user.classpath.first": "true", + "mapreduce.reduce.java.opts": "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.map.memory.mb": 1024, + "mapreduce.reduce.memory.mb": 1024 }, "rowFlushBoundary": 10000 } diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task_template.json similarity index 95% rename from integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json rename to 
integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task_template.json index 60bd4954271..fb4318bf470 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_azure_input_index_task_template.json @@ -1,6 +1,6 @@ { "type": "index_hadoop", - "hadoopDependencyCoordinates" : ["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"], + "hadoopDependencyCoordinates" : [${hadoop.integ.libs}], "spec": { "dataSchema": { "dataSource": "%%DATASOURCE%%", @@ -94,7 +94,8 @@ "dfs.datanode.use.datanode.hostname" : "true", "yarn.resourcemanager.hostname" : "druid-it-hadoop", "yarn.nodemanager.vmem-check-enabled" : "false", - "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D", + "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", + "mapreduce.job.classloader": "true", "mapreduce.job.user.classpath.first" : "true", "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", "mapreduce.map.memory.mb" : 1024, diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json index 03a1292105a..8d792e06ca3 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_gcs_input_index_task.json @@ -93,6 +93,7 @@ "dfs.datanode.use.datanode.hostname" : "true", "yarn.resourcemanager.hostname" : "druid-it-hadoop", "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.job.classloader": "true", "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D", "mapreduce.job.user.classpath.first" : "true", "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", diff --git 
a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json index 76600f57cf8..203b696da81 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_index_task.json @@ -90,6 +90,7 @@ "dfs.datanode.use.datanode.hostname" : "true", "yarn.resourcemanager.hostname" : "druid-it-hadoop", "yarn.nodemanager.vmem-check-enabled" : "false", + "mapreduce.job.classloader": "true", "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", "mapreduce.job.user.classpath.first" : "true", "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json index cf44540a6b4..64852b3f0a9 100644 --- a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_reindex_task.json @@ -61,6 +61,7 @@ "fs.default.name" : "hdfs://druid-it-hadoop:9000", "fs.defaultFS" : "hdfs://druid-it-hadoop:9000", "dfs.datanode.address" : "druid-it-hadoop", + "mapreduce.job.classloader": "true", "dfs.client.use.datanode.hostname" : "true", "dfs.datanode.use.datanode.hostname" : "true", "yarn.resourcemanager.hostname" : "druid-it-hadoop", diff --git a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task_template.json similarity index 97% rename from integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json rename to integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task_template.json index d18b6da1e4e..858dff1d2a2 100644 --- 
a/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task.json +++ b/integration-tests/src/test/resources/hadoop/wikipedia_hadoop_s3_input_index_task_template.json @@ -94,13 +94,14 @@ "yarn.resourcemanager.hostname" : "druid-it-hadoop", "yarn.nodemanager.vmem-check-enabled" : "false", "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%", + "mapreduce.job.classloader": "true", "mapreduce.job.user.classpath.first" : "true", "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%", "mapreduce.map.memory.mb" : 1024, "mapreduce.reduce.memory.mb" : 1024, "fs.s3.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%", "fs.s3.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%", - "fs.s3.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem", + "fs.s3.impl" : "${hadoop.s3.impl}", "fs.s3n.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%", "fs.s3n.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%", "fs.s3n.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem", diff --git a/licenses.yaml b/licenses.yaml index 8312ebcd814..e26bfc8f8e8 100644 --- a/licenses.yaml +++ b/licenses.yaml @@ -630,6 +630,16 @@ notices: --- +name: Apache Commons Configuration +license_category: binary +module: java-core +license_name: Apache License version 2.0 +version: 2.1.1 +libraries: + - org.apache.commons: commons-configuration2 + +--- + name: Apache Commons DBCP license_category: binary module: java-core @@ -709,6 +719,15 @@ notices: --- +name: Apache Commons Text +license_category: binary +module: java-core +license_name: Apache License version 2.0 +version: 1.4 +libraries: + - org.apache.commons: commons-text +--- + name: Airline license_category: binary module: java-core @@ -1757,6 +1776,20 @@ notices: --- +name: Apache HttpClient +license_category: binary +module: java-core +license_name: Apache License version 2.0 +version: 4.5.13 +libraries: + - org.apache.httpcomponents: httpclient +notices: + - httpclient: | 
+ Apache HttpClient + Copyright 1999-2017 The Apache Software Foundation + +--- + name: Apache HttpClient license_category: binary module: hadoop-client @@ -1988,6 +2021,8 @@ libraries: - org.eclipse.jetty: jetty-servlets - org.eclipse.jetty: jetty-util - org.eclipse.jetty: jetty-util-ajax + - org.eclipse.jetty: jetty-xml + - org.eclipse.jetty: jetty-webapp notice: | ============================================================== Jetty Web Container @@ -2130,6 +2165,30 @@ libraries: --- +name: Kerby +license_category: binary +module: java-core +license_name: Apache License version 2.0 +version: 1.0.1 +libraries: + - org.apache.kerby: kerb-admin + - org.apache.kerby: kerb-client + - org.apache.kerby: kerb-common + - org.apache.kerby: kerb-core + - org.apache.kerby: kerb-crypto + - org.apache.kerby: kerb-identity + - org.apache.kerby: kerb-server + - org.apache.kerby: kerb-simplekdc + - org.apache.kerby: kerb-util + - org.apache.kerby: kerby-asn1 + - org.apache.kerby: kerby-config + - org.apache.kerby: kerby-pkix + - org.apache.kerby: kerby-util + - org.apache.kerby: kerby-xdr + - org.apache.kerby: token-provider + +--- + name: SIGAR license_category: binary module: java-core @@ -2827,6 +2886,29 @@ notice: | --- +name: Apache Hadoop +license_category: binary +module: hadoop-client +license_name: Apache License version 2.0 +version: 3.3.1 +libraries: + - org.apache.hadoop: hadoop-annotations + - org.apache.hadoop: hadoop-auth + - org.apache.hadoop: hadoop-client + - org.apache.hadoop: hadoop-common + - org.apache.hadoop: hadoop-hdfs-client + - org.apache.hadoop: hadoop-mapreduce-client-app + - org.apache.hadoop: hadoop-mapreduce-client-common + - org.apache.hadoop: hadoop-mapreduce-client-core + - org.apache.hadoop: hadoop-mapreduce-client-jobclient + - org.apache.hadoop: hadoop-mapreduce-client-shuffle + - org.apache.hadoop: hadoop-yarn-api + - org.apache.hadoop: hadoop-yarn-client + - org.apache.hadoop: hadoop-yarn-common + - org.apache.hadoop: 
hadoop-yarn-server-common + +--- + name: Gson license_category: binary module: hadoop-client @@ -3596,6 +3678,36 @@ libraries: --- +name: Jersey +license_category: binary +module: extensions/druid-avro-extensions +license_name: CDDL 1.1 +version: 1.19.3 +libraries: + - com.sun.jersey: jersey-json + +--- + +name: jaxb +license_category: binary +module: extensions/druid-avro-extensions +license_name: CDDL 1.1 +version: 2.2.3-1 +libraries: + - com.sun.xml.bind: jaxb-impl + +--- + +name: commons-cli +license_category: binary +module: extensions/druid-avro-extensions +license_name: Apache License version 2.0 +version: 1.3.1 +libraries: + - commons-cli: commons-cli + +--- + name: Apache Hive license_category: binary module: extensions/druid-bloom-filter @@ -4773,6 +4885,16 @@ libraries: --- +name: org.codehaus.woodstox stax2-api +license_category: binary +version: 4.2.1 +module: druid-ranger-security +license_name: BSD-3-Clause License +libraries: + - org.codehaus.woodstox: stax2-api + +--- + name: org.codehaus.woodstox woodstox-core-asl license_category: binary version: 4.4.1 @@ -4869,6 +4991,38 @@ notices: --- +name: Woodstox +license_category: binary +module: java-core +license_name: Apache License version 2.0 +version: 5.3.0 +libraries: + - com.fasterxml.woodstox: woodstox-core + +--- + +name: DNS Java +license_category: binary +module: java-core +license_name: BSD 2-Clause license +version: 2.1.7 +libraries: + - dnsjava: dnsjava + +--- + +name: Hadoop shaded +license_category: binary +module: java-core +license_name: Apache License version 2.0 +version: 1.1.1 +libraries: + - org.apache.hadoop.thirdparty: hadoop-shaded-protobuf_3_7 + - org.apache.hadoop.thirdparty: hadoop-shaded-guava + +--- + + # Web console modules start name: "@babel/runtime" license_category: binary diff --git a/owasp-dependency-check-suppressions.xml b/owasp-dependency-check-suppressions.xml index b7da4e5a167..842ebb89a52 100644 --- a/owasp-dependency-check-suppressions.xml +++ 
b/owasp-dependency-check-suppressions.xml @@ -327,6 +327,14 @@ CVE-2018-11765 CVE-2020-9492 + + + ^pkg:maven/org\.apache\.hadoop/hadoop\-.*@.*$ + CVE-2018-11765 + CVE-2020-9492 + CVE-2021-26291 + + + + CVE-2020-13949 + @@ -402,4 +417,49 @@ --> CVE-2021-40531 + + + + CVE-2020-10740 + CVE-2020-25644 + CVE-2020-10718 + + + + + + CVE-2020-8570 + CVE-2015-8559 + CVE-2021-20291 + CVE-2017-17485 + CVE-2018-5968 + CVE-2017-15095 + CVE-2019-16942 + CVE-2020-25649 + CVE-2020-35491 + CVE-2019-16943 + CVE-2020-35490 + CVE-2019-20330 + CVE-2020-10673 + CVE-2018-11307 + CVE-2018-7489 + CVE-2019-17267 + CVE-2019-17531 + CVE-2019-16335 + CVE-2019-14893 + CVE-2019-14540 + + + + + + CVE-2015-7430 + CVE-2017-3162 + diff --git a/pom.xml b/pom.xml index ee60c3bbf3a..b02322a8ccd 100644 --- a/pom.xml +++ b/pom.xml @@ -112,6 +112,8 @@ 1.12.37 2.8.0 0.8.7 + 5.2.5.Final + 4.5.10 3.5.9 2.5.7 @@ -459,7 +461,7 @@ org.hibernate hibernate-validator - 5.2.5.Final + ${hibernate-validator.version} javax.validation @@ -727,60 +729,13 @@ org.apache.httpcomponents httpclient - 4.5.10 + ${httpclient.version} org.apache.httpcomponents httpcore 4.4.11 - - org.apache.hadoop - hadoop-auth - ${hadoop.compile.version} - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - provided - - - org.apache.avro - avro - - - - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - provided - - - org.apache.hadoop - hadoop-hdfs-client - ${hadoop.compile.version} - provided - - - org.apache.hadoop - hadoop-yarn-common - ${hadoop.compile.version} - provided - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.compile.version} - provided - - - javax.servlet - servlet-api - - - org.mapdb mapdb @@ -1176,16 +1131,46 @@ test - com.github.stefanbirkner - system-rules - 1.19.0 - test - - - io.timeandspace - cron-scheduler - 0.1 + com.github.stefanbirkner + system-rules + 1.19.0 + test + + io.timeandspace + cron-scheduler + 0.1 + + + org.apache.hadoop + hadoop-common + 
${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-yarn-common + ${hadoop.compile.version} + provided + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.compile.version} + provided + + + javax.servlet + servlet-api + + + @@ -1558,11 +1543,6 @@ maven-deploy-plugin 2.7 - - org.apache.maven.plugins - maven-help-plugin - 2.1.1 - org.apache.maven.plugins maven-install-plugin @@ -1637,15 +1617,15 @@ ${maven.compiler.target} - - com.github.eirslett - frontend-maven-plugin - 1.11.3 - - ${node.version} - ${npm.version} - - + + com.github.eirslett + frontend-maven-plugin + 1.11.3 + + ${node.version} + ${npm.version} + + @@ -1902,6 +1882,7 @@ .asf.yaml **/dependency-reduced-pom.xml .editorconfig + **/hadoop.indexer.libs.version @@ -1963,5 +1944,19 @@ true + + hadoop3 + + + hadoop3.enabled + true + + + + 3.3.1 + 5.3.6.Final + 4.5.13 + + diff --git a/services/pom.xml b/services/pom.xml index d23f5779dd2..50a2ac0f56b 100644 --- a/services/pom.xml +++ b/services/pom.xml @@ -52,6 +52,12 @@ org.apache.druid druid-indexing-hadoop ${project.parent.version} + + + javax.servlet + servlet-api + + org.apache.druid diff --git a/services/src/main/java/org/apache/druid/cli/PullDependencies.java b/services/src/main/java/org/apache/druid/cli/PullDependencies.java index f2ea7f1b684..bbb91f2930d 100644 --- a/services/src/main/java/org/apache/druid/cli/PullDependencies.java +++ b/services/src/main/java/org/apache/druid/cli/PullDependencies.java @@ -183,7 +183,7 @@ public class PullDependencies implements Runnable @Option( name = "--no-default-hadoop", - description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client:2.8.5. If `-h` option is supplied, then default hadoop coordinate will not be downloaded." 
+ description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client if hadoop2 or org.apache.hadoop:hadoop-client-runtime if hadoop3. If `-h` option is supplied, then default hadoop coordinate will not be downloaded." ) public boolean noDefaultHadoop = false; diff --git a/website/.spelling b/website/.spelling index 9b298b702dc..f1324234ee9 100644 --- a/website/.spelling +++ b/website/.spelling @@ -510,6 +510,12 @@ Papache-release Pdist Ddruid.console.skip yaml +Phadoop3 +dist-hadoop3 +hadoop3 +hadoop2 +2.x.x +3.x.x - ../docs/development/extensions-contrib/ambari-metrics-emitter.md ambari-metrics metricName @@ -1927,3 +1933,4 @@ PiB protobuf Golang multiValueHandling +