From 8c4f76aa20e75635bd6d3de14924ec246a8a071a Mon Sep 17 00:00:00 2001
From: Allen Wittenauer
Date: Wed, 25 Feb 2015 18:57:41 -0800
Subject: [PATCH] HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)

---
 .../hadoop-hdfs-httpfs/pom.xml                     |   2 +-
 .../src/main/conf/httpfs-env.sh                    |  53 +++--
 .../src/main/libexec/httpfs-config.sh              | 222 ++++--------
 .../src/main/sbin/httpfs.sh                        | 116 ++++---
 .../{ssl-server.xml => ssl-server.xml.conf}        |   4 +-
 .../src/site/apt/ServerSetup.apt.vm                | 159 -------------
 .../src/site/apt/UsingHttpTools.apt.vm             |  87 -------
 .../src/site/apt/index.apt.vm                      |  83 -------
 .../src/site/markdown/ServerSetup.md.vm            | 121 ++++++++++
 .../src/site/markdown/UsingHttpTools.md            |  62 +++++
 .../src/site/markdown/index.md                     |  52 ++++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |   2 +
 12 files changed, 408 insertions(+), 555 deletions(-)
 rename hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/{ssl-server.xml => ssl-server.xml.conf} (97%)
 delete mode 100644 hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/ServerSetup.apt.vm
 delete mode 100644 hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/UsingHttpTools.apt.vm
 delete mode 100644 hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/UsingHttpTools.md
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/index.md

diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
index 4c42ef9952e..ddc60339669 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
@@ -524,7 +524,7 @@
-    \(.*\)$'`
-  if expr "$link" : '/.*' > /dev/null; then
-    PRG="$link"
-  else
-    PRG=`dirname "${PRG}"`/"$link"
+  if [[ -z "${HADOOP_HTTPFS_ENV_PROCESSED}" ]]; then
+    if [[ -e "${HADOOP_CONF_DIR}/httpfs-env.sh" ]]; then
+      . "${HADOOP_CONF_DIR}/httpfs-env.sh"
+      export HADOOP_HTTPFS_ENV_PROCESSED=true
+    fi
 fi
-done
-BASEDIR=`dirname ${PRG}`
-BASEDIR=`cd ${BASEDIR}/..;pwd`
+  export HADOOP_CATALINA_PREFIX=httpfs
+  export HADOOP_CATALINA_TEMP="${HTTPFS_TEMP:-${HADOOP_PREFIX}/temp}"
-function print() {
-  if [ "${HTTPFS_SILENT}" != "true" ]; then
-    echo "$@"
+  hadoop_deprecate_envvar HTTPFS_CONFIG HADOOP_CONF_DIR
+
+  hadoop_deprecate_envvar HTTPFS_LOG HADOOP_LOG_DIR
+
+  export HADOOP_CATALINA_CONFIG="${HADOOP_CONF_DIR}"
+  export HADOOP_CATALINA_LOG="${HADOOP_LOG_DIR}"
+
+  export HTTPFS_HTTP_HOSTNAME=${HTTPFS_HTTP_HOSTNAME:-$(hostname -f)}
+
+  export HADOOP_CATALINA_HTTP_PORT="${HTTPFS_HTTP_PORT:-14000}"
+  export HADOOP_CATALINA_ADMIN_PORT="${HTTPFS_ADMIN_PORT:-$((HADOOP_CATALINA_HTTP_PORT+1))}"
+  export HADOOP_CATALINA_MAX_THREADS="${HTTPFS_MAX_THREADS:-150}"
+
+  export HTTPFS_SSL_ENABLED=${HTTPFS_SSL_ENABLED:-false}
+
+  export HADOOP_CATALINA_SSL_KEYSTORE_FILE="${HTTPFS_SSL_KEYSTORE_FILE:-${HOME}/.keystore}"
+
+  export CATALINA_BASE="${CATALINA_BASE:-${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat}"
+  export HADOOP_CATALINA_HOME="${HTTPFS_CATALINA_HOME:-${CATALINA_BASE}}"
+
+  export CATALINA_OUT="${CATALINA_OUT:-${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out}"
+
+  export CATALINA_PID="${CATALINA_PID:-${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid}"
+
+  if [[ -n "${HADOOP_SHELL_SCRIPT_DEBUG}" ]]; then
+    varlist=$(env | egrep '(^HTTPFS|^CATALINA)' | cut -f1 -d= | grep -v _PASS)
+    for i in ${varlist}; do
+      hadoop_debug "Setting ${i} to ${!i}"
+    done
   fi
 }
-# if HTTPFS_HOME is already set warn it will be ignored
-#
-if [ "${HTTPFS_HOME}" != "" ]; then
-  echo "WARNING: current setting of HTTPFS_HOME ignored"
-fi
-
-print
-
-# setting HTTPFS_HOME to the installation dir, it cannot be changed
-#
-export HTTPFS_HOME=${BASEDIR}
-httpfs_home=${HTTPFS_HOME}
-print "Setting HTTPFS_HOME: ${HTTPFS_HOME}"
-
-# if the installation has a env file, source it
-# this is for native packages installations
-#
-if [ -e "${HTTPFS_HOME}/bin/httpfs-env.sh" ]; then
-  print "Sourcing: ${HTTPFS_HOME}/bin/httpfs-env.sh"
-  source ${HTTPFS_HOME}/bin/httpfs-env.sh
-  grep "^ *export " ${HTTPFS_HOME}/bin/httpfs-env.sh | sed 's/ *export/ setting/'
-fi
-
-# verify that the sourced env file didn't change HTTPFS_HOME
-# if so, warn and revert
-#
-if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
-  print "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
-  export HTTPFS_HOME=${httpfs_home}
-  print "  using HTTPFS_HOME: ${HTTPFS_HOME}"
-fi
-
-if [ "${HTTPFS_CONFIG}" = "" ]; then
-  export HTTPFS_CONFIG=${HTTPFS_HOME}/etc/hadoop
-  print "Setting HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
+if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
+   [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+elif [[ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_PREFIX}/libexec/hadoop-config.sh"
 else
-  print "Using HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
+  echo "ERROR: Hadoop common not found."
2>&1 + exit 1 fi -httpfs_config=${HTTPFS_CONFIG} - -# if the configuration dir has a env file, source it -# -if [ -e "${HTTPFS_CONFIG}/httpfs-env.sh" ]; then - print "Sourcing: ${HTTPFS_CONFIG}/httpfs-env.sh" - source ${HTTPFS_CONFIG}/httpfs-env.sh - grep "^ *export " ${HTTPFS_CONFIG}/httpfs-env.sh | sed 's/ *export/ setting/' -fi - -# verify that the sourced env file didn't change HTTPFS_HOME -# if so, warn and revert -# -if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then - echo "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored" - export HTTPFS_HOME=${httpfs_home} -fi - -# verify that the sourced env file didn't change HTTPFS_CONFIG -# if so, warn and revert -# -if [ "${HTTPFS_CONFIG}" != "${httpfs_config}" ]; then - echo "WARN: HTTPFS_CONFIG resetting to ''${HTTPFS_CONFIG}'' ignored" - export HTTPFS_CONFIG=${httpfs_config} -fi - -if [ "${HTTPFS_LOG}" = "" ]; then - export HTTPFS_LOG=${HTTPFS_HOME}/logs - print "Setting HTTPFS_LOG: ${HTTPFS_LOG}" -else - print "Using HTTPFS_LOG: ${HTTPFS_LOG}" -fi - -if [ ! -f ${HTTPFS_LOG} ]; then - mkdir -p ${HTTPFS_LOG} -fi - -if [ "${HTTPFS_TEMP}" = "" ]; then - export HTTPFS_TEMP=${HTTPFS_HOME}/temp - print "Setting HTTPFS_TEMP: ${HTTPFS_TEMP}" -else - print "Using HTTPFS_TEMP: ${HTTPFS_TEMP}" -fi - -if [ ! -f ${HTTPFS_TEMP} ]; then - mkdir -p ${HTTPFS_TEMP} -fi - -if [ "${HTTPFS_HTTP_PORT}" = "" ]; then - export HTTPFS_HTTP_PORT=14000 - print "Setting HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}" -else - print "Using HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}" -fi - -if [ "${HTTPFS_ADMIN_PORT}" = "" ]; then - export HTTPFS_ADMIN_PORT=`expr $HTTPFS_HTTP_PORT + 1` - print "Setting HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}" -else - print "Using HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}" -fi - -if [ "${HTTPFS_HTTP_HOSTNAME}" = "" ]; then - export HTTPFS_HTTP_HOSTNAME=`hostname -f` - print "Setting HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}" -else - print "Using HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}" -fi - -if [ "${HTTPFS_SSL_ENABLED}" = "" ]; then - export HTTPFS_SSL_ENABLED="false" - print "Setting HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}" -else - print "Using HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}" -fi - -if [ "${HTTPFS_SSL_KEYSTORE_FILE}" = "" ]; then - export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore - print "Setting HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}" -else - print "Using HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}" -fi - -if [ "${HTTPFS_SSL_KEYSTORE_PASS}" = "" ]; then - export HTTPFS_SSL_KEYSTORE_PASS=password - print "Setting HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}" -else - print "Using HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}" -fi - -if [ "${CATALINA_BASE}" = "" ]; then - export CATALINA_BASE=${HTTPFS_HOME}/share/hadoop/httpfs/tomcat - print "Setting CATALINA_BASE: ${CATALINA_BASE}" -else - print "Using CATALINA_BASE: ${CATALINA_BASE}" -fi - -if [ "${HTTPFS_CATALINA_HOME}" = "" ]; then - export HTTPFS_CATALINA_HOME=${CATALINA_BASE} - print "Setting HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}" -else - print "Using HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}" -fi - -if [ "${CATALINA_OUT}" = "" ]; then - export CATALINA_OUT=${HTTPFS_LOG}/httpfs-catalina.out - print "Setting CATALINA_OUT: ${CATALINA_OUT}" -else - print "Using CATALINA_OUT: ${CATALINA_OUT}" -fi - -if [ "${CATALINA_PID}" = "" ]; then - export CATALINA_PID=/tmp/httpfs.pid - print "Setting CATALINA_PID: ${CATALINA_PID}" -else - print "Using CATALINA_PID: ${CATALINA_PID}" -fi - -print diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
index 65903dc5d72..72dca938f99 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,53 +13,99 @@
 # limitations under the License.
 #
-# resolve links - $0 may be a softlink
-PRG="${0}"
+function hadoop_usage()
+{
+  echo "Usage: httpfs.sh [--config confdir] [--debug] --daemon start|status|stop"
+  echo "       httpfs.sh [--config confdir] [--debug] COMMAND"
+  echo "            where COMMAND is one of:"
+  echo "  run               Start httpfs in the current window"
+  echo "  run -security     Start in the current window with security manager"
+  echo "  start             Start httpfs in a separate window"
+  echo "  start -security   Start in a separate window with security manager"
+  echo "  status            Return the LSB compliant status"
+  echo "  stop              Stop httpfs, waiting up to 5 seconds for the process to end"
+  echo "  stop n            Stop httpfs, waiting up to n seconds for the process to end"
+  echo "  stop -force       Stop httpfs, wait up to 5 seconds and then use kill -KILL if still running"
+  echo "  stop n -force     Stop httpfs, wait up to n seconds and then use kill -KILL if still running"
+}

-while [ -h "${PRG}" ]; do
-  ls=`ls -ld "${PRG}"`
-  link=`expr "$ls" : '.*-> \(.*\)$'`
-  if expr "$link" : '/.*' > /dev/null; then
-    PRG="$link"
-  else
-    PRG=`dirname "${PRG}"`/"$link"
-  fi
-done
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi

-BASEDIR=`dirname ${PRG}`
-BASEDIR=`cd ${BASEDIR}/..;pwd`
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/httpfs-config.sh." 2>&1
+  exit 1
+fi

-source ${HADOOP_LIBEXEC_DIR:-${BASEDIR}/libexec}/httpfs-config.sh
-
-# The Java System property 'httpfs.http.port' it is not used by HttpFS,
+# The Java System property 'httpfs.http.port' is not used by HttpFS,
 # it is used in Tomcat's server.xml configuration file
 #
-print "Using CATALINA_OPTS: ${CATALINA_OPTS}"
-catalina_opts="-Dhttpfs.home.dir=${HTTPFS_HOME}";
-catalina_opts="${catalina_opts} -Dhttpfs.config.dir=${HTTPFS_CONFIG}";
-catalina_opts="${catalina_opts} -Dhttpfs.log.dir=${HTTPFS_LOG}";
-catalina_opts="${catalina_opts} -Dhttpfs.temp.dir=${HTTPFS_TEMP}";
-catalina_opts="${catalina_opts} -Dhttpfs.admin.port=${HTTPFS_ADMIN_PORT}";
-catalina_opts="${catalina_opts} -Dhttpfs.http.port=${HTTPFS_HTTP_PORT}";
-catalina_opts="${catalina_opts} -Dhttpfs.http.hostname=${HTTPFS_HTTP_HOSTNAME}";
-catalina_opts="${catalina_opts} -Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}";
-catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.file=${HTTPFS_SSL_KEYSTORE_FILE}";
-catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.pass=${HTTPFS_SSL_KEYSTORE_PASS}";
+# Mask the trustStorePassword
+# shellcheck disable=SC2086
+CATALINA_OPTS_DISP="$(echo ${CATALINA_OPTS} | sed -e 's/trustStorePassword=[^ ]*/trustStorePassword=***/')"

-print "Adding to CATALINA_OPTS: ${catalina_opts}"
+hadoop_debug "Using CATALINA_OPTS: ${CATALINA_OPTS_DISP}"

-export CATALINA_OPTS="${CATALINA_OPTS} ${catalina_opts}"
+# We're using hadoop-common, so set up some stuff it might need:
+hadoop_finalize
+
+hadoop_verify_logdir
+
+if [[ $# = 0 ]]; then
+  case "${HADOOP_DAEMON_MODE}" in
+    status)
+      hadoop_status_daemon "${CATALINA_PID}"
+      exit
+    ;;
+    start)
+      set -- "start"
+    ;;
+    stop)
+      set -- "stop"
+    ;;
+  esac
+fi
+
+hadoop_finalize_catalina_opts
+export CATALINA_OPTS

 # A bug in catalina.sh script does not use CATALINA_OPTS for stopping the server
 #
-if [ "${1}" = "stop" ]; then
+if [[ "${1}" = "stop" ]]; then
   export JAVA_OPTS=${CATALINA_OPTS}
 fi

-if [ "${HTTPFS_SILENT}" != "true" ]; then
-  exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@"
-else
-  exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@" > /dev/null
+# If SSL is enabled, populate the passwords into ssl-server.xml before starting tomcat
+#
+# HTTPFS_SSL_KEYSTORE_PASS is a bit odd.
+# if undefined, then the if test will not enable ssl on its own
+# if "", set it to "password".
+# if custom, use provided password +# +if [[ -f "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" ]]; then + if [[ -n "${HTTPFS_SSL_KEYSTORE_PASS+x}" ]] || [[ -n "${HTTPFS_SSL_TRUSTSTORE_PASS}" ]]; then + export HTTPFS_SSL_KEYSTORE_PASS=${HTTPFS_SSL_KEYSTORE_PASS:-password} + sed -e 's/_httpfs_ssl_keystore_pass_/'${HTTPFS_SSL_KEYSTORE_PASS}'/g' \ + -e 's/_httpfs_ssl_truststore_pass_/'${HTTPFS_SSL_TRUSTSTORE_PASS}'/g' \ + "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" \ + > "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml" + chmod 700 "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml" >/dev/null 2>&1 + fi fi +hadoop_add_param CATALINA_OPTS -Dhttpfs.http.hostname "-Dhttpfs.http.hostname=${HTTPFS_HOST_NAME}" +hadoop_add_param CATALINA_OPTS -Dhttpfs.ssl.enabled "-Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}" + +exec "${HADOOP_CATALINA_HOME}/bin/catalina.sh" "$@" diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/ssl-server.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/ssl-server.xml.conf similarity index 97% rename from hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/ssl-server.xml rename to hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/ssl-server.xml.conf index c91c2e285d2..4a9053265f3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/ssl-server.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/tomcat/ssl-server.xml.conf @@ -61,7 +61,7 @@ diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/ServerSetup.apt.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/ServerSetup.apt.vm deleted file mode 100644 index 878ab1f7747..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/ServerSetup.apt.vm +++ /dev/null @@ -1,159 +0,0 @@ -~~ Licensed under the Apache License, Version 2.0 (the "License"); -~~ you may not use this file except in compliance with the License. -~~ You may obtain a copy of the License at -~~ -~~ http://www.apache.org/licenses/LICENSE-2.0 -~~ -~~ Unless required by applicable law or agreed to in writing, software -~~ distributed under the License is distributed on an "AS IS" BASIS, -~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -~~ See the License for the specific language governing permissions and -~~ limitations under the License. - - --- - Hadoop HDFS over HTTP ${project.version} - Server Setup - --- - --- - ${maven.build.timestamp} - -Hadoop HDFS over HTTP ${project.version} - Server Setup - - This page explains how to quickly setup HttpFS with Pseudo authentication - against a Hadoop cluster with Pseudo authentication. - -* Requirements - - * Java 6+ - - * Maven 3+ - -* Install HttpFS - -+---+ -~ $ tar xzf httpfs-${project.version}.tar.gz -+---+ - -* Configure HttpFS - - By default, HttpFS assumes that Hadoop configuration files - (<<>>) are in the HttpFS - configuration directory. - - If this is not the case, add to the <<>> file the - <<>> property set to the location - of the Hadoop configuration directory. - -* Configure Hadoop - - Edit Hadoop <<>> and defined the Unix user that will - run the HttpFS server as a proxyuser. For example: - -+---+ - ... - - hadoop.proxyuser.#HTTPFSUSER#.hosts - httpfs-host.foo.com - - - hadoop.proxyuser.#HTTPFSUSER#.groups - * - - ... -+---+ - - IMPORTANT: Replace <<<#HTTPFSUSER#>>> with the Unix user that will - start the HttpFS server. - -* Restart Hadoop - - You need to restart Hadoop for the proxyuser configuration ot become - active. 
- -* Start/Stop HttpFS - - To start/stop HttpFS use HttpFS's bin/httpfs.sh script. For example: - -+---+ -httpfs-${project.version} $ bin/httpfs.sh start -+---+ - - NOTE: Invoking the script without any parameters list all possible - parameters (start, stop, run, etc.). The <<>> script is a wrapper - for Tomcat's <<>> script that sets the environment variables - and Java System properties required to run HttpFS server. - -* Test HttpFS is working - -+---+ -~ $ curl -i "http://:14000?user.name=babu&op=homedir" -HTTP/1.1 200 OK -Content-Type: application/json -Transfer-Encoding: chunked - -{"homeDir":"http:\/\/:14000\/user\/babu"} -+---+ - -* Embedded Tomcat Configuration - - To configure the embedded Tomcat go to the <<>>. - - HttpFS preconfigures the HTTP and Admin ports in Tomcat's <<>> to - 14000 and 14001. - - Tomcat logs are also preconfigured to go to HttpFS's <<>> directory. - - The following environment variables (which can be set in HttpFS's - <<>> script) can be used to alter those values: - - * HTTPFS_HTTP_PORT - - * HTTPFS_ADMIN_PORT - - * HTTPFS_LOG - -* HttpFS Configuration - - HttpFS supports the following {{{./httpfs-default.html}configuration properties}} - in the HttpFS's <<>> configuration file. - -* HttpFS over HTTPS (SSL) - - To configure HttpFS to work over SSL edit the {{httpfs-env.sh}} script in the - configuration directory setting the {{HTTPFS_SSL_ENABLED}} to {{true}}. - - In addition, the following 2 properties may be defined (shown with default - values): - - * HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore - - * HTTPFS_SSL_KEYSTORE_PASS=password - - In the HttpFS <<>> directory, replace the <<>> file - with the <<>> file. - - - You need to create an SSL certificate for the HttpFS server. As the - <<>> Unix user, using the Java <<>> command to create the - SSL certificate: - -+---+ -$ keytool -genkey -alias tomcat -keyalg RSA -+---+ - - You will be asked a series of questions in an interactive prompt. It will - create the keystore file, which will be named <<.keystore>> and located in the - <<>> user home directory. - - The password you enter for "keystore password" must match the value of the - <<>> environment variable set in the - <<>> script in the configuration directory. - - The answer to "What is your first and last name?" (i.e. "CN") must be the - hostname of the machine where the HttpFS Server will be running. - - Start HttpFS. It should work over HTTPS. - - Using the Hadoop <<>> API or the Hadoop FS shell, use the - <<>> scheme. Make sure the JVM is picking up the truststore - containing the public key of the SSL certificate if using a self-signed - certificate. diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/UsingHttpTools.apt.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/UsingHttpTools.apt.vm deleted file mode 100644 index c93e20b9d80..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/UsingHttpTools.apt.vm +++ /dev/null @@ -1,87 +0,0 @@ -~~ Licensed under the Apache License, Version 2.0 (the "License"); -~~ you may not use this file except in compliance with the License. -~~ You may obtain a copy of the License at -~~ -~~ http://www.apache.org/licenses/LICENSE-2.0 -~~ -~~ Unless required by applicable law or agreed to in writing, software -~~ distributed under the License is distributed on an "AS IS" BASIS, -~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -~~ See the License for the specific language governing permissions and -~~ limitations under the License. 
- - --- - Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools - --- - --- - ${maven.build.timestamp} - -Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools - -* Security - - Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP - SPNEGO authentication. - -** Pseudo Authentication - - With pseudo authentication the user name must be specified in the - <<>>> query string parameter of a HttpFS URL. - For example: - -+---+ -$ curl "http://:14000/webhdfs/v1?op=homedir&user.name=babu" -+---+ - -** Kerberos HTTP SPNEGO Authentication - - Kerberos HTTP SPNEGO authentication requires a tool or library supporting - Kerberos HTTP SPNEGO protocol. - - IMPORTANT: If using <<>>, the <<>> version being used must support - GSS (<<>> prints out 'GSS' if it supports it). - - For example: - -+---+ -$ kinit -Please enter the password for tucu@LOCALHOST: -$ curl --negotiate -u foo "http://:14000/webhdfs/v1?op=homedir" -Enter host password for user 'foo': -+---+ - - NOTE: the <<<-u USER>>> option is required by the <<<--negotiate>>> but it is - not used. Use any value as <<>> and when asked for the password press - [ENTER] as the password value is ignored. - -** {Remembering Who I Am} (Establishing an Authenticated Session) - - As most authentication mechanisms, Hadoop HTTP authentication authenticates - users once and issues a short-lived authentication token to be presented in - subsequent requests. This authentication token is a signed HTTP Cookie. - - When using tools like <<>>, the authentication token must be stored on - the first request doing authentication, and submitted in subsequent requests. - To do this with curl the <<<-b>>> and <<<-c>>> options to save and send HTTP - Cookies must be used. - - For example, the first request doing authentication should save the received - HTTP Cookies. - - Using Pseudo Authentication: - -+---+ -$ curl -c ~/.httpfsauth "http://:14000/webhdfs/v1?op=homedir&user.name=babu" -+---+ - - Using Kerberos HTTP SPNEGO authentication: - -+---+ -$ curl --negotiate -u foo -c ~/.httpfsauth "http://:14000/webhdfs/v1?op=homedir" -+---+ - - Then, subsequent requests forward the previously received HTTP Cookie: - -+---+ -$ curl -b ~/.httpfsauth "http://:14000/webhdfs/v1?op=liststatus" -+---+ diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm deleted file mode 100644 index f51e74349ed..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/apt/index.apt.vm +++ /dev/null @@ -1,83 +0,0 @@ -~~ Licensed under the Apache License, Version 2.0 (the "License"); -~~ you may not use this file except in compliance with the License. -~~ You may obtain a copy of the License at -~~ -~~ http://www.apache.org/licenses/LICENSE-2.0 -~~ -~~ Unless required by applicable law or agreed to in writing, software -~~ distributed under the License is distributed on an "AS IS" BASIS, -~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -~~ See the License for the specific language governing permissions and -~~ limitations under the License. - - --- - Hadoop HDFS over HTTP - Documentation Sets ${project.version} - --- - --- - ${maven.build.timestamp} - -Hadoop HDFS over HTTP - Documentation Sets ${project.version} - - HttpFS is a server that provides a REST HTTP gateway supporting all HDFS - File System operations (read and write). And it is inteoperable with the - <> REST HTTP API. 
- - HttpFS can be used to transfer data between clusters running different - versions of Hadoop (overcoming RPC versioning issues), for example using - Hadoop DistCP. - - HttpFS can be used to access data in HDFS on a cluster behind of a firewall - (the HttpFS server acts as a gateway and is the only system that is allowed - to cross the firewall into the cluster). - - HttpFS can be used to access data in HDFS using HTTP utilities (such as curl - and wget) and HTTP libraries Perl from other languages than Java. - - The <> client FileSytem implementation can be used to access HttpFS - using the Hadoop filesystem command (<<>>) line tool as well as - from Java aplications using the Hadoop FileSystem Java API. - - HttpFS has built-in security supporting Hadoop pseudo authentication and - HTTP SPNEGO Kerberos and other pluggable authentication mechanims. It also - provides Hadoop proxy user support. - -* How Does HttpFS Works? - - HttpFS is a separate service from Hadoop NameNode. - - HttpFS itself is Java web-application and it runs using a preconfigured Tomcat - bundled with HttpFS binary distribution. - - HttpFS HTTP web-service API calls are HTTP REST calls that map to a HDFS file - system operation. For example, using the <<>> Unix command: - - * <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt>>> returns - the contents of the HDFS <<>> file. - - * <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list>>> returns the - contents of the HDFS <<>> directory in JSON format. - - * <<<$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs>>> - creates the HDFS <<>> directory. - -* How HttpFS and Hadoop HDFS Proxy differ? - - HttpFS was inspired by Hadoop HDFS proxy. - - HttpFS can be seen as a full rewrite of Hadoop HDFS proxy. - - Hadoop HDFS proxy provides a subset of file system operations (read only), - HttpFS provides support for all file system operations. - - HttpFS uses a clean HTTP REST API making its use with HTTP tools more - intuitive. - - HttpFS supports Hadoop pseudo authentication, Kerberos SPNEGOS authentication - and Hadoop proxy users. Hadoop HDFS proxy did not. - -* User and Developer Documentation - - * {{{./ServerSetup.html}HttpFS Server Setup}} - - * {{{./UsingHttpTools.html}Using HTTP Tools}} - diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm new file mode 100644 index 00000000000..3c7f9d317d6 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm @@ -0,0 +1,121 @@ + + +Hadoop HDFS over HTTP - Server Setup +==================================== + +This page explains how to quickly setup HttpFS with Pseudo authentication against a Hadoop cluster with Pseudo authentication. + +Install HttpFS +-------------- + + ~ $ tar xzf httpfs-${project.version}.tar.gz + +Configure HttpFS +---------------- + +By default, HttpFS assumes that Hadoop configuration files (`core-site.xml & hdfs-site.xml`) are in the HttpFS configuration directory. + +If this is not the case, add to the `httpfs-site.xml` file the `httpfs.hadoop.config.dir` property set to the location of the Hadoop configuration directory. + +Configure Hadoop +---------------- + +Edit Hadoop `core-site.xml` and defined the Unix user that will run the HttpFS server as a proxyuser. 
For example:
+
+```xml
+  <property>
+    <name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
+    <value>httpfs-host.foo.com</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
+    <value>*</value>
+  </property>
+```
+
+IMPORTANT: Replace `#HTTPFSUSER#` with the Unix user that will start the HttpFS server.
+
+Restart Hadoop
+--------------
+
+You need to restart Hadoop for the proxyuser configuration to become active.
+
+Start/Stop HttpFS
+-----------------
+
+To start/stop HttpFS, use HttpFS's sbin/httpfs.sh script. For example:
+
+    $ sbin/httpfs.sh start
+
+NOTE: Invoking the script without any parameters lists all possible parameters (start, stop, run, etc.). The `httpfs.sh` script is a wrapper for Tomcat's `catalina.sh` script that sets the environment variables and Java System properties required to run the HttpFS server.
+
+Test HttpFS is working
+----------------------
+
+    ~ $ curl -i "http://:14000?user.name=babu&op=homedir"
+    HTTP/1.1 200 OK
+    Content-Type: application/json
+    Transfer-Encoding: chunked
+
+    {"homeDir":"http:\/\/:14000\/user\/babu"}
+
+Embedded Tomcat Configuration
+-----------------------------
+
+To configure the embedded Tomcat, go to the `tomcat/conf` directory.
+
+HttpFS preconfigures the HTTP and Admin ports in Tomcat's `server.xml` to 14000 and 14001.
+
+Tomcat logs are also preconfigured to go to HttpFS's `logs/` directory.
+
+The following environment variables (which can be set in HttpFS's `etc/hadoop/httpfs-env.sh` script) can be used to alter those values:
+
+* HTTPFS\_HTTP\_PORT
+
+* HTTPFS\_ADMIN\_PORT
+
+* HADOOP\_LOG\_DIR
+
+HttpFS Configuration
+--------------------
+
+HttpFS supports the following [configuration properties](./httpfs-default.html) in HttpFS's `etc/hadoop/httpfs-site.xml` configuration file.
+
+HttpFS over HTTPS (SSL)
+-----------------------
+
+To configure HttpFS to work over SSL, edit the [httpfs-env.sh](#httpfs-env.sh) script in the configuration directory, setting [HTTPFS\_SSL\_ENABLED](#HTTPFS_SSL_ENABLED) to [true](#true).
+
+In addition, the following 2 properties may be defined (shown with default values):
+
+* HTTPFS\_SSL\_KEYSTORE\_FILE=$HOME/.keystore
+
+* HTTPFS\_SSL\_KEYSTORE\_PASS=password
+
+In the HttpFS `tomcat/conf` directory, replace the `server.xml` file with the `ssl-server.xml` file.
+
+You need to create an SSL certificate for the HttpFS server. As the `httpfs` Unix user, use the Java `keytool` command to create the SSL certificate:
+
+    $ keytool -genkey -alias tomcat -keyalg RSA
+
+You will be asked a series of questions in an interactive prompt. It will create the keystore file, which will be named **.keystore** and located in the `httpfs` user home directory.
+
+The password you enter for "keystore password" must match the value of the `HTTPFS_SSL_KEYSTORE_PASS` environment variable set in the `httpfs-env.sh` script in the configuration directory.
+
+The answer to "What is your first and last name?" (i.e. "CN") must be the hostname of the machine where the HttpFS Server will be running.
+
+Start HttpFS. It should work over HTTPS.
+
+Using the Hadoop `FileSystem` API or the Hadoop FS shell, use the `swebhdfs://` scheme. Make sure the JVM is picking up the truststore containing the public key of the SSL certificate if using a self-signed certificate.
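As a quick aside for readers wiring this up: the SSL settings that ServerSetup.md.vm and httpfs-config.sh refer to are plain environment variables sourced from `etc/hadoop/httpfs-env.sh`. The sketch below shows one way such a stanza might look under this patch; the variable names are the ones read by `libexec/httpfs-config.sh` above, while the specific values (ports, keystore path, password) are illustrative placeholders rather than anything mandated by the patch.

```bash
# Hypothetical etc/hadoop/httpfs-env.sh fragment (illustrative sketch only).
# Variable names come from libexec/httpfs-config.sh in this patch;
# values are placeholders for a local setup.

# HTTP and admin ports (the patch defaults to 14000 and HTTP port + 1)
export HTTPFS_HTTP_PORT=14000
export HTTPFS_ADMIN_PORT=14001

# Enable HTTPS and point at the keystore created with keytool
export HTTPFS_SSL_ENABLED=true
export HTTPFS_SSL_KEYSTORE_FILE="${HOME}/.keystore"
export HTTPFS_SSL_KEYSTORE_PASS=password
```

With `HTTPFS_SSL_ENABLED=true` and the keystore password exported, the sed block in the rewritten `httpfs.sh` above regenerates `tomcat/conf/ssl-server.xml` from `ssl-server.xml.conf` at startup, substituting the keystore and truststore passwords.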
diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/UsingHttpTools.md b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/UsingHttpTools.md
new file mode 100644
index 00000000000..3045ad6506a
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/UsingHttpTools.md
@@ -0,0 +1,62 @@
+
+
+Hadoop HDFS over HTTP - Using HTTP Tools
+========================================
+
+Security
+--------
+
+Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP SPNEGO authentication.
+
+### Pseudo Authentication
+
+With pseudo authentication the user name must be specified in the `user.name=` query string parameter of an HttpFS URL. For example:
+
+    $ curl "http://:14000/webhdfs/v1?op=homedir&user.name=babu"
+
+### Kerberos HTTP SPNEGO Authentication
+
+Kerberos HTTP SPNEGO authentication requires a tool or library supporting the Kerberos HTTP SPNEGO protocol.
+
+IMPORTANT: If using `curl`, the `curl` version being used must support GSS (`curl -V` prints out 'GSS' if it supports it).
+
+For example:
+
+    $ kinit
+    Please enter the password for user@LOCALHOST:
+    $ curl --negotiate -u foo "http://:14000/webhdfs/v1?op=homedir"
+    Enter host password for user 'foo':
+
+NOTE: the `-u USER` option is required by the `--negotiate` option but it is not used. Use any value as `USER`, and when asked for the password press [ENTER], as the password value is ignored.
+
+### Remembering Who I Am (Establishing an Authenticated Session)
+
+Like most authentication mechanisms, Hadoop HTTP authentication authenticates users once and issues a short-lived authentication token to be presented in subsequent requests. This authentication token is a signed HTTP Cookie.
+
+When using tools like `curl`, the authentication token must be stored on the first request doing authentication, and submitted in subsequent requests. To do this with curl, the `-b` and `-c` options to save and send HTTP Cookies must be used.
+
+For example, the first request doing authentication should save the received HTTP Cookies.
+
+Using Pseudo Authentication:
+
+    $ curl -c ~/.httpfsauth "http://:14000/webhdfs/v1?op=homedir&user.name=foo"
+
+Using Kerberos HTTP SPNEGO authentication:
+
+    $ curl --negotiate -u foo -c ~/.httpfsauth "http://:14000/webhdfs/v1?op=homedir"
+
+Then, subsequent requests forward the previously received HTTP Cookie:
+
+    $ curl -b ~/.httpfsauth "http://:14000/webhdfs/v1?op=liststatus"
diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/index.md b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/index.md
new file mode 100644
index 00000000000..ac8f4ca2112
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/index.md
@@ -0,0 +1,52 @@
+
+
+Hadoop HDFS over HTTP - Documentation Sets
+==========================================
+
+HttpFS is a server that provides a REST HTTP gateway supporting all HDFS File System operations (read and write), and it is interoperable with the **webhdfs** REST HTTP API.
+
+HttpFS can be used to transfer data between clusters running different versions of Hadoop (overcoming RPC versioning issues), for example using Hadoop DistCP.
+
+HttpFS can be used to access data in HDFS on a cluster behind a firewall (the HttpFS server acts as a gateway and is the only system that is allowed to cross the firewall into the cluster).
+
+HttpFS can be used to access data in HDFS using HTTP utilities (such as curl and wget) and HTTP libraries from languages other than Java (for example, Perl).
+
+The **webhdfs** client FileSystem implementation can be used to access HttpFS using the Hadoop filesystem command-line tool (`hadoop fs`) as well as from Java applications using the Hadoop FileSystem Java API.
+
+HttpFS has built-in security supporting Hadoop pseudo authentication, HTTP SPNEGO Kerberos, and other pluggable authentication mechanisms. It also provides Hadoop proxy user support.
+
+How Does HttpFS Work?
+----------------------
+
+HttpFS is a separate service from Hadoop NameNode.
+
+HttpFS itself is a Java web application and it runs using a preconfigured Tomcat bundled with the HttpFS binary distribution.
+
+HttpFS HTTP web-service API calls are HTTP REST calls that map to an HDFS file system operation. For example, using the `curl` Unix command:
+
+* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt` returns the contents of the HDFS `/user/foo/README.txt` file.
+
+* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list` returns the contents of the HDFS `/user/foo` directory in JSON format.
+
+* `$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs` creates the HDFS `/user/foo/bar` directory.
+
+User and Developer Documentation
+--------------------------------
+
+* [HttpFS Server Setup](./ServerSetup.html)
+
+* [Using HTTP Tools](./UsingHttpTools.html)
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f8b0c37df12..6dc7a0fae35 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -141,6 +141,8 @@ Trunk (Unreleased)
     HDFS-7668. Convert site documentation from apt to markdown (Masatake
     Iwasaki via aw)
 
+    HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)
+
   OPTIMIZATIONS
 
   BUG FIXES
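To round out the picture, here is a minimal, hedged sketch of how the rewritten entry point is driven once this patch is applied. It only uses the options listed in the `hadoop_usage` text above; the configuration path is a stand-in for a real directory rather than a value taken from the patch.

```bash
# Sketch: operating HttpFS through the rewritten sbin/httpfs.sh
# (options per the hadoop_usage text in this patch; the --config path is a placeholder).

# Start the Tomcat-hosted HttpFS server as a background daemon
sbin/httpfs.sh --config /etc/hadoop/conf --daemon start

# LSB-style status check
sbin/httpfs.sh --config /etc/hadoop/conf --daemon status

# Stop the daemon
sbin/httpfs.sh --config /etc/hadoop/conf --daemon stop
```

Running `sbin/httpfs.sh run` in the foreground, with or without `-security`, remains available for debugging, as described in the usage text.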