HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)
This commit is contained in:
parent 606f5b517f
commit 8c4f76aa20
@@ -524,7 +524,7 @@
 <copy file="${basedir}/src/main/tomcat/server.xml"
       toDir="${httpfs.tomcat.dist.dir}/conf"/>
 <delete file="${httpfs.tomcat.dist.dir}/conf/ssl-server.xml"/>
-<copy file="${basedir}/src/main/tomcat/ssl-server.xml"
+<copy file="${basedir}/src/main/tomcat/ssl-server.xml.conf"
       toDir="${httpfs.tomcat.dist.dir}/conf"/>
 <delete file="${httpfs.tomcat.dist.dir}/conf/logging.properties"/>
 <copy file="${basedir}/src/main/tomcat/logging.properties"
@@ -14,40 +14,59 @@
 #
 
 # Set httpfs specific environment variables here.
 
-# Settings for the Embedded Tomcat that runs HttpFS
-# Java System properties for HttpFS should be specified in this variable
 #
-# export CATALINA_OPTS=
+# hadoop-env.sh is read prior to this file.
 
-# HttpFS logs directory
 #
-# export HTTPFS_LOG=${HTTPFS_HOME}/logs
 
-# HttpFS temporary directory
+# HTTPFS temporary directory
 #
-# export HTTPFS_TEMP=${HTTPFS_HOME}/temp
+# export HTTPFS_TEMP=${HADOOP_PREFIX}/temp
 
-# The HTTP port used by HttpFS
+# The HTTP port used by HTTPFS
 #
 # export HTTPFS_HTTP_PORT=14000
 
-# The Admin port used by HttpFS
+# The Admin port used by HTTPFS
 #
-# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1`
+# export HTTPFS_ADMIN_PORT=$((HTTPFS_HTTP_PORT + 1))
 
+# The maximum number of Tomcat handler threads
+#
+# export HTTPFS_MAX_THREADS=1000
 
 # The hostname HttpFS server runs on
 #
-# export HTTPFS_HTTP_HOSTNAME=`hostname -f`
+# export HTTPFS_HTTP_HOSTNAME=$(hostname -f)
 
-# Indicates if HttpFS is using SSL
-#
-# export HTTPFS_SSL_ENABLED=false
 
 # The location of the SSL keystore if using SSL
 #
 # export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
 
+#
 # The password of the SSL keystore if using SSL
 #
 # export HTTPFS_SSL_KEYSTORE_PASS=password
 
+##
+## Tomcat specific settings
+##
+#
+# Location of tomcat
+#
+# export HTTPFS_CATALINA_HOME=${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat
 
+# Java System properties for HTTPFS should be specified in this variable.
+# The java.library.path and hadoop.home.dir properties are automatically
+# configured. In order to supplement java.library.path,
+# one should add to the JAVA_LIBRARY_PATH env var.
+#
+# export CATALINA_OPTS=
 
+# PID file
+#
+# export CATALINA_PID=${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid
 
+# Output file
+#
+# export CATALINA_OUT=${HTTPFS_LOG}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out
 
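Read together, the new template leaves every knob commented out and lets the shell framework supply the defaults. As a rough sketch (the values below are simply the template defaults shown above, not settings this commit requires), a site-edited `httpfs-env.sh` might only need a few exports:

```bash
# Sketch of a site-specific httpfs-env.sh; hadoop-env.sh is read before this file.
export HTTPFS_HTTP_PORT=14000                        # HTTP port of the embedded Tomcat
export HTTPFS_ADMIN_PORT=$((HTTPFS_HTTP_PORT + 1))   # admin port, defaults to HTTP port + 1
export HTTPFS_MAX_THREADS=1000                       # Tomcat handler threads
export HTTPFS_SSL_KEYSTORE_FILE="${HOME}/.keystore"  # only read when SSL is enabled
export HTTPFS_SSL_KEYSTORE_PASS=password
```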
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,183 +13,63 @@
 # limitations under the License.
 #
 
-# resolve links - $0 may be a softlink
-PRG="${0}"
+function hadoop_subproject_init
+{
+  local this
+  local binparent
+  local varlist
 
-while [ -h "${PRG}" ]; do
-  ls=`ls -ld "${PRG}"`
-  link=`expr "$ls" : '.*-> \(.*\)$'`
-  if expr "$link" : '/.*' > /dev/null; then
-    PRG="$link"
-  else
-    PRG=`dirname "${PRG}"`/"$link"
+  if [[ -z "${HADOOP_HTTPFS_ENV_PROCESSED}" ]]; then
+    if [[ -e "${HADOOP_CONF_DIR}/httpfs-env.sh" ]]; then
+      . "${HADOOP_CONF_DIR}/httpfs-env.sh"
+      export HADOOP_HTTPFS_ENV_PROCESSED=true
+    fi
   fi
-done
 
-BASEDIR=`dirname ${PRG}`
-BASEDIR=`cd ${BASEDIR}/..;pwd`
+  export HADOOP_CATALINA_PREFIX=httpfs
 
+  export HADOOP_CATALINA_TEMP="${HTTPFS_TEMP:-${HADOOP_PREFIX}/temp}"
 
-function print() {
-  if [ "${HTTPFS_SILENT}" != "true" ]; then
-    echo "$@"
+  hadoop_deprecate_envvar HTTPFS_CONFIG HADOOP_CONF_DIR
+  hadoop_deprecate_envvar HTTPFS_LOG HADOOP_LOG_DIR
 
+  export HADOOP_CATALINA_CONFIG="${HADOOP_CONF_DIR}"
+  export HADOOP_CATALINA_LOG="${HADOOP_LOG_DIR}"
 
+  export HTTPFS_HTTP_HOSTNAME=${HTTPFS_HTTP_HOSTNAME:-$(hostname -f)}
 
+  export HADOOP_CATALINA_HTTP_PORT="${HTTPFS_HTTP_PORT:-14000}"
+  export HADOOP_CATALINA_ADMIN_PORT="${HTTPFS_ADMIN_PORT:-$((HADOOP_CATALINA_HTTP_PORT+1))}"
+  export HADOOP_CATALINA_MAX_THREADS="${HTTPFS_MAX_THREADS:-150}"
 
+  export HTTPFS_SSL_ENABLED=${HTTPFS_SSL_ENABLED:-false}
 
+  export HADOOP_CATALINA_SSL_KEYSTORE_FILE="${HTTPFS_SSL_KEYSTORE_FILE:-${HOME}/.keystore}"
 
+  export CATALINA_BASE="${CATALINA_BASE:-${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat}"
+  export HADOOP_CATALINA_HOME="${HTTPFS_CATALINA_HOME:-${CATALINA_BASE}}"
 
+  export CATALINA_OUT="${CATALINA_OUT:-${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out}"
 
+  export CATALINA_PID="${CATALINA_PID:-${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid}"
 
+  if [[ -n "${HADOOP_SHELL_SCRIPT_DEBUG}" ]]; then
+    varlist=$(env | egrep '(^HTTPFS|^CATALINA)' | cut -f1 -d= | grep -v _PASS)
+    for i in ${varlist}; do
+      hadoop_debug "Setting ${i} to ${!i}"
+    done
   fi
 }
 
-# if HTTPFS_HOME is already set warn it will be ignored
-#
-if [ "${HTTPFS_HOME}" != "" ]; then
-  echo "WARNING: current setting of HTTPFS_HOME ignored"
-fi
-print
+if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
+   [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+elif [[ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_PREFIX}/libexec/hadoop-config.sh"
 
-# setting HTTPFS_HOME to the installation dir, it cannot be changed
-#
-export HTTPFS_HOME=${BASEDIR}
-httpfs_home=${HTTPFS_HOME}
-print "Setting HTTPFS_HOME: ${HTTPFS_HOME}"
-
-# if the installation has a env file, source it
-# this is for native packages installations
-#
-if [ -e "${HTTPFS_HOME}/bin/httpfs-env.sh" ]; then
-  print "Sourcing: ${HTTPFS_HOME}/bin/httpfs-env.sh"
-  source ${HTTPFS_HOME}/bin/httpfs-env.sh
-  grep "^ *export " ${HTTPFS_HOME}/bin/httpfs-env.sh | sed 's/ *export/ setting/'
-fi
-
-# verify that the sourced env file didn't change HTTPFS_HOME
-# if so, warn and revert
-#
-if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
-  print "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
-  export HTTPFS_HOME=${httpfs_home}
-  print " using HTTPFS_HOME: ${HTTPFS_HOME}"
-fi
-
-if [ "${HTTPFS_CONFIG}" = "" ]; then
-  export HTTPFS_CONFIG=${HTTPFS_HOME}/etc/hadoop
-  print "Setting HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
 else
-  print "Using HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
+  echo "ERROR: Hadoop common not found." 2>&1
+  exit 1
 fi
-httpfs_config=${HTTPFS_CONFIG}
-
-# if the configuration dir has a env file, source it
-#
-if [ -e "${HTTPFS_CONFIG}/httpfs-env.sh" ]; then
-  print "Sourcing: ${HTTPFS_CONFIG}/httpfs-env.sh"
-  source ${HTTPFS_CONFIG}/httpfs-env.sh
-  grep "^ *export " ${HTTPFS_CONFIG}/httpfs-env.sh | sed 's/ *export/ setting/'
-fi
-
-# verify that the sourced env file didn't change HTTPFS_HOME
-# if so, warn and revert
-#
-if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
-  echo "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
-  export HTTPFS_HOME=${httpfs_home}
-fi
-
-# verify that the sourced env file didn't change HTTPFS_CONFIG
-# if so, warn and revert
-#
-if [ "${HTTPFS_CONFIG}" != "${httpfs_config}" ]; then
-  echo "WARN: HTTPFS_CONFIG resetting to ''${HTTPFS_CONFIG}'' ignored"
-  export HTTPFS_CONFIG=${httpfs_config}
-fi
-
-if [ "${HTTPFS_LOG}" = "" ]; then
-  export HTTPFS_LOG=${HTTPFS_HOME}/logs
-  print "Setting HTTPFS_LOG: ${HTTPFS_LOG}"
-else
-  print "Using HTTPFS_LOG: ${HTTPFS_LOG}"
-fi
-
-if [ ! -f ${HTTPFS_LOG} ]; then
-  mkdir -p ${HTTPFS_LOG}
-fi
-
-if [ "${HTTPFS_TEMP}" = "" ]; then
-  export HTTPFS_TEMP=${HTTPFS_HOME}/temp
-  print "Setting HTTPFS_TEMP: ${HTTPFS_TEMP}"
-else
-  print "Using HTTPFS_TEMP: ${HTTPFS_TEMP}"
-fi
-
-if [ ! -f ${HTTPFS_TEMP} ]; then
-  mkdir -p ${HTTPFS_TEMP}
-fi
-
-if [ "${HTTPFS_HTTP_PORT}" = "" ]; then
-  export HTTPFS_HTTP_PORT=14000
-  print "Setting HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
-else
-  print "Using HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
-fi
-
-if [ "${HTTPFS_ADMIN_PORT}" = "" ]; then
-  export HTTPFS_ADMIN_PORT=`expr $HTTPFS_HTTP_PORT + 1`
-  print "Setting HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}"
-else
-  print "Using HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}"
-fi
-
-if [ "${HTTPFS_HTTP_HOSTNAME}" = "" ]; then
-  export HTTPFS_HTTP_HOSTNAME=`hostname -f`
-  print "Setting HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}"
-else
-  print "Using HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}"
-fi
-
-if [ "${HTTPFS_SSL_ENABLED}" = "" ]; then
-  export HTTPFS_SSL_ENABLED="false"
-  print "Setting HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}"
-else
-  print "Using HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}"
-fi
-
-if [ "${HTTPFS_SSL_KEYSTORE_FILE}" = "" ]; then
-  export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
-  print "Setting HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}"
-else
-  print "Using HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}"
-fi
-
-if [ "${HTTPFS_SSL_KEYSTORE_PASS}" = "" ]; then
-  export HTTPFS_SSL_KEYSTORE_PASS=password
-  print "Setting HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}"
-else
-  print "Using HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}"
-fi
-
-if [ "${CATALINA_BASE}" = "" ]; then
-  export CATALINA_BASE=${HTTPFS_HOME}/share/hadoop/httpfs/tomcat
-  print "Setting CATALINA_BASE: ${CATALINA_BASE}"
-else
-  print "Using CATALINA_BASE: ${CATALINA_BASE}"
-fi
-
-if [ "${HTTPFS_CATALINA_HOME}" = "" ]; then
-  export HTTPFS_CATALINA_HOME=${CATALINA_BASE}
-  print "Setting HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}"
-else
-  print "Using HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}"
-fi
-
-if [ "${CATALINA_OUT}" = "" ]; then
-  export CATALINA_OUT=${HTTPFS_LOG}/httpfs-catalina.out
-  print "Setting CATALINA_OUT: ${CATALINA_OUT}"
-else
-  print "Using CATALINA_OUT: ${CATALINA_OUT}"
-fi
-
-if [ "${CATALINA_PID}" = "" ]; then
-  export CATALINA_PID=/tmp/httpfs.pid
-  print "Setting CATALINA_PID: ${CATALINA_PID}"
-else
-  print "Using CATALINA_PID: ${CATALINA_PID}"
-fi
-
-print
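Most of the deleted lines are mechanical: each of the old "Setting/Using" if/else blocks collapses into a single `${VAR:-default}` expansion inside `hadoop_subproject_init`. One variable shown both ways, taken from the hunk above, makes the pattern clear:

```bash
# Old style: one block per variable
if [ "${HTTPFS_HTTP_PORT}" = "" ]; then
  export HTTPFS_HTTP_PORT=14000
  print "Setting HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
else
  print "Using HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
fi

# New style: one line with the same default, logged only under shell-script debug
export HADOOP_CATALINA_HTTP_PORT="${HTTPFS_HTTP_PORT:-14000}"
```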
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,53 +13,99 @@
 # limitations under the License.
 #
 
-# resolve links - $0 may be a softlink
-PRG="${0}"
+function hadoop_usage()
+{
+  echo "Usage: httpfs.sh [--config confdir] [--debug] --daemon start|status|stop"
+  echo " httpfs.sh [--config confdir] [--debug] COMMAND"
+  echo " where COMMAND is one of:"
+  echo " run Start httpfs in the current window"
+  echo " run -security Start in the current window with security manager"
+  echo " start Start httpfs in a separate window"
+  echo " start -security Start in a separate window with security manager"
+  echo " status Return the LSB compliant status"
+  echo " stop Stop httpfs, waiting up to 5 seconds for the process to end"
+  echo " stop n Stop httpfs, waiting up to n seconds for the process to end"
+  echo " stop -force Stop httpfs, wait up to 5 seconds and then use kill -KILL if still running"
+  echo " stop n -force Stop httpfs, wait up to n seconds and then use kill -KILL if still running"
+}
 
-while [ -h "${PRG}" ]; do
-  ls=`ls -ld "${PRG}"`
-  link=`expr "$ls" : '.*-> \(.*\)$'`
-  if expr "$link" : '/.*' > /dev/null; then
-    PRG="$link"
-  else
-    PRG=`dirname "${PRG}"`/"$link"
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
 fi
-done
 
-BASEDIR=`dirname ${PRG}`
-BASEDIR=`cd ${BASEDIR}/..;pwd`
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/httpfs-config.sh." 2>&1
+  exit 1
+fi
 
-source ${HADOOP_LIBEXEC_DIR:-${BASEDIR}/libexec}/httpfs-config.sh
+# The Java System property 'httpfs.http.port' it is not used by Kms,
 
-# The Java System property 'httpfs.http.port' it is not used by HttpFS,
 # it is used in Tomcat's server.xml configuration file
 #
-print "Using CATALINA_OPTS: ${CATALINA_OPTS}"
 
-catalina_opts="-Dhttpfs.home.dir=${HTTPFS_HOME}";
-catalina_opts="${catalina_opts} -Dhttpfs.config.dir=${HTTPFS_CONFIG}";
-catalina_opts="${catalina_opts} -Dhttpfs.log.dir=${HTTPFS_LOG}";
-catalina_opts="${catalina_opts} -Dhttpfs.temp.dir=${HTTPFS_TEMP}";
-catalina_opts="${catalina_opts} -Dhttpfs.admin.port=${HTTPFS_ADMIN_PORT}";
-catalina_opts="${catalina_opts} -Dhttpfs.http.port=${HTTPFS_HTTP_PORT}";
-catalina_opts="${catalina_opts} -Dhttpfs.http.hostname=${HTTPFS_HTTP_HOSTNAME}";
-catalina_opts="${catalina_opts} -Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}";
-catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.file=${HTTPFS_SSL_KEYSTORE_FILE}";
-catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.pass=${HTTPFS_SSL_KEYSTORE_PASS}";
+# Mask the trustStorePassword
+# shellcheck disable=SC2086
+CATALINA_OPTS_DISP="$(echo ${CATALINA_OPTS} | sed -e 's/trustStorePassword=[^ ]*/trustStorePassword=***/')"
 
-print "Adding to CATALINA_OPTS: ${catalina_opts}"
+hadoop_debug "Using CATALINA_OPTS: ${CATALINA_OPTS_DISP}"
 
-export CATALINA_OPTS="${CATALINA_OPTS} ${catalina_opts}"
+# We're using hadoop-common, so set up some stuff it might need:
+hadoop_finalize
 
+hadoop_verify_logdir
 
+if [[ $# = 0 ]]; then
+  case "${HADOOP_DAEMON_MODE}" in
+    status)
+      hadoop_status_daemon "${CATALINA_PID}"
+      exit
+    ;;
+    start)
+      set -- "start"
+    ;;
+    stop)
+      set -- "stop"
+    ;;
+  esac
+fi
 
+hadoop_finalize_catalina_opts
+export CATALINA_OPTS
 
 # A bug in catalina.sh script does not use CATALINA_OPTS for stopping the server
 #
-if [ "${1}" = "stop" ]; then
+if [[ "${1}" = "stop" ]]; then
   export JAVA_OPTS=${CATALINA_OPTS}
 fi
 
-if [ "${HTTPFS_SILENT}" != "true" ]; then
-  exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@"
-else
-  exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@" > /dev/null
+# If ssl, the populate the passwords into ssl-server.xml before starting tomcat
+#
+# HTTPFS_SSL_KEYSTORE_PASS is a bit odd.
+# if undefined, then the if test will not enable ssl on its own
+# if "", set it to "password".
+# if custom, use provided password
+#
+if [[ -f "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" ]]; then
+  if [[ -n "${HTTPFS_SSL_KEYSTORE_PASS+x}" ]] || [[ -n "${HTTPFS_SSL_TRUSTSTORE_PASS}" ]]; then
+    export HTTPFS_SSL_KEYSTORE_PASS=${HTTPFS_SSL_KEYSTORE_PASS:-password}
+    sed -e 's/_httpfs_ssl_keystore_pass_/'${HTTPFS_SSL_KEYSTORE_PASS}'/g' \
+        -e 's/_httpfs_ssl_truststore_pass_/'${HTTPFS_SSL_TRUSTSTORE_PASS}'/g' \
+        "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" \
+        > "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml"
+    chmod 700 "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml" >/dev/null 2>&1
+  fi
 fi
 
+hadoop_add_param CATALINA_OPTS -Dhttpfs.http.hostname "-Dhttpfs.http.hostname=${HTTPFS_HOST_NAME}"
+hadoop_add_param CATALINA_OPTS -Dhttpfs.ssl.enabled "-Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}"
 
+exec "${HADOOP_CATALINA_HOME}/bin/catalina.sh" "$@"
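Based on the `hadoop_usage` text above, day-to-day invocation now goes through the generic `--daemon` options instead of raw catalina commands. The configuration directory and paths below are illustrative only, not values fixed by this commit:

```bash
# Daemonized lifecycle (status is reported LSB-style via hadoop_status_daemon)
sbin/httpfs.sh --daemon start
sbin/httpfs.sh --daemon status
sbin/httpfs.sh --daemon stop

# Foreground run with an alternate configuration directory and shell tracing
sbin/httpfs.sh --config /etc/hadoop-httpfs/conf --debug run
```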
@@ -61,7 +61,7 @@
 <!--The connectors can use a shared executor, you can define one or more named thread pools-->
 <!--
 <Executor name="tomcatThreadPool" namePrefix="catalina-exec-"
-    maxThreads="150" minSpareThreads="4"/>
+    maxThreads="httpfs.max.threads" minSpareThreads="4"/>
 -->
 
 <!-- Define a SSL HTTP/1.1 Connector on port 8443
 
@@ -72,7 +72,7 @@
            maxThreads="150" scheme="https" secure="true"
            clientAuth="false" sslEnabledProtocols="TLSv1,SSLv2Hello"
            keystoreFile="${httpfs.ssl.keystore.file}"
-           keystorePass="${httpfs.ssl.keystore.pass}"/>
+           keystorePass="_httpfs_ssl_keystore_pass_"/>
 
 <!-- Define an AJP 1.3 Connector on port 8009 -->
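The keystore password is no longer a Tomcat `${...}` substitution fed through a `-Dhttpfs.ssl.keystore.pass` system property; instead `ssl-server.xml` is generated from `ssl-server.xml.conf` when the server starts, so the password does not have to appear on the Java command line. A simplified restatement of the substitution `httpfs.sh` performs (the password values are assumptions for illustration):

```bash
export HTTPFS_SSL_KEYSTORE_PASS="${HTTPFS_SSL_KEYSTORE_PASS:-password}"
sed -e "s/_httpfs_ssl_keystore_pass_/${HTTPFS_SSL_KEYSTORE_PASS}/g" \
    -e "s/_httpfs_ssl_truststore_pass_/${HTTPFS_SSL_TRUSTSTORE_PASS}/g" \
    "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" \
    > "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml"
chmod 700 "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml"   # keep the materialized password private
```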
@@ -1,159 +0,0 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.

---
Hadoop HDFS over HTTP ${project.version} - Server Setup
---
---
${maven.build.timestamp}

Hadoop HDFS over HTTP ${project.version} - Server Setup

This page explains how to quickly setup HttpFS with Pseudo authentication
against a Hadoop cluster with Pseudo authentication.

* Requirements

  * Java 6+

  * Maven 3+

* Install HttpFS

+---+
~ $ tar xzf httpfs-${project.version}.tar.gz
+---+

* Configure HttpFS

By default, HttpFS assumes that Hadoop configuration files
(<<<core-site.xml & hdfs-site.xml>>>) are in the HttpFS
configuration directory.

If this is not the case, add to the <<<httpfs-site.xml>>> file the
<<<httpfs.hadoop.config.dir>>> property set to the location
of the Hadoop configuration directory.

* Configure Hadoop

Edit Hadoop <<<core-site.xml>>> and defined the Unix user that will
run the HttpFS server as a proxyuser. For example:

+---+
...
<property>
  <name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
  <value>httpfs-host.foo.com</value>
</property>
<property>
  <name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
  <value>*</value>
</property>
...
+---+

IMPORTANT: Replace <<<#HTTPFSUSER#>>> with the Unix user that will
start the HttpFS server.

* Restart Hadoop

You need to restart Hadoop for the proxyuser configuration ot become
active.

* Start/Stop HttpFS

To start/stop HttpFS use HttpFS's bin/httpfs.sh script. For example:

+---+
httpfs-${project.version} $ bin/httpfs.sh start
+---+

NOTE: Invoking the script without any parameters list all possible
parameters (start, stop, run, etc.). The <<<httpfs.sh>>> script is a wrapper
for Tomcat's <<<catalina.sh>>> script that sets the environment variables
and Java System properties required to run HttpFS server.

* Test HttpFS is working

+---+
~ $ curl -i "http://<HTTPFSHOSTNAME>:14000?user.name=babu&op=homedir"
HTTP/1.1 200 OK
Content-Type: application/json
Transfer-Encoding: chunked

{"homeDir":"http:\/\/<HTTPFS_HOST>:14000\/user\/babu"}
+---+

* Embedded Tomcat Configuration

To configure the embedded Tomcat go to the <<<tomcat/conf>>>.

HttpFS preconfigures the HTTP and Admin ports in Tomcat's <<<server.xml>>> to
14000 and 14001.

Tomcat logs are also preconfigured to go to HttpFS's <<<logs/>>> directory.

The following environment variables (which can be set in HttpFS's
<<<conf/httpfs-env.sh>>> script) can be used to alter those values:

  * HTTPFS_HTTP_PORT

  * HTTPFS_ADMIN_PORT

  * HTTPFS_LOG

* HttpFS Configuration

HttpFS supports the following {{{./httpfs-default.html}configuration properties}}
in the HttpFS's <<<conf/httpfs-site.xml>>> configuration file.

* HttpFS over HTTPS (SSL)

To configure HttpFS to work over SSL edit the {{httpfs-env.sh}} script in the
configuration directory setting the {{HTTPFS_SSL_ENABLED}} to {{true}}.

In addition, the following 2 properties may be defined (shown with default
values):

  * HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore

  * HTTPFS_SSL_KEYSTORE_PASS=password

In the HttpFS <<<tomcat/conf>>> directory, replace the <<<server.xml>>> file
with the <<<ssl-server.xml>>> file.

You need to create an SSL certificate for the HttpFS server. As the
<<<httpfs>>> Unix user, using the Java <<<keytool>>> command to create the
SSL certificate:

+---+
$ keytool -genkey -alias tomcat -keyalg RSA
+---+

You will be asked a series of questions in an interactive prompt. It will
create the keystore file, which will be named <<.keystore>> and located in the
<<<httpfs>>> user home directory.

The password you enter for "keystore password" must match the value of the
<<<HTTPFS_SSL_KEYSTORE_PASS>>> environment variable set in the
<<<httpfs-env.sh>>> script in the configuration directory.

The answer to "What is your first and last name?" (i.e. "CN") must be the
hostname of the machine where the HttpFS Server will be running.

Start HttpFS. It should work over HTTPS.

Using the Hadoop <<<FileSystem>>> API or the Hadoop FS shell, use the
<<<swebhdfs://>>> scheme. Make sure the JVM is picking up the truststore
containing the public key of the SSL certificate if using a self-signed
certificate.
@@ -1,87 +0,0 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.

---
Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools
---
---
${maven.build.timestamp}

Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools

* Security

Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP
SPNEGO authentication.

** Pseudo Authentication

With pseudo authentication the user name must be specified in the
<<<user.name=\<USERNAME\>>>> query string parameter of a HttpFS URL.
For example:

+---+
$ curl "http://<HTTFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
+---+

** Kerberos HTTP SPNEGO Authentication

Kerberos HTTP SPNEGO authentication requires a tool or library supporting
Kerberos HTTP SPNEGO protocol.

IMPORTANT: If using <<<curl>>>, the <<<curl>>> version being used must support
GSS (<<<curl -V>>> prints out 'GSS' if it supports it).

For example:

+---+
$ kinit
Please enter the password for tucu@LOCALHOST:
$ curl --negotiate -u foo "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
Enter host password for user 'foo':
+---+

NOTE: the <<<-u USER>>> option is required by the <<<--negotiate>>> but it is
not used. Use any value as <<<USER>>> and when asked for the password press
[ENTER] as the password value is ignored.

** {Remembering Who I Am} (Establishing an Authenticated Session)

As most authentication mechanisms, Hadoop HTTP authentication authenticates
users once and issues a short-lived authentication token to be presented in
subsequent requests. This authentication token is a signed HTTP Cookie.

When using tools like <<<curl>>>, the authentication token must be stored on
the first request doing authentication, and submitted in subsequent requests.
To do this with curl the <<<-b>>> and <<<-c>>> options to save and send HTTP
Cookies must be used.

For example, the first request doing authentication should save the received
HTTP Cookies.

Using Pseudo Authentication:

+---+
$ curl -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
+---+

Using Kerberos HTTP SPNEGO authentication:

+---+
$ curl --negotiate -u foo -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
+---+

Then, subsequent requests forward the previously received HTTP Cookie:

+---+
$ curl -b ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=liststatus"
+---+
@@ -1,83 +0,0 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.

---
Hadoop HDFS over HTTP - Documentation Sets ${project.version}
---
---
${maven.build.timestamp}

Hadoop HDFS over HTTP - Documentation Sets ${project.version}

HttpFS is a server that provides a REST HTTP gateway supporting all HDFS
File System operations (read and write). And it is inteoperable with the
<<webhdfs>> REST HTTP API.

HttpFS can be used to transfer data between clusters running different
versions of Hadoop (overcoming RPC versioning issues), for example using
Hadoop DistCP.

HttpFS can be used to access data in HDFS on a cluster behind of a firewall
(the HttpFS server acts as a gateway and is the only system that is allowed
to cross the firewall into the cluster).

HttpFS can be used to access data in HDFS using HTTP utilities (such as curl
and wget) and HTTP libraries Perl from other languages than Java.

The <<webhdfs>> client FileSytem implementation can be used to access HttpFS
using the Hadoop filesystem command (<<<hadoop fs>>>) line tool as well as
from Java aplications using the Hadoop FileSystem Java API.

HttpFS has built-in security supporting Hadoop pseudo authentication and
HTTP SPNEGO Kerberos and other pluggable authentication mechanims. It also
provides Hadoop proxy user support.

* How Does HttpFS Works?

HttpFS is a separate service from Hadoop NameNode.

HttpFS itself is Java web-application and it runs using a preconfigured Tomcat
bundled with HttpFS binary distribution.

HttpFS HTTP web-service API calls are HTTP REST calls that map to a HDFS file
system operation. For example, using the <<<curl>>> Unix command:

  * <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt>>> returns
  the contents of the HDFS <<</user/foo/README.txt>>> file.

  * <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list>>> returns the
  contents of the HDFS <<</user/foo>>> directory in JSON format.

  * <<<$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs>>>
  creates the HDFS <<</user/foo.bar>>> directory.

* How HttpFS and Hadoop HDFS Proxy differ?

HttpFS was inspired by Hadoop HDFS proxy.

HttpFS can be seen as a full rewrite of Hadoop HDFS proxy.

Hadoop HDFS proxy provides a subset of file system operations (read only),
HttpFS provides support for all file system operations.

HttpFS uses a clean HTTP REST API making its use with HTTP tools more
intuitive.

HttpFS supports Hadoop pseudo authentication, Kerberos SPNEGOS authentication
and Hadoop proxy users. Hadoop HDFS proxy did not.

* User and Developer Documentation

  * {{{./ServerSetup.html}HttpFS Server Setup}}

  * {{{./UsingHttpTools.html}Using HTTP Tools}}
@@ -0,0 +1,121 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

Hadoop HDFS over HTTP - Server Setup
====================================

This page explains how to quickly setup HttpFS with Pseudo authentication against a Hadoop cluster with Pseudo authentication.

Install HttpFS
--------------

    ~ $ tar xzf httpfs-${project.version}.tar.gz

Configure HttpFS
----------------

By default, HttpFS assumes that Hadoop configuration files (`core-site.xml & hdfs-site.xml`) are in the HttpFS configuration directory.

If this is not the case, add to the `httpfs-site.xml` file the `httpfs.hadoop.config.dir` property set to the location of the Hadoop configuration directory.

Configure Hadoop
----------------

Edit Hadoop `core-site.xml` and defined the Unix user that will run the HttpFS server as a proxyuser. For example:

```xml
<property>
  <name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
  <value>httpfs-host.foo.com</value>
</property>
<property>
  <name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
  <value>*</value>
</property>
```

IMPORTANT: Replace `#HTTPFSUSER#` with the Unix user that will start the HttpFS server.

Restart Hadoop
--------------

You need to restart Hadoop for the proxyuser configuration ot become active.

Start/Stop HttpFS
-----------------

To start/stop HttpFS use HttpFS's sbin/httpfs.sh script. For example:

    $ sbin/httpfs.sh start

NOTE: Invoking the script without any parameters list all possible parameters (start, stop, run, etc.). The `httpfs.sh` script is a wrapper for Tomcat's `catalina.sh` script that sets the environment variables and Java System properties required to run HttpFS server.

Test HttpFS is working
----------------------

    ~ $ curl -i "http://<HTTPFSHOSTNAME>:14000?user.name=babu&op=homedir"
    HTTP/1.1 200 OK
    Content-Type: application/json
    Transfer-Encoding: chunked

    {"homeDir":"http:\/\/<HTTPFS_HOST>:14000\/user\/babu"}

Embedded Tomcat Configuration
-----------------------------

To configure the embedded Tomcat go to the `tomcat/conf`.

HttpFS preconfigures the HTTP and Admin ports in Tomcat's `server.xml` to 14000 and 14001.

Tomcat logs are also preconfigured to go to HttpFS's `logs/` directory.

The following environment variables (which can be set in HttpFS's `etc/hadoop/httpfs-env.sh` script) can be used to alter those values:

* HTTPFS\_HTTP\_PORT

* HTTPFS\_ADMIN\_PORT

* HADOOP\_LOG\_DIR

HttpFS Configuration
--------------------

HttpFS supports the following [configuration properties](./httpfs-default.html) in the HttpFS's `etc/hadoop/httpfs-site.xml` configuration file.

HttpFS over HTTPS (SSL)
-----------------------

To configure HttpFS to work over SSL edit the [httpfs-env.sh](#httpfs-env.sh) script in the configuration directory setting the [HTTPFS\_SSL\_ENABLED](#HTTPFS_SSL_ENABLED) to [true](#true).

In addition, the following 2 properties may be defined (shown with default values):

* HTTPFS\_SSL\_KEYSTORE\_FILE=$HOME/.keystore

* HTTPFS\_SSL\_KEYSTORE\_PASS=password

In the HttpFS `tomcat/conf` directory, replace the `server.xml` file with the `ssl-server.xml` file.

You need to create an SSL certificate for the HttpFS server. As the `httpfs` Unix user, using the Java `keytool` command to create the SSL certificate:

    $ keytool -genkey -alias tomcat -keyalg RSA

You will be asked a series of questions in an interactive prompt. It will create the keystore file, which will be named **.keystore** and located in the `httpfs` user home directory.

The password you enter for "keystore password" must match the value of the `HTTPFS_SSL_KEYSTORE_PASS` environment variable set in the `httpfs-env.sh` script in the configuration directory.

The answer to "What is your first and last name?" (i.e. "CN") must be the hostname of the machine where the HttpFS Server will be running.

Start HttpFS. It should work over HTTPS.

Using the Hadoop `FileSystem` API or the Hadoop FS shell, use the `swebhdfs://` scheme. Make sure the JVM is picking up the truststore containing the public key of the SSL certificate if using a self-signed certificate.
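The closing paragraph of the added page assumes a client that already trusts the certificate. A hedged example of what that looks like from the client side (host name, path and truststore location are assumptions, not values fixed by this commit):

```bash
# List a directory through HttpFS over HTTPS using the swebhdfs scheme
hadoop fs -ls swebhdfs://httpfs-host.foo.com:14000/user/foo

# For a self-signed certificate, point the client JVM at a truststore that
# holds the certificate's public key before running the command above
export HADOOP_OPTS="${HADOOP_OPTS} -Djavax.net.ssl.trustStore=${HOME}/.truststore"
```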
@@ -0,0 +1,62 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

Hadoop HDFS over HTTP - Using HTTP Tools
========================================

Security
--------

Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP SPNEGO authentication.

### Pseudo Authentication

With pseudo authentication the user name must be specified in the `user.name=<USERNAME>` query string parameter of a HttpFS URL. For example:

    $ curl "http://<HTTFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"

### Kerberos HTTP SPNEGO Authentication

Kerberos HTTP SPNEGO authentication requires a tool or library supporting Kerberos HTTP SPNEGO protocol.

IMPORTANT: If using `curl`, the `curl` version being used must support GSS (`curl -V` prints out 'GSS' if it supports it).

For example:

    $ kinit
    Please enter the password for user@LOCALHOST:
    $ curl --negotiate -u foo "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
    Enter host password for user 'foo':

NOTE: the `-u USER` option is required by the `--negotiate` but it is not used. Use any value as `USER` and when asked for the password press [ENTER] as the password value is ignored.

### Remembering Who I Am (Establishing an Authenticated Session)

As most authentication mechanisms, Hadoop HTTP authentication authenticates users once and issues a short-lived authentication token to be presented in subsequent requests. This authentication token is a signed HTTP Cookie.

When using tools like `curl`, the authentication token must be stored on the first request doing authentication, and submitted in subsequent requests. To do this with curl the `-b` and `-c` options to save and send HTTP Cookies must be used.

For example, the first request doing authentication should save the received HTTP Cookies.

Using Pseudo Authentication:

    $ curl -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=foo"

Using Kerberos HTTP SPNEGO authentication:

    $ curl --negotiate -u foo -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"

Then, subsequent requests forward the previously received HTTP Cookie:

    $ curl -b ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=liststatus"
@@ -0,0 +1,52 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

Hadoop HDFS over HTTP - Documentation Sets
==========================================

HttpFS is a server that provides a REST HTTP gateway supporting all HDFS File System operations (read and write). And it is inteoperable with the **webhdfs** REST HTTP API.

HttpFS can be used to transfer data between clusters running different versions of Hadoop (overcoming RPC versioning issues), for example using Hadoop DistCP.

HttpFS can be used to access data in HDFS on a cluster behind of a firewall (the HttpFS server acts as a gateway and is the only system that is allowed to cross the firewall into the cluster).

HttpFS can be used to access data in HDFS using HTTP utilities (such as curl and wget) and HTTP libraries Perl from other languages than Java.

The **webhdfs** client FileSytem implementation can be used to access HttpFS using the Hadoop filesystem command (`hadoop fs`) line tool as well as from Java aplications using the Hadoop FileSystem Java API.

HttpFS has built-in security supporting Hadoop pseudo authentication and HTTP SPNEGO Kerberos and other pluggable authentication mechanims. It also provides Hadoop proxy user support.

How Does HttpFS Works?
----------------------

HttpFS is a separate service from Hadoop NameNode.

HttpFS itself is Java web-application and it runs using a preconfigured Tomcat bundled with HttpFS binary distribution.

HttpFS HTTP web-service API calls are HTTP REST calls that map to a HDFS file system operation. For example, using the `curl` Unix command:

* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt` returns the contents of the HDFS `/user/foo/README.txt` file.

* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list` returns the contents of the HDFS `/user/foo` directory in JSON format.

* `$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs` creates the HDFS `/user/foo.bar` directory.

User and Developer Documentation
--------------------------------

* [HttpFS Server Setup](./ServerSetup.html)

* [Using HTTP Tools](./UsingHttpTools.html)
@@ -141,6 +141,8 @@ Trunk (Unreleased)
 HDFS-7668. Convert site documentation from apt to markdown (Masatake
 Iwasaki via aw)
 
+HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)
+
 OPTIMIZATIONS
 
 BUG FIXES