HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)
This commit is contained in:
parent
606f5b517f
commit
8c4f76aa20
|
@ -524,7 +524,7 @@
|
|||
<copy file="${basedir}/src/main/tomcat/server.xml"
|
||||
toDir="${httpfs.tomcat.dist.dir}/conf"/>
|
||||
<delete file="${httpfs.tomcat.dist.dir}/conf/ssl-server.xml"/>
|
||||
<copy file="${basedir}/src/main/tomcat/ssl-server.xml"
|
||||
<copy file="${basedir}/src/main/tomcat/ssl-server.xml.conf"
|
||||
toDir="${httpfs.tomcat.dist.dir}/conf"/>
|
||||
<delete file="${httpfs.tomcat.dist.dir}/conf/logging.properties"/>
|
||||
<copy file="${basedir}/src/main/tomcat/logging.properties"
|
||||
|
|
|
@ -14,40 +14,59 @@
|
|||
#
|
||||
|
||||
# Set httpfs specific environment variables here.
|
||||
|
||||
# Settings for the Embedded Tomcat that runs HttpFS
|
||||
# Java System properties for HttpFS should be specified in this variable
|
||||
#
|
||||
# export CATALINA_OPTS=
|
||||
|
||||
# HttpFS logs directory
|
||||
# hadoop-env.sh is read prior to this file.
|
||||
#
|
||||
# export HTTPFS_LOG=${HTTPFS_HOME}/logs
|
||||
|
||||
# HttpFS temporary directory
|
||||
# HTTPFS temporary directory
|
||||
#
|
||||
# export HTTPFS_TEMP=${HTTPFS_HOME}/temp
|
||||
# export HTTPFS_TEMP=${HADOOP_PREFIX}/temp
|
||||
|
||||
# The HTTP port used by HttpFS
|
||||
# The HTTP port used by HTTPFS
|
||||
#
|
||||
# export HTTPFS_HTTP_PORT=14000
|
||||
|
||||
# The Admin port used by HttpFS
|
||||
# The Admin port used by HTTPFS
|
||||
#
|
||||
# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1`
|
||||
# export HTTPFS_ADMIN_PORT=$((HTTPFS_HTTP_PORT + 1))
|
||||
|
||||
# The maximum number of Tomcat handler threads
|
||||
#
|
||||
# export HTTPFS_MAX_THREADS=1000
|
||||
|
||||
# The hostname HttpFS server runs on
|
||||
#
|
||||
# export HTTPFS_HTTP_HOSTNAME=`hostname -f`
|
||||
|
||||
# Indicates if HttpFS is using SSL
|
||||
#
|
||||
# export HTTPFS_SSL_ENABLED=false
|
||||
# export HTTPFS_HTTP_HOSTNAME=$(hostname -f)
|
||||
|
||||
# The location of the SSL keystore if using SSL
|
||||
#
|
||||
# export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
|
||||
|
||||
#
|
||||
# The password of the SSL keystore if using SSL
|
||||
#
|
||||
# export HTTPFS_SSL_KEYSTORE_PASS=password
|
||||
|
||||
##
|
||||
## Tomcat specific settings
|
||||
##
|
||||
#
|
||||
# Location of tomcat
|
||||
#
|
||||
# export HTTPFS_CATALINA_HOME=${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat
|
||||
|
||||
# Java System properties for HTTPFS should be specified in this variable.
|
||||
# The java.library.path and hadoop.home.dir properties are automatically
|
||||
# configured. In order to supplement java.library.path,
|
||||
# one should add to the JAVA_LIBRARY_PATH env var.
|
||||
#
|
||||
# export CATALINA_OPTS=
|
||||
|
||||
# PID file
|
||||
#
|
||||
# export CATALINA_PID=${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid
|
||||
|
||||
# Output file
|
||||
#
|
||||
# export CATALINA_OUT=${HTTPFS_LOG}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env bash
|
||||
#!/bin/bash
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -13,183 +13,63 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
# resolve links - $0 may be a softlink
|
||||
PRG="${0}"
|
||||
function hadoop_subproject_init
|
||||
{
|
||||
local this
|
||||
local binparent
|
||||
local varlist
|
||||
|
||||
while [ -h "${PRG}" ]; do
|
||||
ls=`ls -ld "${PRG}"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "${PRG}"`/"$link"
|
||||
if [[ -z "${HADOOP_HTTPFS_ENV_PROCESSED}" ]]; then
|
||||
if [[ -e "${HADOOP_CONF_DIR}/httpfs-env.sh" ]]; then
|
||||
. "${HADOOP_CONF_DIR}/httpfs-env.sh"
|
||||
export HADOOP_HTTPFS_ENV_PROCESSED=true
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
BASEDIR=`dirname ${PRG}`
|
||||
BASEDIR=`cd ${BASEDIR}/..;pwd`
|
||||
export HADOOP_CATALINA_PREFIX=httpfs
|
||||
|
||||
export HADOOP_CATALINA_TEMP="${HTTPFS_TEMP:-${HADOOP_PREFIX}/temp}"
|
||||
|
||||
function print() {
|
||||
if [ "${HTTPFS_SILENT}" != "true" ]; then
|
||||
echo "$@"
|
||||
hadoop_deprecate_envvar HTTPFS_CONFIG HADOOP_CONF_DIR
|
||||
|
||||
hadoop_deprecate_envvar HTTPFS_LOG HADOOP_LOG_DIR
|
||||
|
||||
export HADOOP_CATALINA_CONFIG="${HADOOP_CONF_DIR}"
|
||||
export HADOOP_CATALINA_LOG="${HADOOP_LOG_DIR}"
|
||||
|
||||
export HTTPFS_HTTP_HOSTNAME=${HTTPFS_HTTP_HOSTNAME:-$(hostname -f)}
|
||||
|
||||
export HADOOP_CATALINA_HTTP_PORT="${HTTPFS_HTTP_PORT:-14000}"
|
||||
export HADOOP_CATALINA_ADMIN_PORT="${HTTPFS_ADMIN_PORT:-$((HADOOP_CATALINA_HTTP_PORT+1))}"
|
||||
export HADOOP_CATALINA_MAX_THREADS="${HTTPFS_MAX_THREADS:-150}"
|
||||
|
||||
export HTTPFS_SSL_ENABLED=${HTTPFS_SSL_ENABLED:-false}
|
||||
|
||||
export HADOOP_CATALINA_SSL_KEYSTORE_FILE="${HTTPFS_SSL_KEYSTORE_FILE:-${HOME}/.keystore}"
|
||||
|
||||
export CATALINA_BASE="${CATALINA_BASE:-${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat}"
|
||||
export HADOOP_CATALINA_HOME="${HTTPFS_CATALINA_HOME:-${CATALINA_BASE}}"
|
||||
|
||||
export CATALINA_OUT="${CATALINA_OUT:-${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out}"
|
||||
|
||||
export CATALINA_PID="${CATALINA_PID:-${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid}"
|
||||
|
||||
if [[ -n "${HADOOP_SHELL_SCRIPT_DEBUG}" ]]; then
|
||||
varlist=$(env | egrep '(^HTTPFS|^CATALINA)' | cut -f1 -d= | grep -v _PASS)
|
||||
for i in ${varlist}; do
|
||||
hadoop_debug "Setting ${i} to ${!i}"
|
||||
done
|
||||
fi
|
||||
}
|
||||
|
||||
# if HTTPFS_HOME is already set warn it will be ignored
|
||||
#
|
||||
if [ "${HTTPFS_HOME}" != "" ]; then
|
||||
echo "WARNING: current setting of HTTPFS_HOME ignored"
|
||||
fi
|
||||
|
||||
print
|
||||
|
||||
# setting HTTPFS_HOME to the installation dir, it cannot be changed
|
||||
#
|
||||
export HTTPFS_HOME=${BASEDIR}
|
||||
httpfs_home=${HTTPFS_HOME}
|
||||
print "Setting HTTPFS_HOME: ${HTTPFS_HOME}"
|
||||
|
||||
# if the installation has a env file, source it
|
||||
# this is for native packages installations
|
||||
#
|
||||
if [ -e "${HTTPFS_HOME}/bin/httpfs-env.sh" ]; then
|
||||
print "Sourcing: ${HTTPFS_HOME}/bin/httpfs-env.sh"
|
||||
source ${HTTPFS_HOME}/bin/httpfs-env.sh
|
||||
grep "^ *export " ${HTTPFS_HOME}/bin/httpfs-env.sh | sed 's/ *export/ setting/'
|
||||
fi
|
||||
|
||||
# verify that the sourced env file didn't change HTTPFS_HOME
|
||||
# if so, warn and revert
|
||||
#
|
||||
if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
|
||||
print "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
|
||||
export HTTPFS_HOME=${httpfs_home}
|
||||
print " using HTTPFS_HOME: ${HTTPFS_HOME}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_CONFIG}" = "" ]; then
|
||||
export HTTPFS_CONFIG=${HTTPFS_HOME}/etc/hadoop
|
||||
print "Setting HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
|
||||
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
|
||||
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
|
||||
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
|
||||
elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
|
||||
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
|
||||
elif [[ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]]; then
|
||||
. "${HADOOP_PREFIX}/libexec/hadoop-config.sh"
|
||||
else
|
||||
print "Using HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
|
||||
# Diagnostics belong on stderr; "2>&1" would leave the message on stdout.
echo "ERROR: Hadoop common not found." >&2
|
||||
exit 1
|
||||
fi
|
||||
httpfs_config=${HTTPFS_CONFIG}
|
||||
|
||||
# if the configuration dir has a env file, source it
|
||||
#
|
||||
if [ -e "${HTTPFS_CONFIG}/httpfs-env.sh" ]; then
|
||||
print "Sourcing: ${HTTPFS_CONFIG}/httpfs-env.sh"
|
||||
source ${HTTPFS_CONFIG}/httpfs-env.sh
|
||||
grep "^ *export " ${HTTPFS_CONFIG}/httpfs-env.sh | sed 's/ *export/ setting/'
|
||||
fi
|
||||
|
||||
# verify that the sourced env file didn't change HTTPFS_HOME
|
||||
# if so, warn and revert
|
||||
#
|
||||
if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
|
||||
echo "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
|
||||
export HTTPFS_HOME=${httpfs_home}
|
||||
fi
|
||||
|
||||
# verify that the sourced env file didn't change HTTPFS_CONFIG
|
||||
# if so, warn and revert
|
||||
#
|
||||
if [ "${HTTPFS_CONFIG}" != "${httpfs_config}" ]; then
|
||||
echo "WARN: HTTPFS_CONFIG resetting to ''${HTTPFS_CONFIG}'' ignored"
|
||||
export HTTPFS_CONFIG=${httpfs_config}
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_LOG}" = "" ]; then
|
||||
export HTTPFS_LOG=${HTTPFS_HOME}/logs
|
||||
print "Setting HTTPFS_LOG: ${HTTPFS_LOG}"
|
||||
else
|
||||
print "Using HTTPFS_LOG: ${HTTPFS_LOG}"
|
||||
fi
|
||||
|
||||
if [ ! -f ${HTTPFS_LOG} ]; then
|
||||
mkdir -p ${HTTPFS_LOG}
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_TEMP}" = "" ]; then
|
||||
export HTTPFS_TEMP=${HTTPFS_HOME}/temp
|
||||
print "Setting HTTPFS_TEMP: ${HTTPFS_TEMP}"
|
||||
else
|
||||
print "Using HTTPFS_TEMP: ${HTTPFS_TEMP}"
|
||||
fi
|
||||
|
||||
if [ ! -f ${HTTPFS_TEMP} ]; then
|
||||
mkdir -p ${HTTPFS_TEMP}
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_HTTP_PORT}" = "" ]; then
|
||||
export HTTPFS_HTTP_PORT=14000
|
||||
print "Setting HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
|
||||
else
|
||||
print "Using HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_ADMIN_PORT}" = "" ]; then
|
||||
export HTTPFS_ADMIN_PORT=`expr $HTTPFS_HTTP_PORT + 1`
|
||||
print "Setting HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}"
|
||||
else
|
||||
print "Using HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_HTTP_HOSTNAME}" = "" ]; then
|
||||
export HTTPFS_HTTP_HOSTNAME=`hostname -f`
|
||||
print "Setting HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}"
|
||||
else
|
||||
print "Using HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_SSL_ENABLED}" = "" ]; then
|
||||
export HTTPFS_SSL_ENABLED="false"
|
||||
print "Setting HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}"
|
||||
else
|
||||
print "Using HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_SSL_KEYSTORE_FILE}" = "" ]; then
|
||||
export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
|
||||
print "Setting HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}"
|
||||
else
|
||||
print "Using HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_SSL_KEYSTORE_PASS}" = "" ]; then
|
||||
export HTTPFS_SSL_KEYSTORE_PASS=password
|
||||
print "Setting HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}"
|
||||
else
|
||||
print "Using HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}"
|
||||
fi
|
||||
|
||||
if [ "${CATALINA_BASE}" = "" ]; then
|
||||
export CATALINA_BASE=${HTTPFS_HOME}/share/hadoop/httpfs/tomcat
|
||||
print "Setting CATALINA_BASE: ${CATALINA_BASE}"
|
||||
else
|
||||
print "Using CATALINA_BASE: ${CATALINA_BASE}"
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_CATALINA_HOME}" = "" ]; then
|
||||
export HTTPFS_CATALINA_HOME=${CATALINA_BASE}
|
||||
print "Setting HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}"
|
||||
else
|
||||
print "Using HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}"
|
||||
fi
|
||||
|
||||
if [ "${CATALINA_OUT}" = "" ]; then
|
||||
export CATALINA_OUT=${HTTPFS_LOG}/httpfs-catalina.out
|
||||
print "Setting CATALINA_OUT: ${CATALINA_OUT}"
|
||||
else
|
||||
print "Using CATALINA_OUT: ${CATALINA_OUT}"
|
||||
fi
|
||||
|
||||
if [ "${CATALINA_PID}" = "" ]; then
|
||||
export CATALINA_PID=/tmp/httpfs.pid
|
||||
print "Setting CATALINA_PID: ${CATALINA_PID}"
|
||||
else
|
||||
print "Using CATALINA_PID: ${CATALINA_PID}"
|
||||
fi
|
||||
|
||||
print
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env bash
|
||||
#!/bin/bash
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -13,53 +13,99 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
# resolve links - $0 may be a softlink
|
||||
PRG="${0}"
|
||||
function hadoop_usage()
|
||||
{
|
||||
echo "Usage: httpfs.sh [--config confdir] [--debug] --daemon start|status|stop"
|
||||
echo " httpfs.sh [--config confdir] [--debug] COMMAND"
|
||||
echo " where COMMAND is one of:"
|
||||
echo " run Start httpfs in the current window"
|
||||
echo " run -security Start in the current window with security manager"
|
||||
echo " start Start httpfs in a separate window"
|
||||
echo " start -security Start in a separate window with security manager"
|
||||
echo " status Return the LSB compliant status"
|
||||
echo " stop Stop httpfs, waiting up to 5 seconds for the process to end"
|
||||
echo " stop n Stop httpfs, waiting up to n seconds for the process to end"
|
||||
echo " stop -force Stop httpfs, wait up to 5 seconds and then use kill -KILL if still running"
|
||||
echo " stop n -force Stop httpfs, wait up to n seconds and then use kill -KILL if still running"
|
||||
}
|
||||
|
||||
while [ -h "${PRG}" ]; do
|
||||
ls=`ls -ld "${PRG}"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "${PRG}"`/"$link"
|
||||
fi
|
||||
done
|
||||
# let's locate libexec...
|
||||
if [[ -n "${HADOOP_PREFIX}" ]]; then
|
||||
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
|
||||
else
|
||||
this="${BASH_SOURCE-$0}"
|
||||
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
|
||||
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
|
||||
fi
|
||||
|
||||
BASEDIR=`dirname ${PRG}`
|
||||
BASEDIR=`cd ${BASEDIR}/..;pwd`
|
||||
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
|
||||
# shellcheck disable=SC2034
|
||||
HADOOP_NEW_CONFIG=true
|
||||
if [[ -f "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh" ]]; then
|
||||
. "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh"
|
||||
else
|
||||
# Diagnostics belong on stderr; "2>&1" would leave the message on stdout.
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/httpfs-config.sh." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
source ${HADOOP_LIBEXEC_DIR:-${BASEDIR}/libexec}/httpfs-config.sh
|
||||
|
||||
# The Java System property 'httpfs.http.port' it is not used by HttpFS,
|
||||
# The Java System property 'httpfs.http.port' is not used by HttpFS,
|
||||
# it is used in Tomcat's server.xml configuration file
|
||||
#
|
||||
print "Using CATALINA_OPTS: ${CATALINA_OPTS}"
|
||||
|
||||
catalina_opts="-Dhttpfs.home.dir=${HTTPFS_HOME}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.config.dir=${HTTPFS_CONFIG}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.log.dir=${HTTPFS_LOG}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.temp.dir=${HTTPFS_TEMP}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.admin.port=${HTTPFS_ADMIN_PORT}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.http.port=${HTTPFS_HTTP_PORT}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.http.hostname=${HTTPFS_HTTP_HOSTNAME}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.file=${HTTPFS_SSL_KEYSTORE_FILE}";
|
||||
catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.pass=${HTTPFS_SSL_KEYSTORE_PASS}";
|
||||
# Mask the trustStorePassword
|
||||
# shellcheck disable=SC2086
|
||||
CATALINA_OPTS_DISP="$(echo ${CATALINA_OPTS} | sed -e 's/trustStorePassword=[^ ]*/trustStorePassword=***/')"
|
||||
|
||||
print "Adding to CATALINA_OPTS: ${catalina_opts}"
|
||||
hadoop_debug "Using CATALINA_OPTS: ${CATALINA_OPTS_DISP}"
|
||||
|
||||
export CATALINA_OPTS="${CATALINA_OPTS} ${catalina_opts}"
|
||||
# We're using hadoop-common, so set up some stuff it might need:
|
||||
hadoop_finalize
|
||||
|
||||
hadoop_verify_logdir
|
||||
|
||||
if [[ $# = 0 ]]; then
|
||||
case "${HADOOP_DAEMON_MODE}" in
|
||||
status)
|
||||
hadoop_status_daemon "${CATALINA_PID}"
|
||||
exit
|
||||
;;
|
||||
start)
|
||||
set -- "start"
|
||||
;;
|
||||
stop)
|
||||
set -- "stop"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
hadoop_finalize_catalina_opts
|
||||
export CATALINA_OPTS
|
||||
|
||||
# A bug in catalina.sh script does not use CATALINA_OPTS for stopping the server
|
||||
#
|
||||
if [ "${1}" = "stop" ]; then
|
||||
if [[ "${1}" = "stop" ]]; then
|
||||
export JAVA_OPTS=${CATALINA_OPTS}
|
||||
fi
|
||||
|
||||
if [ "${HTTPFS_SILENT}" != "true" ]; then
|
||||
exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@"
|
||||
else
|
||||
exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@" > /dev/null
|
||||
# If ssl, then populate the passwords into ssl-server.xml before starting tomcat
|
||||
#
|
||||
# HTTPFS_SSL_KEYSTORE_PASS is a bit odd.
|
||||
# if undefined, then the if test will not enable ssl on its own
|
||||
# if "", set it to "password".
|
||||
# if custom, use provided password
|
||||
#
|
||||
if [[ -f "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" ]]; then
|
||||
if [[ -n "${HTTPFS_SSL_KEYSTORE_PASS+x}" ]] || [[ -n "${HTTPFS_SSL_TRUSTSTORE_PASS}" ]]; then
|
||||
export HTTPFS_SSL_KEYSTORE_PASS=${HTTPFS_SSL_KEYSTORE_PASS:-password}
|
||||
sed -e 's/_httpfs_ssl_keystore_pass_/'${HTTPFS_SSL_KEYSTORE_PASS}'/g' \
|
||||
-e 's/_httpfs_ssl_truststore_pass_/'${HTTPFS_SSL_TRUSTSTORE_PASS}'/g' \
|
||||
"${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" \
|
||||
> "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml"
|
||||
chmod 700 "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml" >/dev/null 2>&1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Hand the configured hostname to Tomcat. The setting is HTTPFS_HTTP_HOSTNAME
# (the name documented in httpfs-env.sh); HTTPFS_HOST_NAME is not set by any
# visible configuration, so using it left the property empty.
hadoop_add_param CATALINA_OPTS -Dhttpfs.http.hostname "-Dhttpfs.http.hostname=${HTTPFS_HTTP_HOSTNAME}"
|
||||
hadoop_add_param CATALINA_OPTS -Dhttpfs.ssl.enabled "-Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}"
|
||||
|
||||
exec "${HADOOP_CATALINA_HOME}/bin/catalina.sh" "$@"
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
<!--The connectors can use a shared executor, you can define one or more named thread pools-->
|
||||
<!--
|
||||
<Executor name="tomcatThreadPool" namePrefix="catalina-exec-"
|
||||
maxThreads="150" minSpareThreads="4"/>
|
||||
maxThreads="httpfs.max.threads" minSpareThreads="4"/>
|
||||
-->
|
||||
|
||||
<!-- Define a SSL HTTP/1.1 Connector on port 8443
|
||||
|
@ -72,7 +72,7 @@
|
|||
maxThreads="150" scheme="https" secure="true"
|
||||
clientAuth="false" sslEnabledProtocols="TLSv1,SSLv2Hello"
|
||||
keystoreFile="${httpfs.ssl.keystore.file}"
|
||||
keystorePass="${httpfs.ssl.keystore.pass}"/>
|
||||
keystorePass="_httpfs_ssl_keystore_pass_"/>
|
||||
|
||||
<!-- Define an AJP 1.3 Connector on port 8009 -->
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License.
|
||||
|
||||
---
|
||||
Hadoop HDFS over HTTP ${project.version} - Server Setup
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Hadoop HDFS over HTTP ${project.version} - Server Setup
|
||||
|
||||
This page explains how to quickly setup HttpFS with Pseudo authentication
|
||||
against a Hadoop cluster with Pseudo authentication.
|
||||
|
||||
* Requirements
|
||||
|
||||
* Java 6+
|
||||
|
||||
* Maven 3+
|
||||
|
||||
* Install HttpFS
|
||||
|
||||
+---+
|
||||
~ $ tar xzf httpfs-${project.version}.tar.gz
|
||||
+---+
|
||||
|
||||
* Configure HttpFS
|
||||
|
||||
By default, HttpFS assumes that Hadoop configuration files
|
||||
(<<<core-site.xml & hdfs-site.xml>>>) are in the HttpFS
|
||||
configuration directory.
|
||||
|
||||
If this is not the case, add to the <<<httpfs-site.xml>>> file the
|
||||
<<<httpfs.hadoop.config.dir>>> property set to the location
|
||||
of the Hadoop configuration directory.
|
||||
|
||||
* Configure Hadoop
|
||||
|
||||
Edit Hadoop <<<core-site.xml>>> and define the Unix user that will
|
||||
run the HttpFS server as a proxyuser. For example:
|
||||
|
||||
+---+
|
||||
...
|
||||
<property>
|
||||
<name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
|
||||
<value>httpfs-host.foo.com</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
...
|
||||
+---+
|
||||
|
||||
IMPORTANT: Replace <<<#HTTPFSUSER#>>> with the Unix user that will
|
||||
start the HttpFS server.
|
||||
|
||||
* Restart Hadoop
|
||||
|
||||
You need to restart Hadoop for the proxyuser configuration to become
|
||||
active.
|
||||
|
||||
* Start/Stop HttpFS
|
||||
|
||||
To start/stop HttpFS use HttpFS's bin/httpfs.sh script. For example:
|
||||
|
||||
+---+
|
||||
httpfs-${project.version} $ bin/httpfs.sh start
|
||||
+---+
|
||||
|
||||
NOTE: Invoking the script without any parameters lists all possible
|
||||
parameters (start, stop, run, etc.). The <<<httpfs.sh>>> script is a wrapper
|
||||
for Tomcat's <<<catalina.sh>>> script that sets the environment variables
|
||||
and Java System properties required to run HttpFS server.
|
||||
|
||||
* Test HttpFS is working
|
||||
|
||||
+---+
|
||||
~ $ curl -i "http://<HTTPFSHOSTNAME>:14000?user.name=babu&op=homedir"
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
|
||||
{"homeDir":"http:\/\/<HTTPFS_HOST>:14000\/user\/babu"}
|
||||
+---+
|
||||
|
||||
* Embedded Tomcat Configuration
|
||||
|
||||
To configure the embedded Tomcat go to the <<<tomcat/conf>>>.
|
||||
|
||||
HttpFS preconfigures the HTTP and Admin ports in Tomcat's <<<server.xml>>> to
|
||||
14000 and 14001.
|
||||
|
||||
Tomcat logs are also preconfigured to go to HttpFS's <<<logs/>>> directory.
|
||||
|
||||
The following environment variables (which can be set in HttpFS's
|
||||
<<<conf/httpfs-env.sh>>> script) can be used to alter those values:
|
||||
|
||||
* HTTPFS_HTTP_PORT
|
||||
|
||||
* HTTPFS_ADMIN_PORT
|
||||
|
||||
* HTTPFS_LOG
|
||||
|
||||
* HttpFS Configuration
|
||||
|
||||
HttpFS supports the following {{{./httpfs-default.html}configuration properties}}
|
||||
in the HttpFS's <<<conf/httpfs-site.xml>>> configuration file.
|
||||
|
||||
* HttpFS over HTTPS (SSL)
|
||||
|
||||
To configure HttpFS to work over SSL edit the {{httpfs-env.sh}} script in the
|
||||
configuration directory setting the {{HTTPFS_SSL_ENABLED}} to {{true}}.
|
||||
|
||||
In addition, the following 2 properties may be defined (shown with default
|
||||
values):
|
||||
|
||||
* HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
|
||||
|
||||
* HTTPFS_SSL_KEYSTORE_PASS=password
|
||||
|
||||
In the HttpFS <<<tomcat/conf>>> directory, replace the <<<server.xml>>> file
|
||||
with the <<<ssl-server.xml>>> file.
|
||||
|
||||
|
||||
You need to create an SSL certificate for the HttpFS server. As the
|
||||
<<<httpfs>>> Unix user, use the Java <<<keytool>>> command to create the
|
||||
SSL certificate:
|
||||
|
||||
+---+
|
||||
$ keytool -genkey -alias tomcat -keyalg RSA
|
||||
+---+
|
||||
|
||||
You will be asked a series of questions in an interactive prompt. It will
|
||||
create the keystore file, which will be named <<.keystore>> and located in the
|
||||
<<<httpfs>>> user home directory.
|
||||
|
||||
The password you enter for "keystore password" must match the value of the
|
||||
<<<HTTPFS_SSL_KEYSTORE_PASS>>> environment variable set in the
|
||||
<<<httpfs-env.sh>>> script in the configuration directory.
|
||||
|
||||
The answer to "What is your first and last name?" (i.e. "CN") must be the
|
||||
hostname of the machine where the HttpFS Server will be running.
|
||||
|
||||
Start HttpFS. It should work over HTTPS.
|
||||
|
||||
Using the Hadoop <<<FileSystem>>> API or the Hadoop FS shell, use the
|
||||
<<<swebhdfs://>>> scheme. Make sure the JVM is picking up the truststore
|
||||
containing the public key of the SSL certificate if using a self-signed
|
||||
certificate.
|
|
@ -1,87 +0,0 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License.
|
||||
|
||||
---
|
||||
Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools
|
||||
|
||||
* Security
|
||||
|
||||
Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP
|
||||
SPNEGO authentication.
|
||||
|
||||
** Pseudo Authentication
|
||||
|
||||
With pseudo authentication the user name must be specified in the
|
||||
<<<user.name=\<USERNAME\>>>> query string parameter of a HttpFS URL.
|
||||
For example:
|
||||
|
||||
+---+
|
||||
$ curl "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
|
||||
+---+
|
||||
|
||||
** Kerberos HTTP SPNEGO Authentication
|
||||
|
||||
Kerberos HTTP SPNEGO authentication requires a tool or library supporting
|
||||
Kerberos HTTP SPNEGO protocol.
|
||||
|
||||
IMPORTANT: If using <<<curl>>>, the <<<curl>>> version being used must support
|
||||
GSS (<<<curl -V>>> prints out 'GSS' if it supports it).
|
||||
|
||||
For example:
|
||||
|
||||
+---+
|
||||
$ kinit
|
||||
Please enter the password for tucu@LOCALHOST:
|
||||
$ curl --negotiate -u foo "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
|
||||
Enter host password for user 'foo':
|
||||
+---+
|
||||
|
||||
NOTE: the <<<-u USER>>> option is required by the <<<--negotiate>>> but it is
|
||||
not used. Use any value as <<<USER>>> and when asked for the password press
|
||||
[ENTER] as the password value is ignored.
|
||||
|
||||
** {Remembering Who I Am} (Establishing an Authenticated Session)
|
||||
|
||||
As most authentication mechanisms, Hadoop HTTP authentication authenticates
|
||||
users once and issues a short-lived authentication token to be presented in
|
||||
subsequent requests. This authentication token is a signed HTTP Cookie.
|
||||
|
||||
When using tools like <<<curl>>>, the authentication token must be stored on
|
||||
the first request doing authentication, and submitted in subsequent requests.
|
||||
To do this with curl the <<<-b>>> and <<<-c>>> options to save and send HTTP
|
||||
Cookies must be used.
|
||||
|
||||
For example, the first request doing authentication should save the received
|
||||
HTTP Cookies.
|
||||
|
||||
Using Pseudo Authentication:
|
||||
|
||||
+---+
|
||||
$ curl -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
|
||||
+---+
|
||||
|
||||
Using Kerberos HTTP SPNEGO authentication:
|
||||
|
||||
+---+
|
||||
$ curl --negotiate -u foo -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
|
||||
+---+
|
||||
|
||||
Then, subsequent requests forward the previously received HTTP Cookie:
|
||||
|
||||
+---+
|
||||
$ curl -b ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=liststatus"
|
||||
+---+
|
|
@ -1,83 +0,0 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License.
|
||||
|
||||
---
|
||||
Hadoop HDFS over HTTP - Documentation Sets ${project.version}
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Hadoop HDFS over HTTP - Documentation Sets ${project.version}
|
||||
|
||||
HttpFS is a server that provides a REST HTTP gateway supporting all HDFS
|
||||
File System operations (read and write). And it is interoperable with the
|
||||
<<webhdfs>> REST HTTP API.
|
||||
|
||||
HttpFS can be used to transfer data between clusters running different
|
||||
versions of Hadoop (overcoming RPC versioning issues), for example using
|
||||
Hadoop DistCP.
|
||||
|
||||
HttpFS can be used to access data in HDFS on a cluster behind of a firewall
|
||||
(the HttpFS server acts as a gateway and is the only system that is allowed
|
||||
to cross the firewall into the cluster).
|
||||
|
||||
HttpFS can be used to access data in HDFS using HTTP utilities (such as curl
|
||||
and wget) and HTTP libraries from languages other than Java (e.g. Perl).
|
||||
|
||||
The <<webhdfs>> client FileSystem implementation can be used to access HttpFS
|
||||
using the Hadoop filesystem command (<<<hadoop fs>>>) line tool as well as
|
||||
from Java applications using the Hadoop FileSystem Java API.
|
||||
|
||||
HttpFS has built-in security supporting Hadoop pseudo authentication and
|
||||
HTTP SPNEGO Kerberos and other pluggable authentication mechanisms. It also
|
||||
provides Hadoop proxy user support.
|
||||
|
||||
* How Does HttpFS Work?
|
||||
|
||||
HttpFS is a separate service from Hadoop NameNode.
|
||||
|
||||
HttpFS itself is Java web-application and it runs using a preconfigured Tomcat
|
||||
bundled with HttpFS binary distribution.
|
||||
|
||||
HttpFS HTTP web-service API calls are HTTP REST calls that map to a HDFS file
|
||||
system operation. For example, using the <<<curl>>> Unix command:
|
||||
|
||||
* <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt>>> returns
|
||||
the contents of the HDFS <<</user/foo/README.txt>>> file.
|
||||
|
||||
* <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list>>> returns the
|
||||
contents of the HDFS <<</user/foo>>> directory in JSON format.
|
||||
|
||||
* <<<$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs>>>
|
||||
creates the HDFS <<</user/foo/bar>>> directory.
|
||||
|
||||
* How HttpFS and Hadoop HDFS Proxy differ?
|
||||
|
||||
HttpFS was inspired by Hadoop HDFS proxy.
|
||||
|
||||
HttpFS can be seen as a full rewrite of Hadoop HDFS proxy.
|
||||
|
||||
Hadoop HDFS proxy provides a subset of file system operations (read only),
|
||||
HttpFS provides support for all file system operations.
|
||||
|
||||
HttpFS uses a clean HTTP REST API making its use with HTTP tools more
|
||||
intuitive.
|
||||
|
||||
HttpFS supports Hadoop pseudo authentication, Kerberos SPNEGO authentication
|
||||
and Hadoop proxy users. Hadoop HDFS proxy did not.
|
||||
|
||||
* User and Developer Documentation
|
||||
|
||||
* {{{./ServerSetup.html}HttpFS Server Setup}}
|
||||
|
||||
* {{{./UsingHttpTools.html}Using HTTP Tools}}
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
<!---
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
Hadoop HDFS over HTTP - Server Setup
|
||||
====================================
|
||||
|
||||
This page explains how to quickly set up HttpFS with Pseudo authentication against a Hadoop cluster with Pseudo authentication.
|
||||
|
||||
Install HttpFS
|
||||
--------------
|
||||
|
||||
~ $ tar xzf httpfs-${project.version}.tar.gz
|
||||
|
||||
Configure HttpFS
|
||||
----------------
|
||||
|
||||
By default, HttpFS assumes that Hadoop configuration files (`core-site.xml` and `hdfs-site.xml`) are in the HttpFS configuration directory.
|
||||
|
||||
If this is not the case, add to the `httpfs-site.xml` file the `httpfs.hadoop.config.dir` property set to the location of the Hadoop configuration directory.
|
||||
|
||||
Configure Hadoop
|
||||
----------------
|
||||
|
||||
Edit Hadoop `core-site.xml` and define the Unix user that will run the HttpFS server as a proxyuser. For example:
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
|
||||
<value>httpfs-host.foo.com</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
IMPORTANT: Replace `#HTTPFSUSER#` with the Unix user that will start the HttpFS server.
|
||||
|
||||
Restart Hadoop
|
||||
--------------
|
||||
|
||||
You need to restart Hadoop for the proxyuser configuration to become active.
|
||||
|
||||
Start/Stop HttpFS
|
||||
-----------------
|
||||
|
||||
To start/stop HttpFS use HttpFS's sbin/httpfs.sh script. For example:
|
||||
|
||||
$ sbin/httpfs.sh start
|
||||
|
||||
NOTE: Invoking the script without any parameters lists all possible parameters (start, stop, run, etc.). The `httpfs.sh` script is a wrapper for Tomcat's `catalina.sh` script that sets the environment variables and Java System properties required to run HttpFS server.
|
||||
|
||||
Test HttpFS is working
|
||||
----------------------
|
||||
|
||||
~ $ curl -i "http://<HTTPFSHOSTNAME>:14000/webhdfs/v1?user.name=babu&op=homedir"
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
|
||||
{"homeDir":"http:\/\/<HTTPFS_HOST>:14000\/user\/babu"}
|
||||
|
||||
Embedded Tomcat Configuration
|
||||
-----------------------------
|
||||
|
||||
To configure the embedded Tomcat go to the `tomcat/conf` directory.
|
||||
|
||||
HttpFS preconfigures the HTTP and Admin ports in Tomcat's `server.xml` to 14000 and 14001.
|
||||
|
||||
Tomcat logs are also preconfigured to go to HttpFS's `logs/` directory.
|
||||
|
||||
The following environment variables (which can be set in HttpFS's `etc/hadoop/httpfs-env.sh` script) can be used to alter those values:
|
||||
|
||||
* HTTPFS\_HTTP\_PORT
|
||||
|
||||
* HTTPFS\_ADMIN\_PORT
|
||||
|
||||
* HADOOP\_LOG\_DIR
|
||||
|
||||
HttpFS Configuration
|
||||
--------------------
|
||||
|
||||
HttpFS supports the following [configuration properties](./httpfs-default.html) in the HttpFS's `etc/hadoop/httpfs-site.xml` configuration file.
|
||||
|
||||
HttpFS over HTTPS (SSL)
|
||||
-----------------------
|
||||
|
||||
To configure HttpFS to work over SSL, edit the `httpfs-env.sh` script in the configuration directory, setting `HTTPFS_SSL_ENABLED` to `true`.
|
||||
|
||||
In addition, the following 2 properties may be defined (shown with default values):
|
||||
|
||||
* HTTPFS\_SSL\_KEYSTORE\_FILE=$HOME/.keystore
|
||||
|
||||
* HTTPFS\_SSL\_KEYSTORE\_PASS=password
|
||||
|
||||
In the HttpFS `tomcat/conf` directory, replace the `server.xml` file with the `ssl-server.xml` file.
|
||||
|
||||
You need to create an SSL certificate for the HttpFS server. As the `httpfs` Unix user, use the Java `keytool` command to create the SSL certificate:
|
||||
|
||||
$ keytool -genkey -alias tomcat -keyalg RSA
|
||||
|
||||
You will be asked a series of questions in an interactive prompt. It will create the keystore file, which will be named **.keystore** and located in the `httpfs` user home directory.
|
||||
|
||||
The password you enter for "keystore password" must match the value of the `HTTPFS_SSL_KEYSTORE_PASS` environment variable set in the `httpfs-env.sh` script in the configuration directory.
|
||||
|
||||
The answer to "What is your first and last name?" (i.e. "CN") must be the hostname of the machine where the HttpFS Server will be running.
|
||||
|
||||
Start HttpFS. It should work over HTTPS.
|
||||
|
||||
Using the Hadoop `FileSystem` API or the Hadoop FS shell, use the `swebhdfs://` scheme. Make sure the JVM is picking up the truststore containing the public key of the SSL certificate if using a self-signed certificate.
|
|
@ -0,0 +1,62 @@
|
|||
<!---
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
Hadoop HDFS over HTTP - Using HTTP Tools
|
||||
========================================
|
||||
|
||||
Security
|
||||
--------
|
||||
|
||||
Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP SPNEGO authentication.
|
||||
|
||||
### Pseudo Authentication
|
||||
|
||||
With pseudo authentication the user name must be specified in the `user.name=<USERNAME>` query string parameter of a HttpFS URL. For example:
|
||||
|
||||
$ curl "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
|
||||
|
||||
### Kerberos HTTP SPNEGO Authentication
|
||||
|
||||
Kerberos HTTP SPNEGO authentication requires a tool or library supporting Kerberos HTTP SPNEGO protocol.
|
||||
|
||||
IMPORTANT: If using `curl`, the `curl` version being used must support GSS (`curl -V` prints out 'GSS' if it supports it).
|
||||
|
||||
For example:
|
||||
|
||||
$ kinit
|
||||
Please enter the password for user@LOCALHOST:
|
||||
$ curl --negotiate -u foo "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
|
||||
Enter host password for user 'foo':
|
||||
|
||||
NOTE: the `-u USER` option is required by the `--negotiate` option but it is not used. Use any value as `USER` and when asked for the password press [ENTER] as the password value is ignored.
|
||||
|
||||
### Remembering Who I Am (Establishing an Authenticated Session)
|
||||
|
||||
Like most authentication mechanisms, Hadoop HTTP authentication authenticates users once and issues a short-lived authentication token to be presented in subsequent requests. This authentication token is a signed HTTP Cookie.
|
||||
|
||||
When using tools like `curl`, the authentication token must be stored on the first request doing authentication, and submitted in subsequent requests. To do this with curl the `-b` and `-c` options to save and send HTTP Cookies must be used.
|
||||
|
||||
For example, the first request doing authentication should save the received HTTP Cookies.
|
||||
|
||||
Using Pseudo Authentication:
|
||||
|
||||
$ curl -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=foo"
|
||||
|
||||
Using Kerberos HTTP SPNEGO authentication:
|
||||
|
||||
$ curl --negotiate -u foo -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
|
||||
|
||||
Then, subsequent requests forward the previously received HTTP Cookie:
|
||||
|
||||
$ curl -b ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=liststatus"
|
|
@ -0,0 +1,52 @@
|
|||
<!---
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
Hadoop HDFS over HTTP - Documentation Sets
|
||||
==========================================
|
||||
|
||||
HttpFS is a server that provides a REST HTTP gateway supporting all HDFS File System operations (read and write), and it is interoperable with the **webhdfs** REST HTTP API.
|
||||
|
||||
HttpFS can be used to transfer data between clusters running different versions of Hadoop (overcoming RPC versioning issues), for example using Hadoop DistCP.
|
||||
|
||||
HttpFS can be used to access data in HDFS on a cluster behind a firewall (the HttpFS server acts as a gateway and is the only system that is allowed to cross the firewall into the cluster).
|
||||
|
||||
HttpFS can be used to access data in HDFS using HTTP utilities (such as curl and wget) and HTTP libraries from languages other than Java (such as Perl).
|
||||
|
||||
The **webhdfs** client FileSystem implementation can be used to access HttpFS using the Hadoop filesystem command line tool (`hadoop fs`) as well as from Java applications using the Hadoop FileSystem Java API.
|
||||
|
||||
HttpFS has built-in security supporting Hadoop pseudo authentication and HTTP SPNEGO Kerberos and other pluggable authentication mechanisms. It also provides Hadoop proxy user support.
|
||||
|
||||
How Does HttpFS Work?
|
||||
----------------------
|
||||
|
||||
HttpFS is a separate service from Hadoop NameNode.
|
||||
|
||||
HttpFS itself is a Java web-application and it runs using a preconfigured Tomcat bundled with HttpFS binary distribution.
|
||||
|
||||
HttpFS HTTP web-service API calls are HTTP REST calls that map to a HDFS file system operation. For example, using the `curl` Unix command:
|
||||
|
||||
* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt` returns the contents of the HDFS `/user/foo/README.txt` file.
|
||||
|
||||
* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list` returns the contents of the HDFS `/user/foo` directory in JSON format.
|
||||
|
||||
* `$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs` creates the HDFS `/user/foo/bar` directory.
|
||||
|
||||
User and Developer Documentation
|
||||
--------------------------------
|
||||
|
||||
* [HttpFS Server Setup](./ServerSetup.html)
|
||||
|
||||
* [Using HTTP Tools](./UsingHttpTools.html)
|
||||
|
||||
|
|
@ -141,6 +141,8 @@ Trunk (Unreleased)
|
|||
HDFS-7668. Convert site documentation from apt to markdown (Masatake
|
||||
Iwasaki via aw)
|
||||
|
||||
HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
Loading…
Reference in New Issue