HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)

Allen Wittenauer 2015-02-25 18:57:41 -08:00
parent 606f5b517f
commit 8c4f76aa20
12 changed files with 408 additions and 555 deletions

View File

@@ -524,7 +524,7 @@
<copy file="${basedir}/src/main/tomcat/server.xml"
toDir="${httpfs.tomcat.dist.dir}/conf"/>
<delete file="${httpfs.tomcat.dist.dir}/conf/ssl-server.xml"/>
<copy file="${basedir}/src/main/tomcat/ssl-server.xml"
<copy file="${basedir}/src/main/tomcat/ssl-server.xml.conf"
toDir="${httpfs.tomcat.dist.dir}/conf"/>
<delete file="${httpfs.tomcat.dist.dir}/conf/logging.properties"/>
<copy file="${basedir}/src/main/tomcat/logging.properties"

View File

@@ -14,40 +14,59 @@
#
# Set httpfs specific environment variables here.
# Settings for the Embedded Tomcat that runs HttpFS
# Java System properties for HttpFS should be specified in this variable
#
# export CATALINA_OPTS=
# HttpFS logs directory
# hadoop-env.sh is read prior to this file.
#
# export HTTPFS_LOG=${HTTPFS_HOME}/logs
# HttpFS temporary directory
# HTTPFS temporary directory
#
# export HTTPFS_TEMP=${HTTPFS_HOME}/temp
# export HTTPFS_TEMP=${HADOOP_PREFIX}/temp
# The HTTP port used by HttpFS
# The HTTP port used by HTTPFS
#
# export HTTPFS_HTTP_PORT=14000
# The Admin port used by HttpFS
# The Admin port used by HTTPFS
#
# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1`
# export HTTPFS_ADMIN_PORT=$((HTTPFS_HTTP_PORT + 1))
# The maximum number of Tomcat handler threads
#
# export HTTPFS_MAX_THREADS=1000
# The hostname HttpFS server runs on
#
# export HTTPFS_HTTP_HOSTNAME=`hostname -f`
# Indicates if HttpFS is using SSL
#
# export HTTPFS_SSL_ENABLED=false
# export HTTPFS_HTTP_HOSTNAME=$(hostname -f)
# The location of the SSL keystore if using SSL
#
# export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
#
# The password of the SSL keystore if using SSL
#
# export HTTPFS_SSL_KEYSTORE_PASS=password
##
## Tomcat specific settings
##
#
# Location of tomcat
#
# export HTTPFS_CATALINA_HOME=${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat
# Java System properties for HTTPFS should be specified in this variable.
# The java.library.path and hadoop.home.dir properties are automatically
# configured. In order to supplement java.library.path,
# one should add to the JAVA_LIBRARY_PATH env var.
#
# export CATALINA_OPTS=
# PID file
#
# export CATALINA_PID=${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid
# Output file
#
# export CATALINA_OUT=${HTTPFS_LOG}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out
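For operators filling in this template, a minimal `httpfs-env.sh` might look like the sketch below. The variable names come from the file above; the port, thread count, keystore path, and password values are purely illustrative, not defaults shipped by the commit.

```bash
# Illustrative httpfs-env.sh overrides (values are examples only)
export HTTPFS_HTTP_PORT=14500                        # non-default HTTP port
export HTTPFS_ADMIN_PORT=$((HTTPFS_HTTP_PORT + 1))   # admin port derived from it
export HTTPFS_MAX_THREADS=500                        # Tomcat handler threads

# SSL settings; keystore path and password are placeholders
export HTTPFS_SSL_ENABLED=true
export HTTPFS_SSL_KEYSTORE_FILE=/etc/security/keystores/httpfs.jks
export HTTPFS_SSL_KEYSTORE_PASS=changeit
```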

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,183 +13,63 @@
# limitations under the License.
#
# resolve links - $0 may be a softlink
PRG="${0}"
function hadoop_subproject_init
{
local this
local binparent
local varlist
while [ -h "${PRG}" ]; do
ls=`ls -ld "${PRG}"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "${PRG}"`/"$link"
if [[ -z "${HADOOP_HTTPFS_ENV_PROCESSED}" ]]; then
if [[ -e "${HADOOP_CONF_DIR}/httpfs-env.sh" ]]; then
. "${HADOOP_CONF_DIR}/httpfs-env.sh"
export HADOOP_HTTPFS_ENV_PROCESSED=true
fi
fi
export HADOOP_CATALINA_PREFIX=httpfs
export HADOOP_CATALINA_TEMP="${HTTPFS_TEMP:-${HADOOP_PREFIX}/temp}"
hadoop_deprecate_envvar HTTPFS_CONFIG HADOOP_CONF_DIR
hadoop_deprecate_envvar HTTPFS_LOG HADOOP_LOG_DIR
export HADOOP_CATALINA_CONFIG="${HADOOP_CONF_DIR}"
export HADOOP_CATALINA_LOG="${HADOOP_LOG_DIR}"
export HTTPFS_HTTP_HOSTNAME=${HTTPFS_HTTP_HOSTNAME:-$(hostname -f)}
export HADOOP_CATALINA_HTTP_PORT="${HTTPFS_HTTP_PORT:-14000}"
export HADOOP_CATALINA_ADMIN_PORT="${HTTPFS_ADMIN_PORT:-$((HADOOP_CATALINA_HTTP_PORT+1))}"
export HADOOP_CATALINA_MAX_THREADS="${HTTPFS_MAX_THREADS:-150}"
export HTTPFS_SSL_ENABLED=${HTTPFS_SSL_ENABLED:-false}
export HADOOP_CATALINA_SSL_KEYSTORE_FILE="${HTTPFS_SSL_KEYSTORE_FILE:-${HOME}/.keystore}"
export CATALINA_BASE="${CATALINA_BASE:-${HADOOP_PREFIX}/share/hadoop/httpfs/tomcat}"
export HADOOP_CATALINA_HOME="${HTTPFS_CATALINA_HOME:-${CATALINA_BASE}}"
export CATALINA_OUT="${CATALINA_OUT:-${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs-${HOSTNAME}.out}"
export CATALINA_PID="${CATALINA_PID:-${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-httpfs.pid}"
if [[ -n "${HADOOP_SHELL_SCRIPT_DEBUG}" ]]; then
varlist=$(env | egrep '(^HTTPFS|^CATALINA)' | cut -f1 -d= | grep -v _PASS)
for i in ${varlist}; do
hadoop_debug "Setting ${i} to ${!i}"
done
BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
function print() {
if [ "${HTTPFS_SILENT}" != "true" ]; then
echo "$@"
fi
}
# if HTTPFS_HOME is already set warn it will be ignored
#
if [ "${HTTPFS_HOME}" != "" ]; then
echo "WARNING: current setting of HTTPFS_HOME ignored"
fi
print
# setting HTTPFS_HOME to the installation dir, it cannot be changed
#
export HTTPFS_HOME=${BASEDIR}
httpfs_home=${HTTPFS_HOME}
print "Setting HTTPFS_HOME: ${HTTPFS_HOME}"
# if the installation has a env file, source it
# this is for native packages installations
#
if [ -e "${HTTPFS_HOME}/bin/httpfs-env.sh" ]; then
print "Sourcing: ${HTTPFS_HOME}/bin/httpfs-env.sh"
source ${HTTPFS_HOME}/bin/httpfs-env.sh
grep "^ *export " ${HTTPFS_HOME}/bin/httpfs-env.sh | sed 's/ *export/ setting/'
fi
# verify that the sourced env file didn't change HTTPFS_HOME
# if so, warn and revert
#
if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
print "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
export HTTPFS_HOME=${httpfs_home}
print " using HTTPFS_HOME: ${HTTPFS_HOME}"
fi
if [ "${HTTPFS_CONFIG}" = "" ]; then
export HTTPFS_CONFIG=${HTTPFS_HOME}/etc/hadoop
print "Setting HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [[ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_PREFIX}/libexec/hadoop-config.sh"
else
print "Using HTTPFS_CONFIG: ${HTTPFS_CONFIG}"
echo "ERROR: Hadoop common not found." 2>&1
exit 1
fi
httpfs_config=${HTTPFS_CONFIG}
# if the configuration dir has a env file, source it
#
if [ -e "${HTTPFS_CONFIG}/httpfs-env.sh" ]; then
print "Sourcing: ${HTTPFS_CONFIG}/httpfs-env.sh"
source ${HTTPFS_CONFIG}/httpfs-env.sh
grep "^ *export " ${HTTPFS_CONFIG}/httpfs-env.sh | sed 's/ *export/ setting/'
fi
# verify that the sourced env file didn't change HTTPFS_HOME
# if so, warn and revert
#
if [ "${HTTPFS_HOME}" != "${httpfs_home}" ]; then
echo "WARN: HTTPFS_HOME resetting to ''${HTTPFS_HOME}'' ignored"
export HTTPFS_HOME=${httpfs_home}
fi
# verify that the sourced env file didn't change HTTPFS_CONFIG
# if so, warn and revert
#
if [ "${HTTPFS_CONFIG}" != "${httpfs_config}" ]; then
echo "WARN: HTTPFS_CONFIG resetting to ''${HTTPFS_CONFIG}'' ignored"
export HTTPFS_CONFIG=${httpfs_config}
fi
if [ "${HTTPFS_LOG}" = "" ]; then
export HTTPFS_LOG=${HTTPFS_HOME}/logs
print "Setting HTTPFS_LOG: ${HTTPFS_LOG}"
else
print "Using HTTPFS_LOG: ${HTTPFS_LOG}"
fi
if [ ! -f ${HTTPFS_LOG} ]; then
mkdir -p ${HTTPFS_LOG}
fi
if [ "${HTTPFS_TEMP}" = "" ]; then
export HTTPFS_TEMP=${HTTPFS_HOME}/temp
print "Setting HTTPFS_TEMP: ${HTTPFS_TEMP}"
else
print "Using HTTPFS_TEMP: ${HTTPFS_TEMP}"
fi
if [ ! -f ${HTTPFS_TEMP} ]; then
mkdir -p ${HTTPFS_TEMP}
fi
if [ "${HTTPFS_HTTP_PORT}" = "" ]; then
export HTTPFS_HTTP_PORT=14000
print "Setting HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
else
print "Using HTTPFS_HTTP_PORT: ${HTTPFS_HTTP_PORT}"
fi
if [ "${HTTPFS_ADMIN_PORT}" = "" ]; then
export HTTPFS_ADMIN_PORT=`expr $HTTPFS_HTTP_PORT + 1`
print "Setting HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}"
else
print "Using HTTPFS_ADMIN_PORT: ${HTTPFS_ADMIN_PORT}"
fi
if [ "${HTTPFS_HTTP_HOSTNAME}" = "" ]; then
export HTTPFS_HTTP_HOSTNAME=`hostname -f`
print "Setting HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}"
else
print "Using HTTPFS_HTTP_HOSTNAME: ${HTTPFS_HTTP_HOSTNAME}"
fi
if [ "${HTTPFS_SSL_ENABLED}" = "" ]; then
export HTTPFS_SSL_ENABLED="false"
print "Setting HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}"
else
print "Using HTTPFS_SSL_ENABLED: ${HTTPFS_SSL_ENABLED}"
fi
if [ "${HTTPFS_SSL_KEYSTORE_FILE}" = "" ]; then
export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
print "Setting HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}"
else
print "Using HTTPFS_SSL_KEYSTORE_FILE: ${HTTPFS_SSL_KEYSTORE_FILE}"
fi
if [ "${HTTPFS_SSL_KEYSTORE_PASS}" = "" ]; then
export HTTPFS_SSL_KEYSTORE_PASS=password
print "Setting HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}"
else
print "Using HTTPFS_SSL_KEYSTORE_PASS: ${HTTPFS_SSL_KEYSTORE_PASS}"
fi
if [ "${CATALINA_BASE}" = "" ]; then
export CATALINA_BASE=${HTTPFS_HOME}/share/hadoop/httpfs/tomcat
print "Setting CATALINA_BASE: ${CATALINA_BASE}"
else
print "Using CATALINA_BASE: ${CATALINA_BASE}"
fi
if [ "${HTTPFS_CATALINA_HOME}" = "" ]; then
export HTTPFS_CATALINA_HOME=${CATALINA_BASE}
print "Setting HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}"
else
print "Using HTTPFS_CATALINA_HOME: ${HTTPFS_CATALINA_HOME}"
fi
if [ "${CATALINA_OUT}" = "" ]; then
export CATALINA_OUT=${HTTPFS_LOG}/httpfs-catalina.out
print "Setting CATALINA_OUT: ${CATALINA_OUT}"
else
print "Using CATALINA_OUT: ${CATALINA_OUT}"
fi
if [ "${CATALINA_PID}" = "" ]; then
export CATALINA_PID=/tmp/httpfs.pid
print "Setting CATALINA_PID: ${CATALINA_PID}"
else
print "Using CATALINA_PID: ${CATALINA_PID}"
fi
print
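The rewritten `hadoop_subproject_init` above replaces the old chain of `if`/`print` blocks with one-line defaults via shell parameter expansion. A standalone sketch of that pattern, runnable on its own (the printed output is an example, not anything the framework emits):

```bash
#!/bin/bash
# Operator-supplied values win; otherwise the expansion supplies the default.
HTTPFS_HTTP_PORT="${HTTPFS_HTTP_PORT:-14000}"                        # default port
HTTPFS_ADMIN_PORT="${HTTPFS_ADMIN_PORT:-$((HTTPFS_HTTP_PORT + 1))}"  # arithmetic, no expr fork
HTTPFS_HTTP_HOSTNAME="${HTTPFS_HTTP_HOSTNAME:-$(hostname -f)}"       # command substitution

echo "http=${HTTPFS_HTTP_PORT} admin=${HTTPFS_ADMIN_PORT} host=${HTTPFS_HTTP_HOSTNAME}"
```

The same pattern covers the Catalina paths and the PID/out files, which is why the function is so much shorter than the script it replaces.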

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,53 +13,99 @@
# limitations under the License.
#
# resolve links - $0 may be a softlink
PRG="${0}"
function hadoop_usage()
{
echo "Usage: httpfs.sh [--config confdir] [--debug] --daemon start|status|stop"
echo " httpfs.sh [--config confdir] [--debug] COMMAND"
echo " where COMMAND is one of:"
echo " run Start httpfs in the current window"
echo " run -security Start in the current window with security manager"
echo " start Start httpfs in a separate window"
echo " start -security Start in a separate window with security manager"
echo " status Return the LSB compliant status"
echo " stop Stop httpfs, waiting up to 5 seconds for the process to end"
echo " stop n Stop httpfs, waiting up to n seconds for the process to end"
echo " stop -force Stop httpfs, wait up to 5 seconds and then use kill -KILL if still running"
echo " stop n -force Stop httpfs, wait up to n seconds and then use kill -KILL if still running"
}
while [ -h "${PRG}" ]; do
ls=`ls -ld "${PRG}"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
PRG=`dirname "${PRG}"`/"$link"
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
done
BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/httpfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/httpfs-config.sh." 2>&1
exit 1
fi
source ${HADOOP_LIBEXEC_DIR:-${BASEDIR}/libexec}/httpfs-config.sh
# The Java System property 'httpfs.http.port' it is not used by HttpFS,
# The Java System property 'httpfs.http.port' is not used by HttpFS,
# it is used in Tomcat's server.xml configuration file
#
print "Using CATALINA_OPTS: ${CATALINA_OPTS}"
catalina_opts="-Dhttpfs.home.dir=${HTTPFS_HOME}";
catalina_opts="${catalina_opts} -Dhttpfs.config.dir=${HTTPFS_CONFIG}";
catalina_opts="${catalina_opts} -Dhttpfs.log.dir=${HTTPFS_LOG}";
catalina_opts="${catalina_opts} -Dhttpfs.temp.dir=${HTTPFS_TEMP}";
catalina_opts="${catalina_opts} -Dhttpfs.admin.port=${HTTPFS_ADMIN_PORT}";
catalina_opts="${catalina_opts} -Dhttpfs.http.port=${HTTPFS_HTTP_PORT}";
catalina_opts="${catalina_opts} -Dhttpfs.http.hostname=${HTTPFS_HTTP_HOSTNAME}";
catalina_opts="${catalina_opts} -Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}";
catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.file=${HTTPFS_SSL_KEYSTORE_FILE}";
catalina_opts="${catalina_opts} -Dhttpfs.ssl.keystore.pass=${HTTPFS_SSL_KEYSTORE_PASS}";
# Mask the trustStorePassword
# shellcheck disable=SC2086
CATALINA_OPTS_DISP="$(echo ${CATALINA_OPTS} | sed -e 's/trustStorePassword=[^ ]*/trustStorePassword=***/')"
print "Adding to CATALINA_OPTS: ${catalina_opts}"
hadoop_debug "Using CATALINA_OPTS: ${CATALINA_OPTS_DISP}"
export CATALINA_OPTS="${CATALINA_OPTS} ${catalina_opts}"
# We're using hadoop-common, so set up some stuff it might need:
hadoop_finalize
hadoop_verify_logdir
if [[ $# = 0 ]]; then
case "${HADOOP_DAEMON_MODE}" in
status)
hadoop_status_daemon "${CATALINA_PID}"
exit
;;
start)
set -- "start"
;;
stop)
set -- "stop"
;;
esac
fi
hadoop_finalize_catalina_opts
export CATALINA_OPTS
# A bug in catalina.sh script does not use CATALINA_OPTS for stopping the server
#
if [ "${1}" = "stop" ]; then
if [[ "${1}" = "stop" ]]; then
export JAVA_OPTS=${CATALINA_OPTS}
fi
if [ "${HTTPFS_SILENT}" != "true" ]; then
exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@"
else
exec ${HTTPFS_CATALINA_HOME}/bin/catalina.sh "$@" > /dev/null
# If ssl, the populate the passwords into ssl-server.xml before starting tomcat
#
# HTTPFS_SSL_KEYSTORE_PASS is a bit odd.
# if undefined, then the if test will not enable ssl on its own
# if "", set it to "password".
# if custom, use provided password
#
if [[ -f "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" ]]; then
if [[ -n "${HTTPFS_SSL_KEYSTORE_PASS+x}" ]] || [[ -n "${HTTPFS_SSL_TRUSTSTORE_PASS}" ]]; then
export HTTPFS_SSL_KEYSTORE_PASS=${HTTPFS_SSL_KEYSTORE_PASS:-password}
sed -e 's/_httpfs_ssl_keystore_pass_/'${HTTPFS_SSL_KEYSTORE_PASS}'/g' \
-e 's/_httpfs_ssl_truststore_pass_/'${HTTPFS_SSL_TRUSTSTORE_PASS}'/g' \
"${HADOOP_CATALINA_HOME}/conf/ssl-server.xml.conf" \
> "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml"
chmod 700 "${HADOOP_CATALINA_HOME}/conf/ssl-server.xml" >/dev/null 2>&1
fi
fi
hadoop_add_param CATALINA_OPTS -Dhttpfs.http.hostname "-Dhttpfs.http.hostname=${HTTPFS_HOST_NAME}"
hadoop_add_param CATALINA_OPTS -Dhttpfs.ssl.enabled "-Dhttpfs.ssl.enabled=${HTTPFS_SSL_ENABLED}"
exec "${HADOOP_CATALINA_HOME}/bin/catalina.sh" "$@"

View File

@@ -61,7 +61,7 @@
<!--The connectors can use a shared executor, you can define one or more named thread pools-->
<!--
<Executor name="tomcatThreadPool" namePrefix="catalina-exec-"
maxThreads="150" minSpareThreads="4"/>
maxThreads="httpfs.max.threads" minSpareThreads="4"/>
-->
<!-- Define a SSL HTTP/1.1 Connector on port 8443
@@ -72,7 +72,7 @@
maxThreads="150" scheme="https" secure="true"
clientAuth="false" sslEnabledProtocols="TLSv1,SSLv2Hello"
keystoreFile="${httpfs.ssl.keystore.file}"
keystorePass="${httpfs.ssl.keystore.pass}"/>
keystorePass="_httpfs_ssl_keystore_pass_"/>
<!-- Define an AJP 1.3 Connector on port 8009 -->

View File

@@ -1,159 +0,0 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.
---
Hadoop HDFS over HTTP ${project.version} - Server Setup
---
---
${maven.build.timestamp}
Hadoop HDFS over HTTP ${project.version} - Server Setup
This page explains how to quickly set up HttpFS with Pseudo authentication
against a Hadoop cluster with Pseudo authentication.
* Requirements
* Java 6+
* Maven 3+
* Install HttpFS
+---+
~ $ tar xzf httpfs-${project.version}.tar.gz
+---+
* Configure HttpFS
By default, HttpFS assumes that Hadoop configuration files
(<<<core-site.xml & hdfs-site.xml>>>) are in the HttpFS
configuration directory.
If this is not the case, add to the <<<httpfs-site.xml>>> file the
<<<httpfs.hadoop.config.dir>>> property set to the location
of the Hadoop configuration directory.
* Configure Hadoop
Edit Hadoop <<<core-site.xml>>> and define the Unix user that will
run the HttpFS server as a proxyuser. For example:
+---+
...
<property>
<name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
<value>httpfs-host.foo.com</value>
</property>
<property>
<name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
<value>*</value>
</property>
...
+---+
IMPORTANT: Replace <<<#HTTPFSUSER#>>> with the Unix user that will
start the HttpFS server.
* Restart Hadoop
You need to restart Hadoop for the proxyuser configuration to become
active.
* Start/Stop HttpFS
To start/stop HttpFS use HttpFS's bin/httpfs.sh script. For example:
+---+
httpfs-${project.version} $ bin/httpfs.sh start
+---+
NOTE: Invoking the script without any parameters lists all possible
parameters (start, stop, run, etc.). The <<<httpfs.sh>>> script is a wrapper
for Tomcat's <<<catalina.sh>>> script that sets the environment variables
and Java System properties required to run HttpFS server.
* Test HttpFS is working
+---+
~ $ curl -i "http://<HTTPFSHOSTNAME>:14000?user.name=babu&op=homedir"
HTTP/1.1 200 OK
Content-Type: application/json
Transfer-Encoding: chunked
{"homeDir":"http:\/\/<HTTPFS_HOST>:14000\/user\/babu"}
+---+
* Embedded Tomcat Configuration
To configure the embedded Tomcat go to the <<<tomcat/conf>>> directory.
HttpFS preconfigures the HTTP and Admin ports in Tomcat's <<<server.xml>>> to
14000 and 14001.
Tomcat logs are also preconfigured to go to HttpFS's <<<logs/>>> directory.
The following environment variables (which can be set in HttpFS's
<<<conf/httpfs-env.sh>>> script) can be used to alter those values:
* HTTPFS_HTTP_PORT
* HTTPFS_ADMIN_PORT
* HTTPFS_LOG
* HttpFS Configuration
HttpFS supports the following {{{./httpfs-default.html}configuration properties}}
in the HttpFS's <<<conf/httpfs-site.xml>>> configuration file.
* HttpFS over HTTPS (SSL)
To configure HttpFS to work over SSL, edit the {{httpfs-env.sh}} script in the
configuration directory and set {{HTTPFS_SSL_ENABLED}} to {{true}}.
In addition, the following 2 properties may be defined (shown with default
values):
* HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore
* HTTPFS_SSL_KEYSTORE_PASS=password
In the HttpFS <<<tomcat/conf>>> directory, replace the <<<server.xml>>> file
with the <<<ssl-server.xml>>> file.
You need to create an SSL certificate for the HttpFS server. As the
<<<httpfs>>> Unix user, use the Java <<<keytool>>> command to create the
SSL certificate:
+---+
$ keytool -genkey -alias tomcat -keyalg RSA
+---+
You will be asked a series of questions in an interactive prompt. It will
create the keystore file, which will be named <<.keystore>> and located in the
<<<httpfs>>> user home directory.
The password you enter for "keystore password" must match the value of the
<<<HTTPFS_SSL_KEYSTORE_PASS>>> environment variable set in the
<<<httpfs-env.sh>>> script in the configuration directory.
The answer to "What is your first and last name?" (i.e. "CN") must be the
hostname of the machine where the HttpFS Server will be running.
Start HttpFS. It should work over HTTPS.
Using the Hadoop <<<FileSystem>>> API or the Hadoop FS shell, use the
<<<swebhdfs://>>> scheme. Make sure the JVM is picking up the truststore
containing the public key of the SSL certificate if using a self-signed
certificate.

View File

@@ -1,87 +0,0 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.
---
Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools
---
---
${maven.build.timestamp}
Hadoop HDFS over HTTP ${project.version} - Using HTTP Tools
* Security
Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP
SPNEGO authentication.
** Pseudo Authentication
With pseudo authentication the user name must be specified in the
<<<user.name=\<USERNAME\>>>> query string parameter of an HttpFS URL.
For example:
+---+
$ curl "http://<HTTFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
+---+
** Kerberos HTTP SPNEGO Authentication
Kerberos HTTP SPNEGO authentication requires a tool or library supporting
Kerberos HTTP SPNEGO protocol.
IMPORTANT: If using <<<curl>>>, the <<<curl>>> version being used must support
GSS (<<<curl -V>>> prints out 'GSS' if it supports it).
For example:
+---+
$ kinit
Please enter the password for tucu@LOCALHOST:
$ curl --negotiate -u foo "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
Enter host password for user 'foo':
+---+
NOTE: the <<<-u USER>>> option is required by the <<<--negotiate>>> option but it is
not used. Use any value as <<<USER>>>, and when asked for the password press
[ENTER], as the password value is ignored.
** {Remembering Who I Am} (Establishing an Authenticated Session)
As with most authentication mechanisms, Hadoop HTTP authentication authenticates
users once and issues a short-lived authentication token to be presented in
subsequent requests. This authentication token is a signed HTTP Cookie.
When using tools like <<<curl>>>, the authentication token must be stored on
the first request doing authentication, and submitted in subsequent requests.
To do this with curl the <<<-b>>> and <<<-c>>> options to save and send HTTP
Cookies must be used.
For example, the first request doing authentication should save the received
HTTP Cookies.
Using Pseudo Authentication:
+---+
$ curl -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
+---+
Using Kerberos HTTP SPNEGO authentication:
+---+
$ curl --negotiate -u foo -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
+---+
Then, subsequent requests forward the previously received HTTP Cookie:
+---+
$ curl -b ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=liststatus"
+---+

View File

@@ -1,83 +0,0 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.
---
Hadoop HDFS over HTTP - Documentation Sets ${project.version}
---
---
${maven.build.timestamp}
Hadoop HDFS over HTTP - Documentation Sets ${project.version}
HttpFS is a server that provides a REST HTTP gateway supporting all HDFS
File System operations (read and write). It is interoperable with the
<<webhdfs>> REST HTTP API.
HttpFS can be used to transfer data between clusters running different
versions of Hadoop (overcoming RPC versioning issues), for example using
Hadoop DistCp.
HttpFS can be used to access data in HDFS on a cluster behind a firewall
(the HttpFS server acts as a gateway and is the only system that is allowed
to cross the firewall into the cluster).
HttpFS can be used to access data in HDFS using HTTP utilities (such as curl
and wget) and HTTP libraries from languages other than Java (such as Perl).
The <<webhdfs>> client FileSystem implementation can be used to access HttpFS
using the Hadoop filesystem command-line tool (<<<hadoop fs>>>) as well as
from Java applications using the Hadoop FileSystem Java API.
HttpFS has built-in security supporting Hadoop pseudo authentication and
HTTP SPNEGO Kerberos and other pluggable authentication mechanisms. It also
provides Hadoop proxy user support.
* How Does HttpFS Work?
HttpFS is a separate service from Hadoop NameNode.
HttpFS itself is a Java web application and runs using a preconfigured Tomcat
bundled with the HttpFS binary distribution.
HttpFS HTTP web-service API calls are HTTP REST calls that map to an HDFS file
system operation. For example, using the <<<curl>>> Unix command:
* <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt>>> returns
the contents of the HDFS <<</user/foo/README.txt>>> file.
* <<<$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list>>> returns the
contents of the HDFS <<</user/foo>>> directory in JSON format.
* <<<$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs>>>
creates the HDFS <<</user/foo/bar>>> directory.
* How Do HttpFS and Hadoop HDFS Proxy Differ?
HttpFS was inspired by Hadoop HDFS proxy.
HttpFS can be seen as a full rewrite of Hadoop HDFS proxy.
Hadoop HDFS proxy provides a subset of file system operations (read only),
HttpFS provides support for all file system operations.
HttpFS uses a clean HTTP REST API making its use with HTTP tools more
intuitive.
HttpFS supports Hadoop pseudo authentication, Kerberos SPNEGO authentication
and Hadoop proxy users. Hadoop HDFS proxy did not.
* User and Developer Documentation
* {{{./ServerSetup.html}HttpFS Server Setup}}
* {{{./UsingHttpTools.html}Using HTTP Tools}}

View File

@@ -0,0 +1,121 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
Hadoop HDFS over HTTP - Server Setup
====================================
This page explains how to quickly set up HttpFS with Pseudo authentication against a Hadoop cluster with Pseudo authentication.
Install HttpFS
--------------
~ $ tar xzf httpfs-${project.version}.tar.gz
Configure HttpFS
----------------
By default, HttpFS assumes that Hadoop configuration files (`core-site.xml & hdfs-site.xml`) are in the HttpFS configuration directory.
If this is not the case, add to the `httpfs-site.xml` file the `httpfs.hadoop.config.dir` property set to the location of the Hadoop configuration directory.
Configure Hadoop
----------------
Edit Hadoop `core-site.xml` and define the Unix user that will run the HttpFS server as a proxyuser. For example:
```xml
<property>
<name>hadoop.proxyuser.#HTTPFSUSER#.hosts</name>
<value>httpfs-host.foo.com</value>
</property>
<property>
<name>hadoop.proxyuser.#HTTPFSUSER#.groups</name>
<value>*</value>
</property>
```
IMPORTANT: Replace `#HTTPFSUSER#` with the Unix user that will start the HttpFS server.
Restart Hadoop
--------------
You need to restart Hadoop for the proxyuser configuration to become active.
Start/Stop HttpFS
-----------------
To start/stop HttpFS use HttpFS's sbin/httpfs.sh script. For example:
$ sbin/httpfs.sh start
NOTE: Invoking the script without any parameters lists all possible parameters (start, stop, run, etc.). The `httpfs.sh` script is a wrapper for Tomcat's `catalina.sh` script that sets the environment variables and Java System properties required to run the HttpFS server.
Test HttpFS is working
----------------------
~ $ curl -i "http://<HTTPFSHOSTNAME>:14000?user.name=babu&op=homedir"
HTTP/1.1 200 OK
Content-Type: application/json
Transfer-Encoding: chunked
{"homeDir":"http:\/\/<HTTPFS_HOST>:14000\/user\/babu"}
Embedded Tomcat Configuration
-----------------------------
To configure the embedded Tomcat go to the `tomcat/conf` directory.
HttpFS preconfigures the HTTP and Admin ports in Tomcat's `server.xml` to 14000 and 14001.
Tomcat logs are also preconfigured to go to HttpFS's `logs/` directory.
The following environment variables (which can be set in HttpFS's `etc/hadoop/httpfs-env.sh` script) can be used to alter those values:
* HTTPFS\_HTTP\_PORT
* HTTPFS\_ADMIN\_PORT
* HADOOP\_LOG\_DIR
HttpFS Configuration
--------------------
HttpFS supports the following [configuration properties](./httpfs-default.html) in the HttpFS's `etc/hadoop/httpfs-site.xml` configuration file.
HttpFS over HTTPS (SSL)
-----------------------
To configure HttpFS to work over SSL, edit the `httpfs-env.sh` script in the configuration directory and set `HTTPFS_SSL_ENABLED` to `true`.
In addition, the following 2 properties may be defined (shown with default values):
* HTTPFS\_SSL\_KEYSTORE\_FILE=$HOME/.keystore
* HTTPFS\_SSL\_KEYSTORE\_PASS=password
In the HttpFS `tomcat/conf` directory, replace the `server.xml` file with the `ssl-server.xml` file.
You need to create an SSL certificate for the HttpFS server. As the `httpfs` Unix user, use the Java `keytool` command to create the SSL certificate:
$ keytool -genkey -alias tomcat -keyalg RSA
You will be asked a series of questions in an interactive prompt. It will create the keystore file, which will be named **.keystore** and located in the `httpfs` user home directory.
The password you enter for "keystore password" must match the value of the `HTTPFS_SSL_KEYSTORE_PASS` environment variable set in the `httpfs-env.sh` script in the configuration directory.
The answer to "What is your first and last name?" (i.e. "CN") must be the hostname of the machine where the HttpFS Server will be running.
Start HttpFS. It should work over HTTPS.
Using the Hadoop `FileSystem` API or the Hadoop FS shell, use the `swebhdfs://` scheme. Make sure the JVM is picking up the truststore containing the public key of the SSL certificate if using a self-signed certificate.
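As a concrete client-side check against an SSL-enabled server, something like the following could be used; the hostname and truststore path are placeholders, and `javax.net.ssl.trustStore` is the standard JSSE property for pointing a JVM at a truststore, not anything specific to HttpFS. `HADOOP_OPTS` is one way to pass extra JVM options to Hadoop client commands.

```bash
# Make the client JVM trust the (self-signed) HttpFS certificate
export HADOOP_OPTS="-Djavax.net.ssl.trustStore=${HOME}/.truststore ${HADOOP_OPTS}"

# List an HDFS directory through HttpFS over HTTPS using the swebhdfs:// scheme
hadoop fs -ls swebhdfs://httpfs-host.example.com:14000/user/babu
```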

View File

@@ -0,0 +1,62 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
Hadoop HDFS over HTTP - Using HTTP Tools
========================================
Security
--------
Out of the box HttpFS supports both pseudo authentication and Kerberos HTTP SPNEGO authentication.
### Pseudo Authentication
With pseudo authentication, the user name must be specified in the `user.name=<USERNAME>` query string parameter of an HttpFS URL. For example:
$ curl "http://<HTTFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=babu"
### Kerberos HTTP SPNEGO Authentication
Kerberos HTTP SPNEGO authentication requires a tool or library supporting Kerberos HTTP SPNEGO protocol.
IMPORTANT: If using `curl`, the `curl` version being used must support GSS (`curl -V` prints out 'GSS' if it supports it).
For example:
$ kinit
Please enter the password for user@LOCALHOST:
$ curl --negotiate -u foo "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
Enter host password for user 'foo':
NOTE: the `-u USER` option is required by the `--negotiate` option but it is not used. Use any value as `USER`, and when asked for the password press [ENTER], as the password value is ignored.
### Remembering Who I Am (Establishing an Authenticated Session)
As with most authentication mechanisms, Hadoop HTTP authentication authenticates users once and issues a short-lived authentication token to be presented in subsequent requests. This authentication token is a signed HTTP Cookie.
When using tools like `curl`, the authentication token must be stored on the first request doing authentication, and submitted in subsequent requests. To do this with curl the `-b` and `-c` options to save and send HTTP Cookies must be used.
For example, the first request doing authentication should save the received HTTP Cookies.
Using Pseudo Authentication:
$ curl -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir&user.name=foo"
Using Kerberos HTTP SPNEGO authentication:
$ curl --negotiate -u foo -c ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=homedir"
Then, subsequent requests forward the previously received HTTP Cookie:
$ curl -b ~/.httpfsauth "http://<HTTPFS_HOST>:14000/webhdfs/v1?op=liststatus"

View File

@@ -0,0 +1,52 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
Hadoop HDFS over HTTP - Documentation Sets
==========================================
HttpFS is a server that provides a REST HTTP gateway supporting all HDFS File System operations (read and write). It is interoperable with the **webhdfs** REST HTTP API.
HttpFS can be used to transfer data between clusters running different versions of Hadoop (overcoming RPC versioning issues), for example using Hadoop DistCp.
HttpFS can be used to access data in HDFS on a cluster behind a firewall (the HttpFS server acts as a gateway and is the only system that is allowed to cross the firewall into the cluster).
HttpFS can be used to access data in HDFS using HTTP utilities (such as curl and wget) and HTTP libraries from languages other than Java (such as Perl).
The **webhdfs** client FileSystem implementation can be used to access HttpFS using the Hadoop filesystem command-line tool (`hadoop fs`) as well as from Java applications using the Hadoop FileSystem Java API.
HttpFS has built-in security supporting Hadoop pseudo authentication and HTTP SPNEGO Kerberos and other pluggable authentication mechanisms. It also provides Hadoop proxy user support.
How Does HttpFS Work?
----------------------
HttpFS is a separate service from Hadoop NameNode.
HttpFS itself is a Java web application and runs using a preconfigured Tomcat bundled with the HttpFS binary distribution.
HttpFS HTTP web-service API calls are HTTP REST calls that map to an HDFS file system operation. For example, using the `curl` Unix command:
* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo/README.txt` returns the contents of the HDFS `/user/foo/README.txt` file.
* `$ curl http://httpfs-host:14000/webhdfs/v1/user/foo?op=list` returns the contents of the HDFS `/user/foo` directory in JSON format.
* `$ curl -X POST http://httpfs-host:14000/webhdfs/v1/user/foo/bar?op=mkdirs` creates the HDFS `/user/foo/bar` directory.
User and Developer Documentation
--------------------------------
* [HttpFS Server Setup](./ServerSetup.html)
* [Using HTTP Tools](./UsingHttpTools.html)

View File

@@ -141,6 +141,8 @@ Trunk (Unreleased)
HDFS-7668. Convert site documentation from apt to markdown (Masatake
Iwasaki via aw)
HDFS-7460. Rewrite httpfs to use new shell framework (John Smith via aw)
OPTIMIZATIONS
BUG FIXES