Merge r1414455:r1426018 from trunk

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1432246 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Suresh Srinivas 2013-01-11 19:40:23 +00:00
commit 93c1b4b25a
228 changed files with 8482 additions and 5616 deletions

View File

@ -11,6 +11,9 @@ Trunk (Unreleased)
NEW FEATURES
HADOOP-8561. Introduce HADOOP_PROXY_USER for secure impersonation in child
hadoop client processes. (Yu Gao via llu)
HADOOP-8469. Make NetworkTopology class pluggable. (Junping Du via
szetszwo)
@ -129,9 +132,6 @@ Trunk (Unreleased)
HADOOP-8776. Provide an option in test-patch that can enable/disable
compiling native code. (Chris Nauroth via suresh)
HADOOP-9004. Allow security unit tests to use external KDC. (Stephen Chu
via suresh)
HADOOP-6616. Improve documentation for rack awareness. (Adam Faris via
jghoman)
@ -141,8 +141,16 @@ Trunk (Unreleased)
HADOOP-9093. Move all the Exception in PathExceptions to o.a.h.fs package.
(suresh)
HADOOP-9140. Cleanup rpc PB protos. (Sanjay Radia)
HADOOP-9162. Add utility to check native library availability.
(Binglin Chang via suresh)
BUG FIXES
HADOOP-9041. FsUrlStreamHandlerFactory could cause an infinite loop in
FileSystem initialization. (Yanbo Liang and Radim Kolar via llu)
HADOOP-8418. Update UGI Principal classes name for running with
IBM JDK on 64 bits Windows. (Yu Gao via eyang)
@ -295,6 +303,12 @@ Trunk (Unreleased)
HADOOP-9121. InodeTree.java has redundant check for vName while
throwing exception. (Arup Malakar via suresh)
HADOOP-9131. Turn off TestLocalFileSystem#testListStatusWithColons on
Windows. (Chris Nauroth via suresh)
HADOOP-8957. AbstractFileSystem#isValidName should be overridden for
embedded file systems like ViewFs. (Chris Nauroth via Sanjay Radia)
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
@ -395,6 +409,17 @@ Release 2.0.3-alpha - Unreleased
HADOOP-9042. Add a test for umask in FileSystemContractBaseTest.
(Colin McCabe via eli)
HADOOP-9127. Update documentation for ZooKeeper Failover Controller.
(Daisuke Kobayashi via atm)
HADOOP-9004. Allow security unit tests to use external KDC. (Stephen Chu
via suresh)
HADOOP-9147. Add missing fields to FileStatus.toString.
(Jonathan Allen via suresh)
HADOOP-8427. Convert Forrest docs to APT, incremental. (adi2 via tucu)
OPTIMIZATIONS
HADOOP-8866. SampleQuantiles#query is O(N^2) instead of O(N). (Andrew Wang
@ -473,6 +498,24 @@ Release 2.0.3-alpha - Unreleased
HADOOP-9070. Kerberos SASL server cannot find kerberos key. (daryn via atm)
HADOOP-6762. Exception while doing RPC I/O closes channel
(Sam Rash and todd via todd)
HADOOP-9126. FormatZK and ZKFC startup can fail due to zkclient connection
establishment delay. (Rakesh R and todd via todd)
HADOOP-9113. o.a.h.fs.TestDelegationTokenRenewer is failing intermittently.
(Karthik Kambatla via eli)
HADOOP-9135. JniBasedUnixGroupsMappingWithFallback should log at debug
rather than info during fallback. (Colin Patrick McCabe via todd)
HADOOP-9152. HDFS can report negative DFS Used on clusters with very small
amounts of data. (Brock Noland via atm)
HADOOP-9153. Support createNonRecursive in ViewFileSystem.
(Sandy Ryza via tomwhite)
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES
@ -1184,6 +1227,8 @@ Release 0.23.6 - UNRELEASED
HADOOP-9038. unit-tests for AllocatorPerContext.PathIterator (Ivan A.
Veselovsky via bobby)
HADOOP-9105. FsShell -moveFromLocal erroneously fails (daryn via bobby)
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -260,7 +260,7 @@
</Match>
<Match>
<!-- protobuf generated code -->
<Class name="~org\.apache\.hadoop\.ipc\.protobuf\.HadoopRpcProtos.*"/>
<Class name="~org\.apache\.hadoop\.ipc\.protobuf\.ProtobufRpcEngineProtos.*"/>
</Match>
<Match>
<!-- protobuf generated code -->
@ -272,7 +272,7 @@
</Match>
<Match>
<!-- protobuf generated code -->
<Class name="~org\.apache\.hadoop\.ipc\.protobuf\.RpcPayloadHeaderProtos.*"/>
<Class name="~org\.apache\.hadoop\.ipc\.protobuf\.RpcHeaderProtos.*"/>
</Match>
<Match>
<!-- protobuf generated code -->

View File

@ -378,9 +378,9 @@
<argument>src/main/proto/HAServiceProtocol.proto</argument>
<argument>src/main/proto/IpcConnectionContext.proto</argument>
<argument>src/main/proto/ProtocolInfo.proto</argument>
<argument>src/main/proto/RpcPayloadHeader.proto</argument>
<argument>src/main/proto/RpcHeader.proto</argument>
<argument>src/main/proto/ZKFCProtocol.proto</argument>
<argument>src/main/proto/hadoop_rpc.proto</argument>
<argument>src/main/proto/ProtobufRpcEngine.proto</argument>
</arguments>
</configuration>
</execution>

View File

@ -31,6 +31,7 @@ function print_usage(){
echo " fs run a generic filesystem user client"
echo " version print the version"
echo " jar <jar> run a jar file"
echo " checknative [-a|-h] check native hadoop and compression libraries availability"
echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " classpath prints the class path needed to get the"
@ -100,6 +101,8 @@ case $COMMAND in
CLASS=org.apache.hadoop.util.VersionInfo
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
elif [ "$COMMAND" = "checknative" ] ; then
CLASS=org.apache.hadoop.util.NativeLibraryChecker
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
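
The new checknative subcommand is a thin dispatch to org.apache.hadoop.util.NativeLibraryChecker (HADOOP-9162). A minimal sketch of driving the same entry point from Java, assuming hadoop-common is on the classpath; the wrapper class name is illustrative, and the -a flag is the "check all libraries" option from the usage line above:

    import org.apache.hadoop.util.NativeLibraryChecker;

    public class CheckNativeExample {
      public static void main(String[] args) throws Exception {
        // Equivalent to running: hadoop checknative -a
        // Note: NativeLibraryChecker.main may terminate the JVM with a
        // non-zero exit code when native libraries are unavailable.
        NativeLibraryChecker.main(new String[] { "-a" });
      }
    }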

View File

@ -1,127 +0,0 @@
<?xml version="1.0"?>
<!--
Copyright 2002-2004 The Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Authentication for Hadoop HTTP web-consoles
</title>
</header>
<body>
<section>
<title> Introduction </title>
<p>
This document describes how to configure Hadoop HTTP web-consoles to require user
authentication.
</p>
<p>
By default Hadoop HTTP web-consoles (JobTracker, NameNode, TaskTrackers and DataNodes) allow
access without any form of authentication.
</p>
<p>
Similarly to Hadoop RPC, Hadoop HTTP web-consoles can be configured to require Kerberos
authentication using HTTP SPNEGO protocol (supported by browsers like Firefox and Internet
Explorer).
</p>
<p>
In addition, Hadoop HTTP web-consoles support the equivalent of Hadoop's Pseudo/Simple
authentication. If this option is enabled, users must specify their user name in the first
browser interaction using the <code>user.name</code> query string parameter. For example:
<code>http://localhost:50030/jobtracker.jsp?user.name=babu</code>.
</p>
<p>
If a custom authentication mechanism is required for the HTTP web-consoles, it is possible
to implement a plugin to support the alternate authentication mechanism (refer to the
Hadoop hadoop-auth module for details on writing an <code>AuthenticationHandler</code>).
</p>
<p>
The next section describes how to configure Hadoop HTTP web-consoles to require user
authentication.
</p>
</section>
<section>
<title> Configuration </title>
<p>
The following properties should be in the <code>core-site.xml</code> of all the nodes
in the cluster.
</p>
<p><code>hadoop.http.filter.initializers</code>: add to this property the
<code>org.apache.hadoop.security.AuthenticationFilterInitializer</code> initializer class.
</p>
<p><code>hadoop.http.authentication.type</code>: Defines authentication used for the HTTP
web-consoles. The supported values are: <code>simple | kerberos |
#AUTHENTICATION_HANDLER_CLASSNAME#</code>. The default value is <code>simple</code>.
</p>
<p><code>hadoop.http.authentication.token.validity</code>: Indicates how long (in seconds)
an authentication token is valid before it has to be renewed. The default value is
<code>36000</code>.
</p>
<p><code>hadoop.http.authentication.signature.secret.file</code>: The signature secret
file for signing the authentication tokens. If not set, a random secret is generated at
startup time. The same secret should be used for all nodes in the cluster: JobTracker,
NameNode, DataNode and TaskTracker. The default value is
<code>${user.home}/hadoop-http-auth-signature-secret</code>.
IMPORTANT: This file should be readable only by the Unix user running the daemons.
</p>
<p><code>hadoop.http.authentication.cookie.domain</code>: The domain to use for the HTTP
cookie that stores the authentication token. In order for authentication to work
correctly across all nodes in the cluster, the domain must be set correctly.
There is no default value; in that case the HTTP cookie will not have a domain and
will work only with the hostname issuing the HTTP cookie.
</p>
<p>
IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
For this setting to work properly, all nodes in the cluster must be configured
to generate URLs with hostname.domain names in them.
</p>
<p><code>hadoop.http.authentication.simple.anonymous.allowed</code>: Indicates if anonymous
requests are allowed when using 'simple' authentication. The default value is
<code>true</code>
</p>
<p><code>hadoop.http.authentication.kerberos.principal</code>: Indicates the Kerberos
principal to be used for HTTP endpoint when using 'kerberos' authentication.
The principal short name must be <code>HTTP</code> per Kerberos HTTP SPNEGO specification.
The default value is <code>HTTP/_HOST@$LOCALHOST</code>, where <code>_HOST</code>, if present,
is replaced with the bind address of the HTTP server.
</p>
<p><code>hadoop.http.authentication.kerberos.keytab</code>: Location of the keytab file
with the credentials for the Kerberos principal used for the HTTP endpoint.
The default value is <code>${user.home}/hadoop.keytab</code>.
</p>
</section>
</body>
</document>
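
The properties described above normally live in core-site.xml on every node. Purely as an illustration, a minimal sketch of the same settings expressed through the org.apache.hadoop.conf.Configuration API; the secret-file path and cookie domain below are hypothetical placeholders, not defaults from this document:

    import org.apache.hadoop.conf.Configuration;

    public class HttpAuthConfigSketch {
      public static Configuration sketch() {
        Configuration conf = new Configuration();
        // Wire the authentication filter into the HTTP web-consoles.
        conf.set("hadoop.http.filter.initializers",
            "org.apache.hadoop.security.AuthenticationFilterInitializer");
        // "simple" requires user.name=... on the first request; "kerberos" enables HTTP SPNEGO.
        conf.set("hadoop.http.authentication.type", "simple");
        // Token lifetime in seconds before it has to be renewed.
        conf.set("hadoop.http.authentication.token.validity", "36000");
        // Hypothetical path; the same secret file should be used on all nodes.
        conf.set("hadoop.http.authentication.signature.secret.file",
            "/etc/hadoop/http-auth-signature-secret");
        // Hypothetical domain; required for the cookie to work across the cluster.
        conf.set("hadoop.http.authentication.cookie.domain", "example.com");
        conf.set("hadoop.http.authentication.simple.anonymous.allowed", "false");
        return conf;
      }
    }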

View File

@ -1,798 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop Commands Guide</title>
</header>
<body>
<section>
<title>Overview</title>
<p>
All Hadoop commands are invoked by the bin/hadoop script. Running the Hadoop
script without any arguments prints the description for all commands.
</p>
<p>
<code>Usage: hadoop [--config confdir] [COMMAND] [GENERIC_OPTIONS] [COMMAND_OPTIONS]</code>
</p>
<p>
Hadoop has an option parsing framework that employs parsing generic options as well as running classes.
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>--config confdir</code></td>
<td>Overrides the default configuration directory. Default is ${HADOOP_PREFIX}/conf.</td>
</tr>
<tr>
<td><code>GENERIC_OPTIONS</code></td>
<td>The common set of options supported by multiple commands.</td>
</tr>
<tr>
<td><code>COMMAND</code><br/><code>COMMAND_OPTIONS</code></td>
<td>Various commands with their options are described in the following sections. The commands
have been grouped into <a href="commands_manual.html#User+Commands">User Commands</a>
and <a href="commands_manual.html#Administration+Commands">Administration Commands</a>.</td>
</tr>
</table>
<section>
<title>Generic Options</title>
<p>
The following options are supported by <a href="commands_manual.html#dfsadmin">dfsadmin</a>,
<a href="commands_manual.html#fs">fs</a>, <a href="commands_manual.html#fsck">fsck</a>,
<a href="commands_manual.html#job">job</a> and <a href="commands_manual.html#fetchdt">fetchdt</a>.
Applications should implement
<a href="ext:api/org/apache/hadoop/util/tool">Tool</a> to support
<a href="ext:api/org/apache/hadoop/util/genericoptionsparser">
GenericOptions</a>.
</p>
<table>
<tr><th> GENERIC_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-conf &lt;configuration file&gt;</code></td>
<td>Specify an application configuration file.</td>
</tr>
<tr>
<td><code>-D &lt;property=value&gt;</code></td>
<td>Use value for given property.</td>
</tr>
<tr>
<td><code>-fs &lt;local|namenode:port&gt;</code></td>
<td>Specify a namenode.</td>
</tr>
<tr>
<td><code>-jt &lt;local|jobtracker:port&gt;</code></td>
<td>Specify a job tracker. Applies only to <a href="commands_manual.html#job">job</a>.</td>
</tr>
<tr>
<td><code>-files &lt;comma separated list of files&gt;</code></td>
<td>Specify comma separated files to be copied to the map reduce cluster.
Applies only to <a href="commands_manual.html#job">job</a>.</td>
</tr>
<tr>
<td><code>-libjars &lt;comma separated list of jars&gt;</code></td>
<td>Specify comma separated jar files to include in the classpath.
Applies only to <a href="commands_manual.html#job">job</a>.</td>
</tr>
<tr>
<td><code>-archives &lt;comma separated list of archives&gt;</code></td>
<td>Specify comma separated archives to be unarchived on the compute machines.
Applies only to <a href="commands_manual.html#job">job</a>.</td>
</tr>
</table>
</section>
</section>
<section>
<title> User Commands </title>
<p>Commands useful for users of a Hadoop cluster.</p>
<section>
<title> archive </title>
<p>
Creates a Hadoop archive. For more information, see the <a href="ext:hadoop-archives">Hadoop Archives Guide</a>.
</p>
<p>
<code>Usage: hadoop archive -archiveName NAME &lt;src&gt;* &lt;dest&gt;</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-archiveName NAME</code></td>
<td>Name of the archive to be created.</td>
</tr>
<tr>
<td><code>src</code></td>
<td>Filesystem pathnames which work as usual with regular expressions.</td>
</tr>
<tr>
<td><code>dest</code></td>
<td>Destination directory which would contain the archive.</td>
</tr>
</table>
</section>
<section>
<title> distcp </title>
<p>
Copy file or directories recursively. More information can be found at <a href="ext:distcp">DistCp Guide</a>.
</p>
<p>
<code>Usage: hadoop distcp &lt;srcurl&gt; &lt;desturl&gt;</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>srcurl</code></td>
<td>Source Url</td>
</tr>
<tr>
<td><code>desturl</code></td>
<td>Destination Url</td>
</tr>
</table>
</section>
<section>
<title> fs </title>
<p>
Runs a generic filesystem user client.
</p>
<p>
<code>Usage: hadoop fs [</code><a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a><code>]
[COMMAND_OPTIONS]</code>
</p>
<p>
The various COMMAND_OPTIONS can be found at
<a href="file_system_shell.html">File System Shell Guide</a>.
</p>
</section>
<section>
<title> fsck </title>
<p>
Runs an HDFS filesystem checking utility. See <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Fsck">Fsck</a> for more info.
</p>
<p><code>Usage: hadoop fsck [</code><a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a><code>]
&lt;path&gt; [-move | -delete | -openforwrite] [-files [-blocks
[-locations | -racks]]]</code></p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>&lt;path&gt;</code></td>
<td>Start checking from this path.</td>
</tr>
<tr>
<td><code>-move</code></td>
<td>Move corrupted files to /lost+found</td>
</tr>
<tr>
<td><code>-delete</code></td>
<td>Delete corrupted files.</td>
</tr>
<tr>
<td><code>-openforwrite</code></td>
<td>Print out files opened for write.</td>
</tr>
<tr>
<td><code>-files</code></td>
<td>Print out files being checked.</td>
</tr>
<tr>
<td><code>-blocks</code></td>
<td>Print out block report.</td>
</tr>
<tr>
<td><code>-locations</code></td>
<td>Print out locations for every block.</td>
</tr>
<tr>
<td><code>-racks</code></td>
<td>Print out network topology for data-node locations.</td>
</tr>
</table>
</section>
<section>
<title> fetchdt </title>
<p>
Gets Delegation Token from a NameNode. See <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#fetchdt">fetchdt</a> for more info.
</p>
<p><code>Usage: hadoop fetchdt [</code><a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a><code>]
[--webservice &lt;namenode_http_addr&gt;] &lt;file_name&gt; </code></p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>&lt;file_name&gt;</code></td>
<td>File name to store the token into.</td>
</tr>
<tr>
<td><code>--webservice &lt;https_address&gt;</code></td>
<td>use http protocol instead of RPC</td>
</tr>
</table>
</section>
<section>
<title> jar </title>
<p>
Runs a jar file. Users can bundle their Map Reduce code in a jar file and execute it using this command.
</p>
<p>
<code>Usage: hadoop jar &lt;jar&gt; [mainClass] args...</code>
</p>
<p>
The streaming jobs are run via this command. For examples, see
<a href="ext:streaming">Hadoop Streaming</a>.
</p>
<p>
The WordCount example is also run using jar command. For examples, see the
<a href="ext:mapred-tutorial">MapReduce Tutorial</a>.
</p>
</section>
<section>
<title> job </title>
<p>
Command to interact with Map Reduce Jobs.
</p>
<p>
<code>Usage: hadoop job [</code><a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a><code>]
[-submit &lt;job-file&gt;] | [-status &lt;job-id&gt;] |
[-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;] | [-kill &lt;job-id&gt;] |
[-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;] | [-history [all] &lt;historyFile&gt;] |
[-list [all]] | [-kill-task &lt;task-id&gt;] | [-fail-task &lt;task-id&gt;] |
[-set-priority &lt;job-id&gt; &lt;priority&gt;]</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-submit &lt;job-file&gt;</code></td>
<td>Submits the job.</td>
</tr>
<tr>
<td><code>-status &lt;job-id&gt;</code></td>
<td>Prints the map and reduce completion percentage and all job counters.</td>
</tr>
<tr>
<td><code>-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;</code></td>
<td>Prints the counter value.</td>
</tr>
<tr>
<td><code>-kill &lt;job-id&gt;</code></td>
<td>Kills the job.</td>
</tr>
<tr>
<td><code>-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;</code></td>
<td>Prints the events' details received by jobtracker for the given range.</td>
</tr>
<tr>
<td><code>-history [all] &lt;historyFile&gt;</code></td>
<td>-history &lt;historyFile&gt; prints job details, failed and killed tip details. More details
about the job such as successful tasks and task attempts made for each task can be viewed by
specifying the [all] option. </td>
</tr>
<tr>
<td><code>-list [all]</code></td>
<td>-list all displays all jobs. -list displays only jobs which are yet to complete.</td>
</tr>
<tr>
<td><code>-kill-task &lt;task-id&gt;</code></td>
<td>Kills the task. Killed tasks are NOT counted against failed attempts.</td>
</tr>
<tr>
<td><code>-fail-task &lt;task-id&gt;</code></td>
<td>Fails the task. Failed tasks are counted against failed attempts.</td>
</tr>
<tr>
<td><code>-set-priority &lt;job-id&gt; &lt;priority&gt;</code></td>
<td>Changes the priority of the job.
Allowed priority values are VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW</td>
</tr>
</table>
</section>
<section>
<title> pipes </title>
<p>
Runs a pipes job.
</p>
<p>
<code>Usage: hadoop pipes [-conf &lt;path&gt;] [-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...]
[-input &lt;path&gt;] [-output &lt;path&gt;] [-jar &lt;jar file&gt;] [-inputformat &lt;class&gt;]
[-map &lt;class&gt;] [-partitioner &lt;class&gt;] [-reduce &lt;class&gt;] [-writer &lt;class&gt;]
[-program &lt;executable&gt;] [-reduces &lt;num&gt;] </code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-conf &lt;path&gt;</code></td>
<td>Configuration for job</td>
</tr>
<tr>
<td><code>-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...</code></td>
<td>Add/override configuration for job</td>
</tr>
<tr>
<td><code>-input &lt;path&gt;</code></td>
<td>Input directory</td>
</tr>
<tr>
<td><code>-output &lt;path&gt;</code></td>
<td>Output directory</td>
</tr>
<tr>
<td><code>-jar &lt;jar file&gt;</code></td>
<td>Jar filename</td>
</tr>
<tr>
<td><code>-inputformat &lt;class&gt;</code></td>
<td>InputFormat class</td>
</tr>
<tr>
<td><code>-map &lt;class&gt;</code></td>
<td>Java Map class</td>
</tr>
<tr>
<td><code>-partitioner &lt;class&gt;</code></td>
<td>Java Partitioner</td>
</tr>
<tr>
<td><code>-reduce &lt;class&gt;</code></td>
<td>Java Reduce class</td>
</tr>
<tr>
<td><code>-writer &lt;class&gt;</code></td>
<td>Java RecordWriter</td>
</tr>
<tr>
<td><code>-program &lt;executable&gt;</code></td>
<td>Executable URI</td>
</tr>
<tr>
<td><code>-reduces &lt;num&gt;</code></td>
<td>Number of reduces</td>
</tr>
</table>
</section>
<section>
<title> queue </title>
<p>
Command to interact with and view Job Queue information.
</p>
<p>
<code>Usage : hadoop queue [-list] | [-info &lt;job-queue-name&gt; [-showJobs]] | [-showacls]</code>
</p>
<table>
<tr>
<th> COMMAND_OPTION </th><th> Description </th>
</tr>
<tr>
<td><anchor id="QueuesList"/><code>-list</code> </td>
<td>Gets the list of Job Queues configured in the system, along with the scheduling
information associated with the job queues.
</td>
</tr>
<tr>
<td><anchor id="QueuesInfo"/><code>-info &lt;job-queue-name&gt; [-showJobs]</code></td>
<td>
Displays the job queue information and associated scheduling information of a particular
job queue. If the -showJobs option is present, a list of jobs submitted to that job
queue is displayed.
</td>
</tr>
<tr>
<td><code>-showacls</code></td>
<td>Displays the queue name and associated queue operations allowed for the current user.
The list consists of only those queues to which the user has access.
</td>
</tr>
</table>
</section>
<section>
<title> version </title>
<p>
Prints the version.
</p>
<p>
<code>Usage: hadoop version</code>
</p>
</section>
<section>
<title> CLASSNAME </title>
<p>
Hadoop script can be used to invoke any class.
</p>
<p>
Runs the class named CLASSNAME.
</p>
<p>
<code>Usage: hadoop CLASSNAME</code>
</p>
</section>
</section>
<section>
<title> Administration Commands </title>
<p>Commands useful for administrators of a Hadoop cluster.</p>
<section>
<title> balancer </title>
<p>
Runs a cluster balancing utility. An administrator can simply press Ctrl-C to stop the
rebalancing process. For more details see
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Rebalancer">Rebalancer</a>.
</p>
<p>
<code>Usage: hadoop balancer [-policy &lt;blockpool|datanode&gt;] [-threshold &lt;threshold&gt;]</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-policy &lt;blockpool|datanode&gt;</code></td>
<td>The balancing policy.
<br /><code>datanode</code>: Cluster is balanced if the disk usage of each datanode is balanced.
<br /><code>blockpool</code>: Cluster is balanced if the disk usage of each block pool in each datanode is balanced.
<br />Note that <code>blockpool</code> is a condition stronger than <code>datanode</code>.
The default policy is <code>datanode</code>.
</td>
</tr>
<tr>
<td><code>-threshold &lt;threshold&gt;</code></td>
<td>Percentage of disk capacity. The default threshold is 10%.</td>
</tr>
</table>
</section>
<section>
<title> daemonlog </title>
<p>
Get/Set the log level for each daemon.
</p>
<p>
<code>Usage: hadoop daemonlog -getlevel &lt;host:port&gt; &lt;name&gt;</code><br/>
<code>Usage: hadoop daemonlog -setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-getlevel &lt;host:port&gt; &lt;name&gt;</code></td>
<td>Prints the log level of the daemon running at &lt;host:port&gt;.
This command internally connects to http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
</tr>
<tr>
<td><code>-setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</code></td>
<td>Sets the log level of the daemon running at &lt;host:port&gt;.
This command internally connects to http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
</tr>
</table>
</section>
<section>
<title> datanode</title>
<p>
Runs an HDFS datanode.
</p>
<p>
<code>Usage: hadoop datanode [-rollback]</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-rollback</code></td>
<td>Rolls back the datanode to the previous version. This should be used after stopping the datanode
and distributing the old Hadoop version.</td>
</tr>
</table>
</section>
<section>
<title> dfsadmin </title>
<p>
Runs an HDFS dfsadmin client.
</p>
<p>
<code>Usage: hadoop dfsadmin [</code><a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a><code>] [-report] [-safemode enter | leave | get | wait] [-refreshNodes]
[-finalizeUpgrade] [-upgradeProgress status | details | force] [-metasave filename]
[-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;] [-clrQuota &lt;dirname&gt;...&lt;dirname&gt;]
[-restoreFailedStorage true|false|check]
[-help [cmd]]</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-report</code></td>
<td>Reports basic filesystem information and statistics.</td>
</tr>
<tr>
<td><code>-safemode enter | leave | get | wait</code></td>
<td>Safe mode maintenance command.
Safe mode is a Namenode state in which it <br/>
1. does not accept changes to the name space (read-only) <br/>
2. does not replicate or delete blocks. <br/>
Safe mode is entered automatically at Namenode startup, and
leaves safe mode automatically when the configured minimum
percentage of blocks satisfies the minimum replication
condition. Safe mode can also be entered manually, but then
it can only be turned off manually as well.</td>
</tr>
<tr>
<td><code>-refreshNodes</code></td>
<td>Re-read the hosts and exclude files to update the set
of Datanodes that are allowed to connect to the Namenode
and those that should be decommissioned or recommissioned.</td>
</tr>
<tr>
<td><code>-finalizeUpgrade</code></td>
<td>Finalize upgrade of HDFS.
Datanodes delete their previous version working directories,
followed by Namenode doing the same.
This completes the upgrade process.</td>
</tr>
<tr>
<td><code>-printTopology</code></td>
<td>Print a tree of the rack/datanode topology of the
cluster as seen by the NameNode.</td>
</tr>
<tr>
<td><code>-upgradeProgress status | details | force</code></td>
<td>Request current distributed upgrade status,
a detailed status or force the upgrade to proceed.</td>
</tr>
<tr>
<td><code>-metasave filename</code></td>
<td>Save Namenode's primary data structures
to &lt;filename&gt; in the directory specified by hadoop.log.dir property.
&lt;filename&gt; will contain one line for each of the following <br/>
1. Datanodes heart beating with Namenode<br/>
2. Blocks waiting to be replicated<br/>
3. Blocks currently being replicated<br/>
4. Blocks waiting to be deleted</td>
</tr>
<tr>
<td><code>-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;</code></td>
<td>Set the quota &lt;quota&gt; for each directory &lt;dirname&gt;.
The directory quota is a long integer that puts a hard limit on the number of names in the directory tree.<br/>
Best effort for the directory, with faults reported if<br/>
1. N is not a positive integer, or<br/>
2. user is not an administrator, or<br/>
3. the directory does not exist or is a file, or<br/>
4. the directory would immediately exceed the new quota.</td>
</tr>
<tr>
<td><code>-clrQuota &lt;dirname&gt;...&lt;dirname&gt;</code></td>
<td>Clear the quota for each directory &lt;dirname&gt;.<br/>
Best effort for the directory, with faults reported if<br/>
1. the directory does not exist or is a file, or<br/>
2. user is not an administrator.<br/>
It does not fault if the directory has no quota.</td>
</tr>
<tr>
<td><code>-restoreFailedStorage true | false | check</code></td>
<td>This option turns on/off the automatic attempt to restore failed storage replicas.
If a failed storage becomes available again, the system will attempt to restore
edits and/or the fsimage during a checkpoint. The 'check' option returns the current setting.</td>
</tr>
<tr>
<td><code>-help [cmd]</code></td>
<td> Displays help for the given command or all commands if none
is specified.</td>
</tr>
</table>
</section>
<section>
<title>mradmin</title>
<p>Runs the MR admin client.</p>
<p><code>Usage: hadoop mradmin [</code>
<a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a>
<code>] [-refreshServiceAcl] [-refreshQueues] [-refreshNodes] [-help [cmd]] </code></p>
<table>
<tr>
<th> COMMAND_OPTION </th><th> Description </th>
</tr>
<tr>
<td><code>-refreshServiceAcl</code></td>
<td> Reload the service-level authorization policies. Jobtracker
will reload the authorization policy file.</td>
</tr>
<tr>
<td><anchor id="RefreshQueues"/><code>-refreshQueues</code></td>
<td><p> Reload the queues' configuration at the JobTracker.
Most of the configuration of the queues can be refreshed/reloaded
without restarting the Map/Reduce sub-system. Administrators
typically own the
<a href="cluster_setup.html#mapred-queues.xml">
<em>conf/mapred-queues.xml</em></a>
file, can edit it while the JobTracker is still running, and can do
a reload by running this command.</p>
<p>It should be noted that while trying to refresh queues'
configuration, one cannot change the hierarchy of queues itself.
This means no operation that involves a change in either the
hierarchy structure itself or the queues' names will be allowed.
Only selected properties of queues can be changed during refresh.
For example, new queues cannot be added dynamically, nor can an
existing queue be deleted.</p>
<p>If, during a reload of the queue configuration,
a syntactic or semantic error is made while editing the
configuration file, the refresh command fails with an exception that
is printed on the standard output of this command, informing the
requester of what has gone wrong during
the edit/reload. Importantly, the existing queue configuration is
untouched and the system is left in a consistent state.
</p>
<p>As described in the
<a href="cluster_setup.html#mapred-queues.xml"><em>
conf/mapred-queues.xml</em></a> section, the
<a href="cluster_setup.html#properties_tag"><em>
&lt;properties&gt;</em></a> tag in the queue configuration file can
also be used to specify per-queue properties needed by the scheduler.
When the framework's queue configuration is reloaded using this
command, this scheduler-specific configuration will also be reloaded,
provided the scheduler being configured supports this reload.
Please see the documentation of the particular scheduler in use.</p>
</td>
</tr>
<tr>
<td><code>-refreshNodes</code></td>
<td> Refresh the hosts information at the jobtracker.</td>
</tr>
<tr>
<td><code>-help [cmd]</code></td>
<td>Displays help for the given command or all commands if none
is specified.</td>
</tr>
</table>
</section>
<section>
<title> jobtracker </title>
<p>
Runs the MapReduce JobTracker node.
</p>
<p>
<code>Usage: hadoop jobtracker [-dumpConfiguration]</code>
</p>
<table>
<tr>
<th>COMMAND_OPTION</th><th> Description</th>
</tr>
<tr>
<td><code>-dumpConfiguration</code></td>
<td> Dumps the configuration used by the JobTracker, along with the queue
configuration, in JSON format to standard output, and exits.</td>
</tr>
</table>
</section>
<section>
<title> namenode </title>
<p>
Runs the namenode. For more information about upgrade, rollback and finalize see
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Upgrade+and+Rollback">Upgrade and Rollback</a>.
</p>
<p>
<code>Usage: hadoop namenode [-format [-force] [-nonInteractive] [-clusterid someid]] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint] | [-checkpoint] | [-backup]</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-regular</code></td>
<td>Start namenode in standard, active role rather than as backup or checkpoint node. This is the default role.</td>
</tr>
<tr>
<td><code>-checkpoint</code></td>
<td>Start namenode in checkpoint role, creating periodic checkpoints of the active namenode metadata.</td>
</tr>
<tr>
<td><code>-backup</code></td>
<td>Start namenode in backup role, maintaining an up-to-date in-memory copy of the namespace and creating periodic checkpoints.</td>
</tr>
<tr>
<td><code>-format [-force] [-nonInteractive] [-clusterid someid]</code></td>
<td>Formats the namenode. It starts the namenode, formats it and then shuts it down. The user will be prompted before formatting any non-empty name directories in the local filesystem.<br/>
-nonInteractive: The user will not be prompted for input if non-empty name directories exist in the local filesystem, and the format will fail.<br/>
-force: Formats the namenode and the user will NOT be prompted to confirm formatting of the name directories in the local filesystem. If -nonInteractive option is specified it will be ignored.<br/>
-clusterid: Associates the namenode with the id specified. When formatting federated namenodes use this option to make sure all namenodes are associated with the same id.</td>
</tr>
<tr>
<td><code>-upgrade</code></td>
<td>Namenode should be started with upgrade option after the distribution of new Hadoop version.</td>
</tr>
<tr>
<td><code>-rollback</code></td>
<td>Rolls back the namenode to the previous version. This should be used after stopping the cluster
and distributing the old Hadoop version.</td>
</tr>
<tr>
<td><code>-finalize</code></td>
<td>Finalize will remove the previous state of the file system. The recent upgrade will become permanent.
Rollback option will not be available anymore. After finalization it shuts the namenode down.</td>
</tr>
<tr>
<td><code>-importCheckpoint</code></td>
<td>Loads image from a checkpoint directory and saves it into the current one. Checkpoint directory
is read from property dfs.namenode.checkpoint.dir
(see <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Import+checkpoint">Import Checkpoint</a>).
</td>
</tr>
<tr>
<td><code>-checkpoint</code></td>
<td>Enables checkpointing
(see <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Checkpoint+Node">Checkpoint Node</a>).</td>
</tr>
<tr>
<td><code>-backup</code></td>
<td>Enables checkpointing and maintains an in-memory, up-to-date copy of the file system namespace
(see <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Backup+Node">Backup Node</a>).</td>
</tr>
</table>
</section>
<section>
<title> secondarynamenode </title>
<p>
Runs the HDFS secondary
namenode. See <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Secondary+NameNode">Secondary NameNode</a>
for more info.
</p>
<p>
<code>Usage: hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]</code>
</p>
<table>
<tr><th> COMMAND_OPTION </th><th> Description </th></tr>
<tr>
<td><code>-checkpoint [force]</code></td>
<td>Checkpoints the Secondary namenode if EditLog size >= dfs.namenode.checkpoint.size.
If -force is used, checkpoint irrespective of EditLog size.</td>
</tr>
<tr>
<td><code>-geteditsize</code></td>
<td>Prints the EditLog size.</td>
</tr>
</table>
</section>
<section>
<title> tasktracker </title>
<p>
Runs a MapReduce TaskTracker node.
</p>
<p>
<code>Usage: hadoop tasktracker</code>
</p>
</section>
</section>
</body>
</document>
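
The Generic Options section above notes that applications should implement Tool so that GenericOptionsParser handles -conf, -D, -fs, -jt, -files, -libjars and -archives for them. A minimal sketch, assuming hadoop-common is on the classpath; the class name MyTool is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class MyTool extends Configured implements Tool {
      @Override
      public int run(String[] args) throws Exception {
        // Generic options have already been parsed by ToolRunner and applied
        // to getConf() by the time run() is called.
        Configuration conf = getConf();
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
        return 0;
      }

      public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new MyTool(), args));
      }
    }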

View File

@ -1,594 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>File System Shell Guide</title>
</header>
<body>
<section>
<title>Overview</title>
<p>
The File System (FS) shell includes various shell-like commands that directly
interact with the Hadoop Distributed File System (HDFS) as well as other file systems that Hadoop supports,
such as Local FS, HFTP FS, S3 FS, and others. The FS shell is invoked by: </p>
<source>bin/hdfs dfs &lt;args&gt;</source>
<p>
All FS shell commands take path URIs as arguments. The URI
format is <em>scheme://authority/path</em>. For HDFS the scheme
is <em>hdfs</em>, and for the Local FS the scheme
is <em>file</em>. The scheme and authority are optional. If not
specified, the default scheme specified in the configuration is
used. An HDFS file or directory such as <em>/parent/child</em>
can be specified as <em>hdfs://namenodehost/parent/child</em> or
simply as <em>/parent/child</em> (given that your configuration
is set to point to <em>hdfs://namenodehost</em>).
</p>
<p>
Most of the commands in FS shell behave like corresponding Unix
commands. Differences are described with each of the
commands. Error information is sent to <em>stderr</em> and the
output is sent to <em>stdout</em>.
</p>
<!-- CAT -->
<section>
<title> cat </title>
<p>
<code>Usage: hdfs dfs -cat URI [URI &#x2026;]</code>
</p>
<p>
Copies source paths to <em>stdout</em>.
</p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -cat hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2
</code>
</li>
<li>
<code>hdfs dfs -cat file:///file3 /user/hadoop/file4 </code>
</li>
</ul>
<p>Exit Code:<br/>
<code> Returns 0 on success and -1 on error. </code></p>
</section>
<!-- CHGRP -->
<section>
<title> chgrp </title>
<p>
<code>Usage: hdfs dfs -chgrp [-R] GROUP URI [URI &#x2026;]</code>
</p>
<p>
Change group association of files. With <code>-R</code>, make the change recursively through the directory structure.
The user must be the owner of files, or else a super-user.
Additional information is in the <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_permissions_guide.html">HDFS Permissions Guide</a>.
</p>
</section>
<section>
<title> chmod </title>
<p>
<code>Usage: hdfs dfs -chmod [-R] &lt;MODE[,MODE]... | OCTALMODE&gt; URI [URI &#x2026;]</code>
</p>
<p>
Change the permissions of files. With <code>-R</code>, make the change recursively through the directory structure.
The user must be the owner of the file, or else a super-user.
Additional information is in the <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_permissions_guide.html">HDFS Permissions Guide</a>.
</p>
</section>
<!-- CHOWN -->
<section>
<title> chown </title>
<p>
<code>Usage: hdfs dfs -chown [-R] [OWNER][:[GROUP]] URI [URI ]</code>
</p>
<p>
Change the owner of files. With <code>-R</code>, make the change recursively through the directory structure.
The user must be a super-user.
Additional information is in the <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_permissions_guide.html">HDFS Permissions Guide</a>.
</p>
</section>
<!-- COPYFROMLOCAL -->
<section>
<title>copyFromLocal</title>
<p>
<code>Usage: hdfs dfs -copyFromLocal &lt;localsrc&gt; URI</code>
</p>
<p>Similar to <a href="#put"><strong>put</strong></a> command, except that the source is restricted to a local file reference. </p>
</section>
<!-- COPYTOLOCAL -->
<section>
<title> copyToLocal</title>
<p>
<code>Usage: hdfs dfs -copyToLocal [-ignorecrc] [-crc] URI &lt;localdst&gt;</code>
</p>
<p> Similar to <a href="#get"><strong>get</strong></a> command, except that the destination is restricted to a local file reference.</p>
</section>
<!-- COUNT -->
<section>
<title> count </title>
<p>
<code>Usage: hdfs dfs -count [-q] &lt;paths&gt;</code>
</p>
<p>
Count the number of directories, files and bytes under the paths that match the specified file pattern. <br/><br/>
The output columns with <code>-count </code> are:<br/><br/>
<code>DIR_COUNT, FILE_COUNT, CONTENT_SIZE, FILE_NAME</code> <br/><br/>
The output columns with <code>-count -q</code> are:<br/><br/>
<code>QUOTA, REMAINING_QUOTA, SPACE_QUOTA, REMAINING_SPACE_QUOTA,
DIR_COUNT, FILE_COUNT, CONTENT_SIZE, FILE_NAME</code>
</p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -count hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2
</code>
</li>
<li>
<code> hdfs dfs -count -q hdfs://nn1.example.com/file1
</code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error.</code>
</p>
</section>
<!-- CP -->
<section>
<title> cp </title>
<p>
<code>Usage: hdfs dfs -cp URI [URI &#x2026;] &lt;dest&gt;</code>
</p>
<p>
Copy files from source to destination. This command allows multiple sources as well in which case the destination must be a directory.
<br/>
Example:</p>
<ul>
<li>
<code> hdfs dfs -cp /user/hadoop/file1 /user/hadoop/file2</code>
</li>
<li>
<code> hdfs dfs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir </code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error.</code>
</p>
</section>
<!-- DU -->
<section>
<title>du</title>
<p>
<code>Usage: hdfs dfs -du [-s] [-h] URI [URI &#x2026;]</code>
</p>
<p>
Displays sizes of files and directories contained in the given directory, or the length of a file in case it's just a file.</p>
<p>Options:</p>
<ul>
<li>The <code>-s</code> option will result in an aggregate summary of file lengths being displayed, rather than the individual files.</li>
<li>The <code>-h</code> option will format file sizes in a &quot;human-readable&quot; fashion (e.g 64.0m instead of 67108864)</li>
</ul>
<p>
Example:<br/><code>hdfs dfs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://nn.example.com/user/hadoop/dir1</code><br/>
Exit Code:<br/><code> Returns 0 on success and -1 on error. </code><br/></p>
</section>
<!-- DUS -->
<section>
<title> dus </title>
<p>
<code>Usage: hdfs dfs -dus &lt;args&gt;</code>
</p>
<p>
Displays a summary of file lengths. This is an alternate form of <code>hdfs dfs -du -s</code>.
</p>
</section>
<!-- EXPUNGE -->
<section>
<title> expunge </title>
<p>
<code>Usage: hdfs dfs -expunge</code>
</p>
<p>Empty the Trash. Refer to the <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_design.html">HDFS Architecture Guide</a>
for more information on the Trash feature.</p>
</section>
<!-- GET -->
<section>
<title> get </title>
<p>
<code>Usage: hdfs dfs -get [-ignorecrc] [-crc] &lt;src&gt; &lt;localdst&gt;</code>
<br/>
</p>
<p>
Copy files to the local file system. Files that fail the CRC check may be copied with the
<code>-ignorecrc</code> option. Files and CRCs may be copied using the
<code>-crc</code> option.
</p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -get /user/hadoop/file localfile </code>
</li>
<li>
<code> hdfs dfs -get hdfs://nn.example.com/user/hadoop/file localfile</code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error. </code>
</p>
</section>
<!-- GETMERGE -->
<section>
<title> getmerge </title>
<p>
<code>Usage: hdfs dfs -getmerge [-nl] &lt;src&gt; &lt;localdst&gt;</code>
</p>
<p>
Takes a source directory and a destination file as input and concatenates files in src into the destination local file.
Optionally, the <code>-nl</code> flag can be set to add a newline character at the end of each file during the merge.
</p>
</section>
<!-- LS -->
<section>
<title>ls</title>
<p>
<code>Usage: hdfs dfs -ls [-d] [-h] [-R] &lt;args&gt;</code>
</p>
<p>For a file returns stat on the file with the following format:</p>
<p>
<code>permissions number_of_replicas userid groupid filesize modification_date modification_time filename</code>
</p>
<p>For a directory it returns the list of its direct children, as in Unix. A directory is listed as:</p>
<p>
<code>permissions userid groupid modification_date modification_time dirname</code>
</p>
<p>Options:</p>
<ul>
<li><code>-d</code> Directories are listed as plain files</li>
<li><code>-h</code> Format file sizes in a &quot;human-readable&quot; fashion (e.g 64.0m instead of 67108864)</li>
<li><code>-R</code> Recursively list subdirectories encountered</li>
</ul>
<p>Example:</p>
<p>
<code>hdfs dfs -ls /user/hadoop/file1 </code>
</p>
<p>Exit Code:</p>
<p>
<code>Returns 0 on success and -1 on error.</code>
</p>
</section>
<!-- LSR -->
<section>
<title>lsr</title>
<p><code>Usage: hdfs dfs -lsr &lt;args&gt;</code><br/>
Recursive version of <code>ls</code>. Similar to Unix <code>ls -R</code>.
</p>
</section>
<!-- MKDIR -->
<section>
<title> mkdir </title>
<p>
<code>Usage: hdfs dfs -mkdir &lt;paths&gt;</code>
<br/>
</p>
<p>
Takes path URIs as arguments and creates directories. The behavior is much like Unix mkdir -p, creating parent directories along the path.
</p>
<p>Example:</p>
<ul>
<li>
<code>hdfs dfs -mkdir /user/hadoop/dir1 /user/hadoop/dir2 </code>
</li>
<li>
<code>hdfs dfs -mkdir hdfs://nn1.example.com/user/hadoop/dir hdfs://nn2.example.com/user/hadoop/dir
</code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code>Returns 0 on success and -1 on error.</code>
</p>
</section>
<!-- MOVEFROMLOCAL -->
<section>
<title> moveFromLocal </title>
<p>
<code>Usage: hdfs dfs -moveFromLocal &lt;localsrc&gt; &lt;dst&gt;</code>
</p>
<p>Similar to <a href="#put"><strong>put</strong></a> command, except that the source <code>localsrc</code> is deleted after it's copied. </p>
</section>
<!-- MOVETOLOCAL -->
<section>
<title> moveToLocal</title>
<p>
<code>Usage: hdfs dfs -moveToLocal [-crc] &lt;src&gt; &lt;dst&gt;</code>
</p>
<p>Displays a "Not implemented yet" message.</p>
</section>
<!-- MV -->
<section>
<title> mv </title>
<p>
<code>Usage: hdfs dfs -mv URI [URI &#x2026;] &lt;dest&gt;</code>
</p>
<p>
Moves files from source to destination. This command allows multiple sources as well in which case the destination needs to be a directory.
Moving files across file systems is not permitted.
<br/>
Example:
</p>
<ul>
<li>
<code> hdfs dfs -mv /user/hadoop/file1 /user/hadoop/file2</code>
</li>
<li>
<code> hdfs dfs -mv hdfs://nn.example.com/file1 hdfs://nn.example.com/file2 hdfs://nn.example.com/file3 hdfs://nn.example.com/dir1</code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error.</code>
</p>
</section>
<!-- PUT -->
<section>
<title> put </title>
<p>
<code>Usage: hdfs dfs -put &lt;localsrc&gt; ... &lt;dst&gt;</code>
</p>
<p>Copy single src, or multiple srcs from local file system to the destination file system.
Also reads input from stdin and writes to destination file system.<br/>
</p>
<ul>
<li>
<code> hdfs dfs -put localfile /user/hadoop/hadoopfile</code>
</li>
<li>
<code> hdfs dfs -put localfile1 localfile2 /user/hadoop/hadoopdir</code>
</li>
<li>
<code> hdfs dfs -put localfile hdfs://nn.example.com/hadoop/hadoopfile</code>
</li>
<li><code>hdfs dfs -put - hdfs://nn.example.com/hadoop/hadoopfile</code><br/>Reads the input from stdin.</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error. </code>
</p>
</section>
<!-- RM -->
<section>
<title> rm </title>
<p>
<code>Usage: hdfs dfs -rm [-skipTrash] URI [URI &#x2026;] </code>
</p>
<p>
Delete files specified as args. Only deletes files. If the <code>-skipTrash</code> option
is specified, the trash, if enabled, will be bypassed and the specified file(s) deleted immediately. This can be
useful when it is necessary to delete files from an over-quota directory.
Use -rm -r or rmr for recursive deletes.<br/>
Example:
</p>
<ul>
<li>
<code> hdfs dfs -rm hdfs://nn.example.com/file </code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error.</code>
</p>
</section>
<!-- RMR -->
<section>
<title> rmr </title>
<p>
<code>Usage: hdfs dfs -rmr [-skipTrash] URI [URI &#x2026;]</code>
</p>
<p>Recursive version of delete. The rmr command recursively deletes the directory and any content under it. If the <code>-skipTrash</code> option
is specified, the trash, if enabled, will be bypassed and the specified file(s) deleted immediately. This can be
useful when it is necessary to delete files from an over-quota directory.<br/>
Example:
</p>
<ul>
<li>
<code> hdfs dfs -rmr /user/hadoop/dir </code>
</li>
<li>
<code> hdfs dfs -rmr hdfs://nn.example.com/user/hadoop/dir </code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code> Returns 0 on success and -1 on error. </code>
</p>
</section>
<!-- SETREP -->
<section>
<title> setrep </title>
<p>
<code>Usage: hdfs dfs -setrep [-R] [-w] &lt;rep&gt; &lt;path&gt;</code>
</p>
<p>
Changes the replication factor of a file. The -R option recursively changes the replication factor of files within a directory.
</p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -setrep -w 3 -R /user/hadoop/dir1 </code>
</li>
</ul>
<p>Exit Code:</p>
<p>
<code>Returns 0 on success and -1 on error. </code>
</p>
</section>
<!-- STAT -->
<section>
<title> stat </title>
<p>
<code>Usage: hdfs dfs -stat [format] URI [URI &#x2026;]</code>
</p>
<p>Print statistics about the file/directory matching the given URI pattern in the specified format.</p>
<p>Format accepts:</p>
<ul>
<li>filesize in blocks (%b)</li>
<li>filename (%n)</li>
<li>block size (%o)</li>
<li>replication (%r)</li>
<li>modification date, formatted as Y-M-D H:M:S (%y)</li>
<li>modification date, in epoch seconds (%Y)</li>
</ul>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -stat path </code>
</li>
<li>
<code> hdfs dfs -stat %y path </code>
</li>
<li>
<code> hdfs dfs -stat '%b %r' path </code>
</li>
</ul>
<p>Exit Code:<br/>
<code> Returns 0 on success and -1 on error.</code></p>
</section>
<!-- TAIL-->
<section>
<title> tail </title>
<p>
<code>Usage: hdfs dfs -tail [-f] URI </code>
</p>
<p>
Displays the last kilobyte of the file to stdout. The -f option can be used as in Unix.
</p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -tail pathname </code>
</li>
</ul>
<p>Exit Code: <br/>
<code> Returns 0 on success and -1 on error.</code></p>
</section>
<!-- TEST -->
<section>
<title> test </title>
<p>
<code>Usage: hdfs dfs -test -[ezd] URI</code>
</p>
<p>
Options: <br/>
-e check to see if the file exists. Return 0 if true. <br/>
-z check to see if the file is zero length. Return 0 if true. <br/>
-d check to see if the path is a directory. Return 0 if true. <br/></p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -test -e filename </code>
</li>
</ul>
</section>
<!-- TEXT -->
<section>
<title> text </title>
<p>
<code>Usage: hdfs dfs -text &lt;src&gt;</code>
<br/>
</p>
<p>
Takes a source file and outputs the file in text format. The allowed formats are zip and TextRecordInputStream.
</p>
</section>
<!-- TOUCHZ -->
<section>
<title> touchz </title>
<p>
<code>Usage: hdfs dfs -touchz URI [URI &#x2026;]</code>
<br/>
</p>
<p>
Create a file of zero length.
</p>
<p>Example:</p>
<ul>
<li>
<code> hdfs dfs -touchz pathname </code>
</li>
</ul>
<p>Exit Code:<br/>
<code> Returns 0 on success and -1 on error.</code></p>
</section>
</section>
</body>
</document>
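
The shell commands above are dispatched to org.apache.hadoop.fs.FsShell (the class behind bin/hdfs dfs), which implements Tool, so they can also be driven from Java. A minimal sketch, assuming hadoop-common is on the classpath; the -ls path is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FsShell;
    import org.apache.hadoop.util.ToolRunner;

    public class FsShellExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Equivalent to: hdfs dfs -ls /user/hadoop
        int rc = ToolRunner.run(conf, new FsShell(), new String[] { "-ls", "/user/hadoop" });
        // Matches the exit codes documented above: 0 on success, -1 on error.
        System.exit(rc);
      }
    }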

View File

@ -85,14 +85,20 @@ public abstract class AbstractFileSystem {
}
/**
* Prohibits names which contain a ".", "..", ":" or "/"
* Returns true if the specified string is considered valid in the path part
* of a URI by this file system. The default implementation enforces the rules
* of HDFS, but subclasses may override this method to implement specific
* validation rules for specific file systems.
*
* @param src String source filename to check, path part of the URI
* @return boolean true if the specified string is considered valid
*/
private static boolean isValidName(String src) {
// Check for ".." "." ":" "/"
public boolean isValidName(String src) {
// Prohibit ".." "." and anything containing ":"
StringTokenizer tokens = new StringTokenizer(src, Path.SEPARATOR);
while(tokens.hasMoreTokens()) {
String element = tokens.nextToken();
if (element.equals("target/generated-sources") ||
if (element.equals("..") ||
element.equals(".") ||
(element.indexOf(":") >= 0)) {
return false;
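
HADOOP-8957 turns the name check into a public, overridable method so that embedded file systems such as ViewFs can supply their own rules (see the FilterFs, RawLocalFs and ChRootedFs hunks below). A minimal caller-side sketch, assuming hadoop-common on the classpath; the path string is illustrative:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.AbstractFileSystem;

    public class IsValidNameExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolve the AbstractFileSystem implementation for the file:// scheme.
        AbstractFileSystem fs = AbstractFileSystem.get(URI.create("file:///"), conf);
        // The default implementation still rejects ".", ".." and path elements
        // containing ":"; subclasses like RawLocalFs override isValidName to
        // defer validation to the underlying OS.
        System.out.println(fs.isValidName("/tmp/some:odd:name"));
      }
    }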

View File

@ -136,7 +136,7 @@ public class DU extends Shell {
}
}
return used.longValue();
return Math.max(used.longValue(), 0L);
}
/**

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.fs;
import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.concurrent.DelayQueue;
@ -147,6 +149,12 @@ public class DelegationTokenRenewer
/** Queue to maintain the RenewActions to be processed by the {@link #run()} */
private volatile DelayQueue<RenewAction<?>> queue = new DelayQueue<RenewAction<?>>();
/** For testing purposes */
@VisibleForTesting
protected int getRenewQueueLength() {
return queue.size();
}
/**
* Create the singleton instance. However, the thread can be started lazily in
* {@link #addRenewAction(FileSystem)}

View File

@ -349,9 +349,15 @@ public class FileStatus implements Writable, Comparable {
sb.append("; replication=" + block_replication);
sb.append("; blocksize=" + blocksize);
}
sb.append("; modification_time=" + modification_time);
sb.append("; access_time=" + access_time);
sb.append("; owner=" + owner);
sb.append("; group=" + group);
sb.append("; permission=" + permission);
sb.append("; isSymlink=" + isSymlink());
if(isSymlink()) {
sb.append("; symlink=" + symlink);
}
sb.append("}");
return sb.toString();
}
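
With HADOOP-9147 applied, FileStatus#toString also reports modification_time, access_time, owner, group, permission, isSymlink and, for symlinks, the target. A minimal sketch printing the expanded form, assuming hadoop-common on the classpath; the path is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class FileStatusToStringExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        FileStatus status = fs.getFileStatus(new Path("/tmp"));
        // Prints the full field list added by this change.
        System.out.println(status);
      }
    }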

View File

@ -167,6 +167,18 @@ public class FilterFileSystem extends FileSystem {
overwrite, bufferSize, replication, blockSize, progress);
}
@Override
@Deprecated
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
return fs.createNonRecursive(f, permission, flags, bufferSize, replication, blockSize,
progress);
}
/**
* Set replication for an existing file.
*

View File

@ -278,4 +278,9 @@ public abstract class FilterFs extends AbstractFileSystem {
public List<Token<?>> getDelegationTokens(String renewer) throws IOException {
return myFs.getDelegationTokens(renewer);
}
@Override
public boolean isValidName(String src) {
return myFs.isValidName(src);
}
}

View File

@ -56,6 +56,12 @@ public class FsUrlStreamHandlerFactory implements
public FsUrlStreamHandlerFactory(Configuration conf) {
this.conf = new Configuration(conf);
// force init of FileSystem code to avoid HADOOP-9041
try {
FileSystem.getFileSystemClass("file", conf);
} catch (IOException io) {
throw new RuntimeException(io);
}
this.handler = new FsUrlStreamHandler(this.conf);
}
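
FsUrlStreamHandlerFactory lets plain java.net.URL resolve Hadoop file system schemes; the new constructor body forces FileSystem class loading up front to avoid the initialization loop described in HADOOP-9041. A minimal usage sketch, assuming hadoop-common (and, for hdfs:// URLs, an HDFS client) on the classpath; the URL is illustrative:

    import java.io.InputStream;
    import java.net.URL;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;

    public class FsUrlExample {
      public static void main(String[] args) throws Exception {
        // May be called at most once per JVM; afterwards URL understands hdfs://
        // and any other scheme with a registered Hadoop FileSystem implementation.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory(new Configuration()));
        try (InputStream in = new URL("hdfs://namenodehost/parent/child").openStream()) {
          System.out.println("first byte: " + in.read());
        }
      }
    }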

View File

@ -30,6 +30,7 @@ import java.io.FileDescriptor;
import java.net.URI;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.StringTokenizer;
import org.apache.hadoop.classification.InterfaceAudience;
@ -282,6 +283,18 @@ public class RawLocalFileSystem extends FileSystem {
new LocalFSFileOutputStream(f, false), bufferSize), statistics);
}
@Override
@Deprecated
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
if (exists(f) && !flags.contains(CreateFlag.OVERWRITE)) {
throw new IOException("File already exists: "+f);
}
return new FSDataOutputStream(new BufferedOutputStream(
new LocalFSFileOutputStream(f, false), bufferSize), statistics);
}
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
boolean overwrite, int bufferSize, short replication, long blockSize,

View File

@ -159,6 +159,14 @@ public class RawLocalFs extends DelegateToFileSystem {
}
}
@Override
public boolean isValidName(String src) {
// Different local file systems have different validation rules. Skip
// validation here and just let the OS handle it. This is consistent with
// RawLocalFileSystem.
return true;
}
@Override
public Path getLinkTarget(Path f) throws IOException {
/* We should never get here. Valid local links are resolved transparently

View File

@ -311,6 +311,7 @@ abstract public class Command extends Configured {
if (recursive && item.stat.isDirectory()) {
recursePath(item);
}
postProcessPath(item);
} catch (IOException e) {
displayError(e);
}
@ -329,6 +330,15 @@ abstract public class Command extends Configured {
throw new RuntimeException("processPath() is not implemented");
}
/**
* Hook for commands to implement an operation to be applied to each
* path after it has been processed successfully
* @param item a {@link PathData} object
* @throws IOException if anything goes wrong...
*/
protected void postProcessPath(PathData item) throws IOException {
}
/**
* Gets the directory listing for a path and invokes
* {@link #processPaths(PathData, PathData...)}

View File

@ -24,6 +24,7 @@ import java.util.LinkedList;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.PathExistsException;
import org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal;
/** Various commands for moving files */
@ -49,7 +50,21 @@ class MoveCommands {
@Override
protected void processPath(PathData src, PathData target) throws IOException {
target.fs.moveFromLocalFile(src.path, target.path);
// unlike copy, don't merge existing dirs during move
if (target.exists && target.stat.isDirectory()) {
throw new PathExistsException(target.toString());
}
super.processPath(src, target);
}
@Override
protected void postProcessPath(PathData src) throws IOException {
if (!src.fs.delete(src.path, false)) {
// we have no way to know the actual error...
PathIOException e = new PathIOException(src.toString());
e.setOperation("remove");
throw e;
}
}
}
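The hook pair above is a small template-method pattern: postProcessPath runs only when processPath finished without throwing, which is what lets moveFromLocal delete its local source only after a successful copy. A generic, self-contained sketch of the same idea (these classes are illustrative, not the FsShell ones):
import java.io.IOException;
abstract class PathProcessor {
  /** Applies the main step, then the post step only on success. */
  final void handle(String path) {
    try {
      processPath(path);
      postProcessPath(path);   // skipped if processPath threw
    } catch (IOException e) {
      System.err.println("error processing " + path + ": " + e.getMessage());
    }
  }
  protected abstract void processPath(String path) throws IOException;
  /** Optional hook; the default does nothing, mirroring Command above. */
  protected void postProcessPath(String path) throws IOException {
  }
}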

View File

@ -19,11 +19,14 @@ package org.apache.hadoop.fs.viewfs;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.EnumSet;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileChecksum;
@ -172,6 +175,16 @@ class ChRootedFileSystem extends FilterFileSystem {
replication, blockSize, progress);
}
@Override
@Deprecated
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
return super.createNonRecursive(fullPath(f), permission, flags, bufferSize, replication, blockSize,
progress);
}
@Override
public boolean delete(final Path f, final boolean recursive)
throws IOException {

View File

@ -84,6 +84,11 @@ class ChRootedFs extends AbstractFileSystem {
+ path.toUri().getPath());
}
@Override
public boolean isValidName(String src) {
return myFs.isValidName(fullPath(new Path(src)).toUri().toString());
}
public ChRootedFs(final AbstractFileSystem fs, final Path theRoot)
throws URISyntaxException {
super(fs.getUri(), fs.getUri().getScheme(),
@ -103,7 +108,7 @@ class ChRootedFs extends AbstractFileSystem {
// scheme:/// and scheme://authority/
myUri = new URI(myFs.getUri().toString() +
(myFs.getUri().getAuthority() == null ? "" : Path.SEPARATOR) +
chRootPathPart.toString().substring(1));
chRootPathPart.toUri().getPath().substring(1));
super.checkPath(theRoot);
}

View File

@ -24,6 +24,7 @@ import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@ -35,6 +36,7 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
@ -62,6 +64,9 @@ import org.apache.hadoop.util.Time;
@InterfaceAudience.Public
@InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */
public class ViewFileSystem extends FileSystem {
private static final Path ROOT_PATH = new Path(Path.SEPARATOR);
static AccessControlException readOnlyMountTable(final String operation,
final String p) {
return new AccessControlException(
@ -96,23 +101,6 @@ public class ViewFileSystem extends FileSystem {
InodeTree<FileSystem> fsState; // the fs state; ie the mount table
Path homeDir = null;
/**
* Prohibits names which contain a ".", "..", ":" or "/"
*/
private static boolean isValidName(final String src) {
// Check for ".." "." ":" "/"
final StringTokenizer tokens = new StringTokenizer(src, Path.SEPARATOR);
while(tokens.hasMoreTokens()) {
String element = tokens.nextToken();
if (element.equals("..") ||
element.equals(".") ||
(element.indexOf(":") >= 0)) {
return false;
}
}
return true;
}
/**
* Make the path Absolute and get the path-part of a pathname.
* Checks that URI matches this file system
@ -124,10 +112,6 @@ public class ViewFileSystem extends FileSystem {
private String getUriPath(final Path p) {
checkPath(p);
String s = makeAbsolute(p).toUri().getPath();
if (!isValidName(s)) {
throw new InvalidPathException("Path part " + s + " from URI" + p
+ " is not a valid filename.");
}
return s;
}
@ -282,6 +266,21 @@ public class ViewFileSystem extends FileSystem {
return res.targetFileSystem.append(res.remainingPath, bufferSize, progress);
}
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
InodeTree.ResolveResult<FileSystem> res;
try {
res = fsState.resolve(getUriPath(f), false);
} catch (FileNotFoundException e) {
throw readOnlyMountTable("create", f);
}
assert(res.remainingPath != null);
return res.targetFileSystem.createNonRecursive(res.remainingPath, permission,
flags, bufferSize, replication, blockSize, progress);
}
@Override
public FSDataOutputStream create(final Path f, final FsPermission permission,
final boolean overwrite, final int bufferSize, final short replication,
@ -672,7 +671,7 @@ public class ViewFileSystem extends FileSystem {
PERMISSION_RRR, ugi.getUserName(), ugi.getGroupNames()[0],
new Path(theInternalDir.fullPath).makeQualified(
myUri, null));
myUri, ROOT_PATH));
}

View File

@ -597,6 +597,12 @@ public class ViewFs extends AbstractFileSystem {
return result;
}
@Override
public boolean isValidName(String src) {
// Prefix validated at mount time and rest of path validated by mount target.
return true;
}
/*

View File

@ -21,6 +21,8 @@ package org.apache.hadoop.ha;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
@ -45,6 +47,7 @@ import org.apache.zookeeper.KeeperException.Code;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
/**
*
@ -205,7 +208,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
int zookeeperSessionTimeout, String parentZnodeName, List<ACL> acl,
List<ZKAuthInfo> authInfo,
ActiveStandbyElectorCallback app) throws IOException,
HadoopIllegalArgumentException {
HadoopIllegalArgumentException, KeeperException {
if (app == null || acl == null || parentZnodeName == null
|| zookeeperHostPorts == null || zookeeperSessionTimeout <= 0) {
throw new HadoopIllegalArgumentException("Invalid argument");
@ -602,10 +605,24 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
*
* @return new zookeeper client instance
* @throws IOException
* @throws KeeperException zookeeper connectionloss exception
*/
protected synchronized ZooKeeper getNewZooKeeper() throws IOException {
ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, null);
zk.register(new WatcherWithClientRef(zk));
protected synchronized ZooKeeper getNewZooKeeper() throws IOException,
KeeperException {
// Unfortunately, the ZooKeeper constructor connects to ZooKeeper and
// may trigger the Connected event immediately. So, if we register the
// watcher after constructing ZooKeeper, we may miss that event. Instead,
// we construct the watcher first, and have it queue any events it receives
// before we can set its ZooKeeper reference.
WatcherWithClientRef watcher = new WatcherWithClientRef();
ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher);
watcher.setZooKeeperRef(zk);
// Wait for the asynchronous success/failure. This may throw an exception
// if we don't connect within the session timeout.
watcher.waitForZKConnectionEvent(zkSessionTimeout);
for (ZKAuthInfo auth : zkAuthInfo) {
zk.addAuthInfo(auth.getScheme(), auth.getAuth());
}
@ -710,13 +727,16 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
} catch(IOException e) {
LOG.warn(e);
sleepFor(5000);
} catch(KeeperException e) {
LOG.warn(e);
sleepFor(5000);
}
++connectionRetryCount;
}
return success;
}
private void createConnection() throws IOException {
private void createConnection() throws IOException, KeeperException {
if (zkClient != null) {
try {
zkClient.close();
@ -973,14 +993,76 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
* events.
*/
private final class WatcherWithClientRef implements Watcher {
private final ZooKeeper zk;
private ZooKeeper zk;
/**
* Latch fired whenever any event arrives. This is used in order
* to wait for the Connected event when the client is first created.
*/
private CountDownLatch hasReceivedEvent = new CountDownLatch(1);
/**
* If any events arrive before the reference to ZooKeeper is set,
* they get queued up and later forwarded when the reference is
* available.
*/
private final List<WatchedEvent> queuedEvents = Lists.newLinkedList();
private WatcherWithClientRef() {
}
private WatcherWithClientRef(ZooKeeper zk) {
this.zk = zk;
}
/**
* Waits for the next event from ZooKeeper to arrive.
*
* @param connectionTimeoutMs zookeeper connection timeout in milliseconds
* @throws KeeperException if the connection attempt times out. This will
* be a ZooKeeper ConnectionLoss exception code.
* @throws IOException if interrupted while connecting to ZooKeeper
*/
private void waitForZKConnectionEvent(int connectionTimeoutMs)
throws KeeperException, IOException {
try {
if (!hasReceivedEvent.await(connectionTimeoutMs, TimeUnit.MILLISECONDS)) {
LOG.error("Connection timed out: couldn't connect to ZooKeeper in "
+ connectionTimeoutMs + " milliseconds");
synchronized (this) {
zk.close();
}
throw KeeperException.create(Code.CONNECTIONLOSS);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new IOException(
"Interrupted when connecting to zookeeper server", e);
}
}
private synchronized void setZooKeeperRef(ZooKeeper zk) {
Preconditions.checkState(this.zk == null,
"zk already set -- must be set exactly once");
this.zk = zk;
for (WatchedEvent e : queuedEvents) {
forwardEvent(e);
}
queuedEvents.clear();
}
@Override
public void process(WatchedEvent event) {
public synchronized void process(WatchedEvent event) {
if (zk != null) {
forwardEvent(event);
} else {
queuedEvents.add(event);
}
}
private void forwardEvent(WatchedEvent event) {
hasReceivedEvent.countDown();
try {
ActiveStandbyElector.this.processWatchEvent(
zk, event);
@ -1024,5 +1106,4 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
((appData == null) ? "null" : StringUtils.byteToHexString(appData)) +
" cb=" + appClient;
}
}
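The watcher changes above follow a general ZooKeeper client pattern: register the watcher in the constructor so no event is missed, buffer anything that arrives before the client reference is available, and expose a latch so the creator can wait for the initial Connected event. A condensed, hypothetical sketch (ensemble address and timeout are placeholders):
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
class BufferingWatcher implements Watcher {
  private final CountDownLatch firstEvent = new CountDownLatch(1);
  private final List<WatchedEvent> queued = new LinkedList<WatchedEvent>();
  private ZooKeeper zk;
  @Override
  public synchronized void process(WatchedEvent event) {
    firstEvent.countDown();
    if (zk == null) {
      queued.add(event);          // client reference not set yet; buffer it
    } else {
      handle(event);
    }
  }
  synchronized void setZooKeeperRef(ZooKeeper zk) {
    this.zk = zk;
    for (WatchedEvent e : queued) {
      handle(e);                  // replay anything that raced the constructor
    }
    queued.clear();
  }
  boolean awaitFirstEvent(long timeoutMs) throws InterruptedException {
    return firstEvent.await(timeoutMs, TimeUnit.MILLISECONDS);
  }
  private void handle(WatchedEvent event) {
    // application-specific handling, e.g. processWatchEvent(zk, event)
  }
}
// Typical use:
//   BufferingWatcher w = new BufferingWatcher();
//   ZooKeeper zk = new ZooKeeper("localhost:2181", 5000, w);
//   w.setZooKeeperRef(zk);
//   w.awaitFirstEvent(5000);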

View File

@ -180,7 +180,15 @@ public abstract class ZKFailoverController {
private int doRun(String[] args)
throws HadoopIllegalArgumentException, IOException, InterruptedException {
try {
initZK();
} catch (KeeperException ke) {
LOG.fatal("Unable to start failover controller. Unable to connect "
+ "to ZooKeeper quorum at " + zkQuorum + ". Please check the "
+ "configured value for " + ZK_QUORUM_KEY + " and ensure that "
+ "ZooKeeper is running.");
return ERR_CODE_NO_ZK;
}
if (args.length > 0) {
if ("-formatZK".equals(args[0])) {
boolean force = false;
@ -200,24 +208,12 @@ public abstract class ZKFailoverController {
}
}
try {
if (!elector.parentZNodeExists()) {
LOG.fatal("Unable to start failover controller. " +
"Parent znode does not exist.\n" +
"Run with -formatZK flag to initialize ZooKeeper.");
LOG.fatal("Unable to start failover controller. "
+ "Parent znode does not exist.\n"
+ "Run with -formatZK flag to initialize ZooKeeper.");
return ERR_CODE_NO_PARENT_ZNODE;
}
} catch (IOException ioe) {
if (ioe.getCause() instanceof KeeperException.ConnectionLossException) {
LOG.fatal("Unable to start failover controller. Unable to connect " +
"to ZooKeeper quorum at " + zkQuorum + ". Please check the " +
"configured value for " + ZK_QUORUM_KEY + " and ensure that " +
"ZooKeeper is running.");
return ERR_CODE_NO_ZK;
} else {
throw ioe;
}
}
try {
localTarget.checkFencingConfigured();
@ -310,7 +306,8 @@ public abstract class ZKFailoverController {
}
private void initZK() throws HadoopIllegalArgumentException, IOException {
private void initZK() throws HadoopIllegalArgumentException, IOException,
KeeperException {
zkQuorum = conf.get(ZK_QUORUM_KEY);
int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY,
ZK_SESSION_TIMEOUT_DEFAULT);

View File

@ -38,6 +38,11 @@ import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
@ -58,11 +63,10 @@ import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadOperationProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcResponseHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcStatusProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto.OperationProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.KerberosInfo;
import org.apache.hadoop.security.SaslRpcClient;
@ -78,6 +82,8 @@ import org.apache.hadoop.util.ProtoUtil;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Time;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/** A client for an IPC service. IPC calls take a single {@link Writable} as a
* parameter, and return a {@link Writable} as their value. A service runs on
* a port and is defined by a parameter class and a value class.
@ -103,6 +109,19 @@ public class Client {
final static int PING_CALL_ID = -1;
/**
* Executor on which IPC calls' parameters are sent. Deferring
* the sending of parameters to a separate thread isolates them
* from thread interruptions in the calling code.
*/
private static final ExecutorService SEND_PARAMS_EXECUTOR =
Executors.newCachedThreadPool(
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("IPC Parameter Sending Thread #%d")
.build());
/**
* set the ping interval value in configuration
*
@ -171,7 +190,7 @@ public class Client {
*/
private class Call {
final int id; // call id
final Writable rpcRequest; // the serialized rpc request - RpcPayload
final Writable rpcRequest; // the serialized rpc request
Writable rpcResponse; // null if rpc has error
IOException error; // exception, null if success
final RPC.RpcKind rpcKind; // Rpc EngineKind
@ -246,6 +265,8 @@ public class Client {
private AtomicBoolean shouldCloseConnection = new AtomicBoolean(); // indicate if the connection is closed
private IOException closeException; // close reason
private final Object sendRpcRequestLock = new Object();
public Connection(ConnectionId remoteId) throws IOException {
this.remoteId = remoteId;
this.server = remoteId.getAddress();
@ -746,7 +767,7 @@ public class Client {
remoteId.getTicket(),
authMethod).writeTo(buf);
// Write out the payload length
// Write out the packet length
int bufLen = buf.getLength();
out.writeInt(bufLen);
@ -810,7 +831,7 @@ public class Client {
try {
while (waitForWork()) {//wait here for work - read or close connection
receiveResponse();
receiveRpcResponse();
}
} catch (Throwable t) {
// This truly is unexpected, since we catch IOException in receiveResponse
@ -827,40 +848,57 @@ public class Client {
+ connections.size());
}
/** Initiates a call by sending the parameter to the remote server.
/** Initiates a rpc call by sending the rpc request to the remote server.
* Note: this is not called from the Connection thread, but by other
* threads.
* @param call - the rpc request
*/
public void sendParam(Call call) {
public void sendRpcRequest(final Call call)
throws InterruptedException, IOException {
if (shouldCloseConnection.get()) {
return;
}
DataOutputBuffer d=null;
// Serialize the call to be sent. This is done from the actual
// caller thread, rather than the SEND_PARAMS_EXECUTOR thread,
// so that if the serialization throws an error, it is reported
// properly. This also parallelizes the serialization.
//
// Format of a call on the wire:
// 0) Length of rest below (1 + 2)
// 1) RpcRequestHeader - is serialized Delimited hence contains length
// 2) RpcRequest
//
// Items '1' and '2' are prepared here.
final DataOutputBuffer d = new DataOutputBuffer();
RpcRequestHeaderProto header = ProtoUtil.makeRpcRequestHeader(
call.rpcKind, OperationProto.RPC_FINAL_PACKET, call.id);
header.writeDelimitedTo(d);
call.rpcRequest.write(d);
synchronized (sendRpcRequestLock) {
Future<?> senderFuture = SEND_PARAMS_EXECUTOR.submit(new Runnable() {
@Override
public void run() {
try {
synchronized (this.out) {
synchronized (Connection.this.out) {
if (shouldCloseConnection.get()) {
return;
}
if (LOG.isDebugEnabled())
LOG.debug(getName() + " sending #" + call.id);
// Serializing the data to be written.
// Format:
// 0) Length of rest below (1 + 2)
// 1) PayloadHeader - is serialized Delimited hence contains length
// 2) the Payload - the RpcRequest
//
d = new DataOutputBuffer();
RpcPayloadHeaderProto header = ProtoUtil.makeRpcPayloadHeader(
call.rpcKind, RpcPayloadOperationProto.RPC_FINAL_PAYLOAD, call.id);
header.writeDelimitedTo(d);
call.rpcRequest.write(d);
byte[] data = d.getData();
int totalLength = d.getLength();
out.writeInt(totalLength); // Total Length
out.write(data, 0, totalLength);//PayloadHeader + RpcRequest
out.write(data, 0, totalLength);// RpcRequestHeader + RpcRequest
out.flush();
}
} catch(IOException e) {
} catch (IOException e) {
// exception at this point would leave the connection in an
// unrecoverable state (eg half a call left on the wire).
// So, close the connection, killing any outstanding calls
markClosed(e);
} finally {
//the buffer is just an in-memory buffer, but it is still polite to
@ -868,11 +906,28 @@ public class Client {
IOUtils.closeStream(d);
}
}
});
try {
senderFuture.get();
} catch (ExecutionException e) {
Throwable cause = e.getCause();
// cause should only be a RuntimeException as the Runnable above
// catches IOException
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
} else {
throw new RuntimeException("unexpected checked exception", cause);
}
}
}
}
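Condensed, the hand-off above looks like the sketch below (the FrameSender class and byte[] frame are illustrative, not the Client API): the socket write happens on a separate thread and the caller only blocks on the Future, so an interrupt can abandon the wait without leaving half a frame on the wire.
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
class FrameSender {
  // Shared pool; the real code above also marks its threads as daemons via
  // Guava's ThreadFactoryBuilder.
  private static final ExecutorService SENDER = Executors.newCachedThreadPool();
  void send(final DataOutputStream out, final byte[] frame) throws IOException {
    Future<?> done = SENDER.submit(new Runnable() {
      @Override
      public void run() {
        try {
          synchronized (out) {             // one frame at a time per stream
            out.writeInt(frame.length);    // length prefix
            out.write(frame);              // header + request bytes
            out.flush();
          }
        } catch (IOException e) {
          throw new RuntimeException(e);   // surfaced via Future.get()
        }
      }
    });
    try {
      done.get();                          // wait; the write itself is never abandoned mid-frame
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IOException("interrupted while waiting for send", e);
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      if (cause.getCause() instanceof IOException) {
        throw (IOException) cause.getCause();
      }
      throw new IOException("send failed", cause);
    }
  }
}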
/* Receive a response.
* Because only one receiver, so no synchronization on in.
*/
private void receiveResponse() {
private void receiveRpcResponse() {
if (shouldCloseConnection.get()) {
return;
}
@ -1138,7 +1193,16 @@ public class Client {
ConnectionId remoteId) throws InterruptedException, IOException {
Call call = new Call(rpcKind, rpcRequest);
Connection connection = getConnection(remoteId, call);
connection.sendParam(call); // send the parameter
try {
connection.sendRpcRequest(call); // send the rpc request
} catch (RejectedExecutionException e) {
throw new IOException("connection has been closed", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("interrupted waiting to send rpc request to server", e);
throw new IOException(e);
}
boolean interrupted = false;
synchronized (call) {
while (!call.done) {

View File

@ -39,7 +39,7 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.ipc.Client.ConnectionId;
import org.apache.hadoop.ipc.RPC.RpcInvoker;
import org.apache.hadoop.ipc.protobuf.HadoopRpcProtos.HadoopRpcRequestProto;
import org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestProto;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.TokenIdentifier;
@ -128,10 +128,10 @@ public class ProtobufRpcEngine implements RpcEngine {
.getProtocolVersion(protocol);
}
private HadoopRpcRequestProto constructRpcRequest(Method method,
private RequestProto constructRpcRequest(Method method,
Object[] params) throws ServiceException {
HadoopRpcRequestProto rpcRequest;
HadoopRpcRequestProto.Builder builder = HadoopRpcRequestProto
RequestProto rpcRequest;
RequestProto.Builder builder = RequestProto
.newBuilder();
builder.setMethodName(method.getName());
@ -190,7 +190,7 @@ public class ProtobufRpcEngine implements RpcEngine {
startTime = Time.now();
}
HadoopRpcRequestProto rpcRequest = constructRpcRequest(method, args);
RequestProto rpcRequest = constructRpcRequest(method, args);
RpcResponseWritable val = null;
if (LOG.isTraceEnabled()) {
@ -271,13 +271,13 @@ public class ProtobufRpcEngine implements RpcEngine {
* Writable Wrapper for Protocol Buffer Requests
*/
private static class RpcRequestWritable implements Writable {
HadoopRpcRequestProto message;
RequestProto message;
@SuppressWarnings("unused")
public RpcRequestWritable() {
}
RpcRequestWritable(HadoopRpcRequestProto message) {
RpcRequestWritable(RequestProto message) {
this.message = message;
}
@ -292,7 +292,7 @@ public class ProtobufRpcEngine implements RpcEngine {
int length = ProtoUtil.readRawVarint32(in);
byte[] bytes = new byte[length];
in.readFully(bytes);
message = HadoopRpcRequestProto.parseFrom(bytes);
message = RequestProto.parseFrom(bytes);
}
@Override
@ -426,7 +426,7 @@ public class ProtobufRpcEngine implements RpcEngine {
public Writable call(RPC.Server server, String connectionProtocolName,
Writable writableRequest, long receiveTime) throws Exception {
RpcRequestWritable request = (RpcRequestWritable) writableRequest;
HadoopRpcRequestProto rpcRequest = request.message;
RequestProto rpcRequest = request.message;
String methodName = rpcRequest.getMethodName();

View File

@ -80,7 +80,8 @@ import org.apache.hadoop.ipc.RPC.VersionMismatch;
import org.apache.hadoop.ipc.metrics.RpcDetailedMetrics;
import org.apache.hadoop.ipc.metrics.RpcMetrics;
import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.*;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.SaslRpcServer;
@ -160,7 +161,7 @@ public abstract class Server {
public static final ByteBuffer HEADER = ByteBuffer.wrap("hrpc".getBytes());
/**
* Serialization type for ConnectionContext and RpcPayloadHeader
* Serialization type for ConnectionContext and RpcRequestHeader
*/
public enum IpcSerializationType {
// Add new serialization type to the end without affecting the enum order
@ -197,7 +198,7 @@ public abstract class Server {
// 4 : Introduced SASL security layer
// 5 : Introduced use of {@link ArrayPrimitiveWritable$Internal}
// in ObjectWritable to efficiently transmit arrays of primitives
// 6 : Made RPC payload header explicit
// 6 : Made RPC Request header explicit
// 7 : Changed Ipc Connection Header to use Protocol buffers
// 8 : SASL server always sends a final response
public static final byte CURRENT_VERSION = 8;
@ -1637,14 +1638,15 @@ public abstract class Server {
private void processData(byte[] buf) throws IOException, InterruptedException {
DataInputStream dis =
new DataInputStream(new ByteArrayInputStream(buf));
RpcPayloadHeaderProto header = RpcPayloadHeaderProto.parseDelimitedFrom(dis);
RpcRequestHeaderProto header = RpcRequestHeaderProto.parseDelimitedFrom(dis);
if (LOG.isDebugEnabled())
LOG.debug(" got #" + header.getCallId());
if (!header.hasRpcOp()) {
throw new IOException(" IPC Server: No rpc op in rpcPayloadHeader");
throw new IOException(" IPC Server: No rpc op in rpcRequestHeader");
}
if (header.getRpcOp() != RpcPayloadOperationProto.RPC_FINAL_PAYLOAD) {
if (header.getRpcOp() !=
RpcRequestHeaderProto.OperationProto.RPC_FINAL_PACKET) {
throw new IOException("IPC Server does not implement operation" +
header.getRpcOp());
}
@ -1652,7 +1654,7 @@ public abstract class Server {
// (Note it would make more sense to have the handler deserialize but
// we continue with this original design.
if (!header.hasRpcKind()) {
throw new IOException(" IPC Server: No rpc kind in rpcPayloadHeader");
throw new IOException(" IPC Server: No rpc kind in rpcRequestHeader");
}
Class<? extends Writable> rpcRequestClass =
getRpcRequestWrapper(header.getRpcKind());

View File

@ -37,7 +37,7 @@ public class JniBasedUnixGroupsMappingWithFallback implements
if (NativeCodeLoader.isNativeCodeLoaded()) {
this.impl = new JniBasedUnixGroupsMapping();
} else {
LOG.info("Falling back to shell based");
LOG.debug("Falling back to shell based");
this.impl = new ShellBasedUnixGroupsMapping();
}
if (LOG.isDebugEnabled()){

View File

@ -81,6 +81,7 @@ public class UserGroupInformation {
*/
private static final float TICKET_RENEW_WINDOW = 0.80f;
static final String HADOOP_USER_NAME = "HADOOP_USER_NAME";
static final String HADOOP_PROXY_USER = "HADOOP_PROXY_USER";
/**
* UgiMetrics maintains UGI activity statistics
@ -641,10 +642,18 @@ public class UserGroupInformation {
newLoginContext(authenticationMethod.getLoginAppName(),
subject, new HadoopConfiguration());
login.login();
loginUser = new UserGroupInformation(subject);
loginUser.setLogin(login);
loginUser.setAuthenticationMethod(authenticationMethod);
loginUser = new UserGroupInformation(login.getSubject());
UserGroupInformation realUser = new UserGroupInformation(subject);
realUser.setLogin(login);
realUser.setAuthenticationMethod(authenticationMethod);
realUser = new UserGroupInformation(login.getSubject());
// If the HADOOP_PROXY_USER environment variable or property
// is specified, create a proxy user as the logged in user.
String proxyUser = System.getenv(HADOOP_PROXY_USER);
if (proxyUser == null) {
proxyUser = System.getProperty(HADOOP_PROXY_USER);
}
loginUser = proxyUser == null ? realUser : createProxyUser(proxyUser, realUser);
String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
if (fileLocation != null) {
// load the token storage file and put all of the tokens into the
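In effect, setting HADOOP_PROXY_USER makes every subsequent client call run as a proxy UGI layered over the real login. The same behaviour expressed through the public API looks roughly like this hedged sketch (the user name "alice" is a placeholder, and the cluster must still whitelist the real user via the hadoop.proxyuser.* settings for impersonated calls to be accepted):
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;
public class ProxyUserExample {
  public static void main(String[] args) throws Exception {
    UserGroupInformation real = UserGroupInformation.getLoginUser();
    UserGroupInformation proxy = UserGroupInformation.createProxyUser("alice", real);
    proxy.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() throws Exception {
        // Filesystem (and other RPC) calls made here act as "alice".
        FileSystem fs = FileSystem.get(new Configuration());
        System.out.println("home dir as proxy user: " + fs.getHomeDirectory());
        return null;
      }
    });
  }
}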

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import org.apache.hadoop.util.NativeCodeLoader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class NativeLibraryChecker {
/**
* A tool to test native library availability.
*/
public static void main(String[] args) {
String usage = "NativeLibraryChecker [-a|-h]\n"
+ " -a use -a to check all libraries are available\n"
+ " by default just check hadoop library is available\n"
+ " exit with error code if check failed\n"
+ " -h print this message\n";
if (args.length > 1 ||
(args.length == 1 &&
!(args[0].equals("-a") || args[0].equals("-h")))) {
System.err.println(usage);
ExitUtil.terminate(1);
}
boolean checkAll = false;
if (args.length == 1) {
if (args[0].equals("-h")) {
System.out.println(usage);
return;
}
checkAll = true;
}
boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
boolean zlibLoaded = false;
boolean snappyLoaded = false;
// lz4 is linked within libhadoop
boolean lz4Loaded = nativeHadoopLoaded;
if (nativeHadoopLoaded) {
zlibLoaded = ZlibFactory.isNativeZlibLoaded(new Configuration());
snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
SnappyCodec.isNativeCodeLoaded();
}
System.out.println("Native library checking:");
System.out.printf("hadoop: %b\n", nativeHadoopLoaded);
System.out.printf("zlib: %b\n", zlibLoaded);
System.out.printf("snappy: %b\n", snappyLoaded);
System.out.printf("lz4: %b\n", lz4Loaded);
if ((!nativeHadoopLoaded) ||
(checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded))) {
// return 1 to indicate the check failed
ExitUtil.terminate(1);
}
}
}

View File

@ -24,7 +24,7 @@ import java.io.IOException;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto;
import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.UserInformationProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.*;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.UserGroupInformation;
@ -157,9 +157,9 @@ public abstract class ProtoUtil {
return null;
}
public static RpcPayloadHeaderProto makeRpcPayloadHeader(RPC.RpcKind rpcKind,
RpcPayloadOperationProto operation, int callId) {
RpcPayloadHeaderProto.Builder result = RpcPayloadHeaderProto.newBuilder();
public static RpcRequestHeaderProto makeRpcRequestHeader(RPC.RpcKind rpcKind,
RpcRequestHeaderProto.OperationProto operation, int callId) {
RpcRequestHeaderProto.Builder result = RpcRequestHeaderProto.newBuilder();
result.setRpcKind(convert(rpcKind)).setRpcOp(operation).setCallId(callId);
return result.build();
}

View File

@ -17,11 +17,13 @@
*/
/**
* These are the messages used by Hadoop RPC to marshal the
* request and response in the RPC layer.
* These are the messages used by the Hadoop RPC Protocol Buffer RPC engine
* to marshal the request and response in the RPC layer.
* The messages are sent in addition to the normal RPC header as
* defined in RpcHeader.proto
*/
option java_package = "org.apache.hadoop.ipc.protobuf";
option java_outer_classname = "HadoopRpcProtos";
option java_outer_classname = "ProtobufRpcEngineProtos";
option java_generate_equals_and_hash = true;
package hadoop.common;
@ -29,10 +31,11 @@ package hadoop.common;
* This message is used for Protobuf Rpc Engine.
* The message is used to marshal a Rpc-request
* from RPC client to the RPC server.
* The Response to the Rpc call (including errors) is handled
* as part of the standard Rpc response.
*
* No special header is needed for the Rpc Response for Protobuf Rpc Engine.
* The normal RPC response header (see RpcHeader.proto) is sufficient.
*/
message HadoopRpcRequestProto {
message RequestProto {
/** Name of the RPC method */
required string methodName = 1;

View File

@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
option java_package = "org.apache.hadoop.ipc.protobuf";
option java_outer_classname = "RpcHeaderProtos";
option java_generate_equals_and_hash = true;
package hadoop.common;
/**
* This is the rpc request header. It is sent with every rpc call.
*
* The format of RPC call is as follows:
* +--------------------------------------------------------------+
* | Rpc length in bytes (4 bytes int) sum of next two parts |
* +--------------------------------------------------------------+
* | RpcRequestHeaderProto - serialized delimited ie has len |
* +--------------------------------------------------------------+
* | RpcRequest The actual rpc request |
* | This request is serialized based on RpcKindProto |
* +--------------------------------------------------------------+
*
*/
/**
* RpcKind determine the rpcEngine and the serialization of the rpc request
*/
enum RpcKindProto {
RPC_BUILTIN = 0; // Used for built in calls by tests
RPC_WRITABLE = 1; // Use WritableRpcEngine
RPC_PROTOCOL_BUFFER = 2; // Use ProtobufRpcEngine
}
message RpcRequestHeaderProto { // the header for the RpcRequest
enum OperationProto {
RPC_FINAL_PACKET = 0; // The final RPC Packet
RPC_CONTINUATION_PACKET = 1; // not implemented yet
RPC_CLOSE_CONNECTION = 2; // close the rpc connection
}
optional RpcKindProto rpcKind = 1;
optional OperationProto rpcOp = 2;
required uint32 callId = 3; // each rpc has a callId that is also used in response
}
/**
* Rpc Response Header
* ** If the request is successful, the response is returned as below ********
* +------------------------------------------------------------------+
* | Rpc response length in bytes (4 bytes int) |
* | (sum of next two parts) |
* +------------------------------------------------------------------+
* | RpcResponseHeaderProto - serialized delimited ie has len |
* +------------------------------------------------------------------+
* | if request is successful: |
* | - RpcResponse - The actual rpc response bytes |
* | This response is serialized based on RpcKindProto |
* | if request fails: |
* | - length (4 byte int) + Class name of exception - UTF-8 string |
* | - length (4 byte int) + Stacktrace - UTF-8 string |
* | if the strings are null then the length is -1 |
* +------------------------------------------------------------------+
*
*/
message RpcResponseHeaderProto {
enum RpcStatusProto {
SUCCESS = 0; // RPC succeeded
ERROR = 1; // RPC Failed
FATAL = 2; // Fatal error - connection is closed
}
required uint32 callId = 1; // callId used in Request
required RpcStatusProto status = 2;
optional uint32 serverIpcVersionNum = 3; // in case of a fatal IPC error
}
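To make the request layout concrete, here is a hedged Java sketch (the output stream and request bytes are supplied by the caller; the class name is illustrative) that frames a single call exactly as diagrammed above: a 4-byte total length, a delimited RpcRequestHeaderProto, then the serialized request.
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcKindProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto;
class RpcFraming {
  /** Writes length + delimited header + request bytes, per the layout above. */
  static void writeFrame(DataOutputStream out, byte[] rpcRequestBytes, int callId)
      throws IOException {
    RpcRequestHeaderProto header = RpcRequestHeaderProto.newBuilder()
        .setRpcKind(RpcKindProto.RPC_PROTOCOL_BUFFER)
        .setRpcOp(RpcRequestHeaderProto.OperationProto.RPC_FINAL_PACKET)
        .setCallId(callId)
        .build();
    ByteArrayOutputStream body = new ByteArrayOutputStream();
    header.writeDelimitedTo(body);          // item 1: delimited header
    body.write(rpcRequestBytes);            // item 2: the serialized RpcRequest
    out.writeInt(body.size());              // item 0: length of items 1 + 2
    body.writeTo(out);
    out.flush();
  }
}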

View File

@ -1,78 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
option java_package = "org.apache.hadoop.ipc.protobuf";
option java_outer_classname = "RpcPayloadHeaderProtos";
option java_generate_equals_and_hash = true;
package hadoop.common;
/**
* This is the rpc payload header. It is sent with every rpc call.
*
* The format of RPC call is as follows:
* +-----------------------------------------------------+
* | Rpc length in bytes |
* +-----------------------------------------------------+
* | RpcPayloadHeader - serialized delimited ie has len |
* +-----------------------------------------------------+
* | RpcRequest Payload |
* +-----------------------------------------------------+
*
*/
/**
* RpcKind determine the rpcEngine and the serialization of the rpc payload
*/
enum RpcKindProto {
RPC_BUILTIN = 0; // Used for built in calls by tests
RPC_WRITABLE = 1; // Use WritableRpcEngine
RPC_PROTOCOL_BUFFER = 2; // Use ProtobufRpcEngine
}
enum RpcPayloadOperationProto {
RPC_FINAL_PAYLOAD = 0; // The final payload
RPC_CONTINUATION_PAYLOAD = 1; // not implemented yet
RPC_CLOSE_CONNECTION = 2; // close the rpc connection
}
message RpcPayloadHeaderProto { // the header for the RpcRequest
optional RpcKindProto rpcKind = 1;
optional RpcPayloadOperationProto rpcOp = 2;
required uint32 callId = 3; // each rpc has a callId that is also used in response
}
enum RpcStatusProto {
SUCCESS = 0; // RPC succeeded
ERROR = 1; // RPC Failed
FATAL = 2; // Fatal error - connection is closed
}
/**
* Rpc Response Header
* - If successfull then the Respose follows after this header
* - length (4 byte int), followed by the response
* - If error or fatal - the exception info follow
* - length (4 byte int) Class name of exception - UTF-8 string
* - length (4 byte int) Stacktrace - UTF-8 string
* - if the strings are null then the length is -1
* In case of Fatal error then the respose contains the Serverside's IPC version
*/
message RpcResponseHeaderProto {
required uint32 callId = 1; // callId used in Request
required RpcStatusProto status = 2;
optional uint32 serverIpcVersionNum = 3; // in case of an fatal IPC error
}

View File

@ -1090,4 +1090,70 @@
</description>
</property>
<!-- ha properties -->
<property>
<name>ha.health-monitor.connect-retry-interval.ms</name>
<value>1000</value>
<description>
How often to retry connecting to the service.
</description>
</property>
<property>
<name>ha.health-monitor.check-interval.ms</name>
<value>1000</value>
<description>
How often to check the service.
</description>
</property>
<property>
<name>ha.health-monitor.sleep-after-disconnect.ms</name>
<value>1000</value>
<description>
How long to sleep after an unexpected RPC error.
</description>
</property>
<property>
<name>ha.health-monitor.rpc-timeout.ms</name>
<value>45000</value>
<description>
Timeout for the actual monitorHealth() calls.
</description>
</property>
<property>
<name>ha.failover-controller.new-active.rpc-timeout.ms</name>
<value>60000</value>
<description>
Timeout that the FC waits for the new active to become active.
</description>
</property>
<property>
<name>ha.failover-controller.graceful-fence.rpc-timeout.ms</name>
<value>5000</value>
<description>
Timeout that the FC waits for the old active to go to standby.
</description>
</property>
<property>
<name>ha.failover-controller.graceful-fence.connection.retries</name>
<value>1</value>
<description>
FC connection retries for graceful fencing.
</description>
</property>
<property>
<name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
<value>20000</value>
<description>
Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState.
</description>
</property>
</configuration>

View File

@ -16,12 +16,12 @@
---
${maven.build.timestamp}
Hadoop MapReduce Next Generation - Cluster Setup
\[ {{{./index.html}Go Back}} \]
\[ {{{../index.html}Go Back}} \]
%{toc|section=1|fromDepth=0}
Hadoop MapReduce Next Generation - Cluster Setup
* {Purpose}
This document describes how to install, configure and manage non-trivial
@ -50,7 +50,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
The following sections describe how to configure a Hadoop cluster.
* {Configuration Files}
{Configuration Files}
Hadoop configuration is driven by two types of important configuration files:
@ -67,7 +67,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
directory of the distribution, by setting site-specific values via the
<<conf/hadoop-env.sh>> and <<yarn-env.sh>>.
* {Site Configuration}
{Site Configuration}
To configure the Hadoop cluster you will need to configure the
<<<environment>>> in which the Hadoop daemons execute as well as the
@ -76,7 +76,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
The Hadoop daemons are NameNode/DataNode and ResourceManager/NodeManager.
* {Configuring Environment of Hadoop Daemons}
** {Configuring Environment of Hadoop Daemons}
Administrators should use the <<conf/hadoop-env.sh>> and
<<conf/yarn-env.sh>> script to do site-specific customization of the
@ -130,6 +130,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
will be set to 1000MB. This is used to configure the heap
size for the daemon. By default, the value is 1000. If you want to
configure the values separately for each daemon, you can use:
*--------------------------------------+--------------------------------------+
|| Daemon || Environment Variable |
*--------------------------------------+--------------------------------------+
@ -142,7 +143,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
| Map Reduce Job History Server | HADOOP_JOB_HISTORYSERVER_HEAPSIZE |
*--------------------------------------+--------------------------------------+
* {Configuring the Hadoop Daemons in Non-Secure Mode}
** {Configuring the Hadoop Daemons in Non-Secure Mode}
This section deals with important parameters to be specified in
the given configuration files:
@ -379,7 +380,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
| | | Directory where history files are managed by the MR JobHistory Server. |
*-------------------------+-------------------------+------------------------+
* Hadoop Rack Awareness
* {Hadoop Rack Awareness}
The HDFS and the YARN components are rack-aware.
@ -395,7 +396,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
<<<topology.script.file.name>>>. If <<<topology.script.file.name>>> is
not set, the rack id </default-rack> is returned for any passed IP address.
* Monitoring Health of NodeManagers
* {Monitoring Health of NodeManagers}
Hadoop provides a mechanism by which administrators can configure the
NodeManager to run an administrator supplied script periodically to
@ -449,7 +450,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
disk is either raided or a failure in the boot disk is identified by the
health checker script.
* {Slaves file}
* {Slaves file}
Typically you choose one machine in the cluster to act as the NameNode and
one machine as to act as the ResourceManager, exclusively. The rest of the
@ -459,18 +460,18 @@ Hadoop MapReduce Next Generation - Cluster Setup
List all slave hostnames or IP addresses in your <<<conf/slaves>>> file,
one per line.
* {Logging}
* {Logging}
Hadoop uses the Apache log4j via the Apache Commons Logging framework for
logging. Edit the <<<conf/log4j.properties>>> file to customize the
Hadoop daemons' logging configuration (log-formats and so on).
* {Operating the Hadoop Cluster}
* {Operating the Hadoop Cluster}
Once all the necessary configuration is complete, distribute the files to the
<<<HADOOP_CONF_DIR>>> directory on all the machines.
* Hadoop Startup
** Hadoop Startup
To start a Hadoop cluster you will need to start both the HDFS and YARN
cluster.
@ -478,81 +479,81 @@ Hadoop MapReduce Next Generation - Cluster Setup
Format a new distributed filesystem:
----
$ $HADOOP_PREFIX/bin/hdfs namenode -format <cluster_name>
$ $HADOOP_PREFIX/bin/hdfs namenode -format <cluster_name>
----
Start the HDFS with the following command, run on the designated NameNode:
----
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode
----
Run a script to start DataNodes on all slaves:
----
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode
----
Start the YARN with the following command, run on the designated
ResourceManager:
----
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager
----
Run a script to start NodeManagers on all slaves:
----
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager
----
Start a standalone WebAppProxy server. If multiple servers
are used with load balancing it should be run on each of them:
----
$ $HADOOP_YARN_HOME/bin/yarn start proxyserver --config $HADOOP_CONF_DIR
$ $HADOOP_YARN_HOME/bin/yarn start proxyserver --config $HADOOP_CONF_DIR
----
Start the MapReduce JobHistory Server with the following command, run on the
designated server:
----
$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR
$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR
----
* Hadoop Shutdown
** Hadoop Shutdown
Stop the NameNode with the following command, run on the designated
NameNode:
----
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode
----
Run a script to stop DataNodes on all slaves:
----
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode
$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode
----
Stop the ResourceManager with the following command, run on the designated
ResourceManager:
----
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager
----
Run a script to stop NodeManagers on all slaves:
----
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager
$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager
----
Stop the WebAppProxy server. If multiple servers are used with load
balancing it should be run on each of them:
----
$ $HADOOP_YARN_HOME/bin/yarn stop proxyserver --config $HADOOP_CONF_DIR
$ $HADOOP_YARN_HOME/bin/yarn stop proxyserver --config $HADOOP_CONF_DIR
----
@ -560,10 +561,9 @@ Hadoop MapReduce Next Generation - Cluster Setup
designated server:
----
$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR
$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR
----
* {Running Hadoop in Secure Mode}
This section deals with important parameters to be specified in
@ -578,15 +578,15 @@ Hadoop MapReduce Next Generation - Cluster Setup
It's recommended to have them share a Unix group, for e.g. <<<hadoop>>>.
*--------------------------------------+----------------------------------------------------------------------+
*---------------+----------------------------------------------------------------------+
|| User:Group || Daemons |
*--------------------------------------+----------------------------------------------------------------------+
*---------------+----------------------------------------------------------------------+
| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode |
*--------------------------------------+----------------------------------------------------------------------+
*---------------+----------------------------------------------------------------------+
| yarn:hadoop | ResourceManager, NodeManager |
*--------------------------------------+----------------------------------------------------------------------+
*---------------+----------------------------------------------------------------------+
| mapred:hadoop | MapReduce JobHistory Server |
*--------------------------------------+----------------------------------------------------------------------+
*---------------+----------------------------------------------------------------------+
* <<<Permissions for both HDFS and local fileSystem paths>>>
@ -635,7 +635,6 @@ Hadoop MapReduce Next Generation - Cluster Setup
following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab
Keytab name: FILE:/etc/security/keytab/nn.service.keytab
KVNO Timestamp Principal
@ -645,14 +644,12 @@ KVNO Timestamp Principal
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The Secondary NameNode keytab file, on that host, should look like the
following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab
Keytab name: FILE:/etc/security/keytab/sn.service.keytab
KVNO Timestamp Principal
@ -662,13 +659,11 @@ KVNO Timestamp Principal
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The DataNode keytab file, on each host, should look like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab
Keytab name: FILE:/etc/security/keytab/dn.service.keytab
KVNO Timestamp Principal
@ -678,7 +673,6 @@ KVNO Timestamp Principal
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
* YARN
@ -687,7 +681,6 @@ KVNO Timestamp Principal
like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab
Keytab name: FILE:/etc/security/keytab/rm.service.keytab
KVNO Timestamp Principal
@ -697,13 +690,11 @@ KVNO Timestamp Principal
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The NodeManager keytab file, on each host, should look like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab
Keytab name: FILE:/etc/security/keytab/nm.service.keytab
KVNO Timestamp Principal
@ -713,7 +704,6 @@ KVNO Timestamp Principal
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
* MapReduce JobHistory Server
@ -722,7 +712,6 @@ KVNO Timestamp Principal
like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab
Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
KVNO Timestamp Principal
@ -732,10 +721,9 @@ KVNO Timestamp Principal
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
* Configuration in Secure Mode
** Configuration in Secure Mode
* <<<conf/core-site.xml>>>
@ -937,7 +925,7 @@ KVNO Timestamp Principal
| <<<min.user.id>>> | 1000 | Prevent other super-users. |
*-------------------------+-------------------------+------------------------+
To re-cap, here are the local file-ssytem permissions required for the
To re-cap, here are the local file-system permissions required for the
various paths related to the <<<LinuxContainerExecutor>>>:
*-------------------+-------------------+------------------+------------------+
@ -1003,7 +991,7 @@ KVNO Timestamp Principal
*-------------------------+-------------------------+------------------------+
* {Operating the Hadoop Cluster}
* {Operating the Hadoop Cluster}
Once all the necessary configuration is complete, distribute the files to the
<<<HADOOP_CONF_DIR>>> directory on all the machines.
@ -1011,7 +999,7 @@ KVNO Timestamp Principal
This section also describes the various Unix users who should be starting the
various components and uses the same Unix accounts and groups used previously:
* Hadoop Startup
** Hadoop Startup
To start a Hadoop cluster you will need to start both the HDFS and YARN
cluster.
@ -1064,7 +1052,7 @@ KVNO Timestamp Principal
[mapred]$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR
----
* Hadoop Shutdown
** Hadoop Shutdown
Stop the NameNode with the following command, run on the designated NameNode
as <hdfs>:

View File

@ -0,0 +1,490 @@
~~ Licensed to the Apache Software Foundation (ASF) under one or more
~~ contributor license agreements. See the NOTICE file distributed with
~~ this work for additional information regarding copyright ownership.
~~ The ASF licenses this file to You under the Apache License, Version 2.0
~~ (the "License"); you may not use this file except in compliance with
~~ the License. You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.
---
Hadoop Commands Guide
---
---
${maven.build.timestamp}
%{toc}
Overview
All hadoop commands are invoked by the <<<bin/hadoop>>> script. Running the
hadoop script without any arguments prints the description for all
commands.
Usage: <<<hadoop [--config confdir] [COMMAND] [GENERIC_OPTIONS] [COMMAND_OPTIONS]>>>
Hadoop has an option parsing framework that supports parsing generic
options as well as running application classes.
*-----------------------+---------------+
|| COMMAND_OPTION || Description
*-----------------------+---------------+
| <<<--config confdir>>>| Overrides the default Configuration directory. Default is <<<${HADOOP_HOME}/conf>>>.
*-----------------------+---------------+
| GENERIC_OPTIONS | The common set of options supported by multiple commands.
| COMMAND_OPTIONS | Various commands with their options are described in the following sections. The commands have been grouped into User Commands and Administration Commands.
*-----------------------+---------------+
Generic Options
The following options are supported by {{dfsadmin}}, {{fs}}, {{fsck}},
{{job}} and {{fetchdt}}. Applications should implement {{{some_useful_url}Tool}} to support
{{{another_useful_url}GenericOptions}}.
*------------------------------------------------+-----------------------------+
|| GENERIC_OPTION || Description
*------------------------------------------------+-----------------------------+
|<<<-conf \<configuration file\> >>> | Specify an application
| configuration file.
*------------------------------------------------+-----------------------------+
|<<<-D \<property\>=\<value\> >>> | Use value for given property.
*------------------------------------------------+-----------------------------+
|<<<-jt \<local\> or \<jobtracker:port\> >>> | Specify a job tracker.
| Applies only to job.
*------------------------------------------------+-----------------------------+
|<<<-files \<comma separated list of files\> >>> | Specify comma separated files
| to be copied to the map
| reduce cluster. Applies only
| to job.
*------------------------------------------------+-----------------------------+
|<<<-libjars \<comma separated list of jars\> >>>| Specify comma separated jar
| files to include in the
| classpath. Applies only to
| job.
*------------------------------------------------+-----------------------------+
|<<<-archives \<comma separated list of archives\> >>> | Specify comma separated
| archives to be unarchived on
| the compute machines. Applies
| only to job.
*------------------------------------------------+-----------------------------+
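As an illustration (the configuration file path, property name and local file
below are placeholders, not values required by Hadoop), generic options are
placed after the command and before the command-specific options:

+---
# list a directory using an alternate configuration file
hadoop fs -conf /etc/hadoop/alt-core-site.xml -ls /user/hadoop

# set a property on the command line and ship a local side file with a job
hadoop job -D mapreduce.job.name=example -files /tmp/lookup.txt -submit job.xml
+---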
User Commands
Commands useful for users of a hadoop cluster.
* <<<archive>>>
Creates a hadoop archive. More information can be found at Hadoop
Archives.
Usage: <<<hadoop archive -archiveName NAME <src>* <dest> >>>
*-------------------+-------------------------------------------------------+
||COMMAND_OPTION || Description
*-------------------+-------------------------------------------------------+
| -archiveName NAME | Name of the archive to be created.
*-------------------+-------------------------------------------------------+
| src | Filesystem pathnames which work as usual with regular
| expressions.
*-------------------+-------------------------------------------------------+
| dest | Destination directory which would contain the archive.
*-------------------+-------------------------------------------------------+
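For example, the following invocation (directory paths are placeholders)
creates an archive named foo.har from two input directories and places it
under /user/zoo:

+---
hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/hadoop/dir2 /user/zoo
+---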
* <<<distcp>>>
Copies files or directories recursively. More information can be found at
Hadoop DistCp Guide.
Usage: <<<hadoop distcp <srcurl> <desturl> >>>
*-------------------+--------------------------------------------+
||COMMAND_OPTION || Description
*-------------------+--------------------------------------------+
| srcurl | Source Url
*-------------------+--------------------------------------------+
| desturl | Destination Url
*-------------------+--------------------------------------------+
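A typical copy between two clusters looks like the following (the NameNode
host names and paths are placeholders):

+---
hadoop distcp hdfs://nn1.example.com/foo/bar hdfs://nn2.example.com/bar/foo
+---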
* <<<fs>>>
Usage: <<<hadoop fs [GENERIC_OPTIONS] [COMMAND_OPTIONS]>>>
Deprecated, use <<<hdfs dfs>>> instead.
Runs a generic filesystem user client.
The various COMMAND_OPTIONS can be found at File System Shell Guide.
* <<<fsck>>>
Runs an HDFS filesystem checking utility. See {{Fsck}} for more info.
Usage: <<<hadoop fsck [GENERIC_OPTIONS] <path> [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>>
*------------------+---------------------------------------------+
|| COMMAND_OPTION || Description
*------------------+---------------------------------------------+
| <path> | Start checking from this path.
*------------------+---------------------------------------------+
| -move | Move corrupted files to /lost+found
*------------------+---------------------------------------------+
| -delete | Delete corrupted files.
*------------------+---------------------------------------------+
| -openforwrite | Print out files opened for write.
*------------------+---------------------------------------------+
| -files | Print out files being checked.
*------------------+---------------------------------------------+
| -blocks | Print out block report.
*------------------+---------------------------------------------+
| -locations | Print out locations for every block.
*------------------+---------------------------------------------+
| -racks | Print out network topology for data-node locations.
*------------------+---------------------------------------------+
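For example, to check a user directory and print the files being checked
together with their blocks and block locations (the path is a placeholder):

+---
hadoop fsck /user/hadoop -files -blocks -locations
+---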
* <<<fetchdt>>>
Gets Delegation Token from a NameNode. See {{fetchdt}} for more info.
Usage: <<<hadoop fetchdt [GENERIC_OPTIONS] [--webservice <namenode_http_addr>] <path> >>>
*------------------------------+---------------------------------------------+
|| COMMAND_OPTION || Description
*------------------------------+---------------------------------------------+
| <fileName> | File name to store the token into.
*------------------------------+---------------------------------------------+
| --webservice <https_address> | Use HTTP protocol instead of RPC.
*------------------------------+---------------------------------------------+
* <<<jar>>>
Runs a jar file. Users can bundle their Map Reduce code in a jar file and
execute it using this command.
Usage: <<<hadoop jar <jar> [mainClass] args...>>>
Streaming jobs are run via this command; examples can be found in the
Streaming examples.
The word count example is also run using the jar command; it can be found in
the Wordcount example.
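A minimal sketch of a jar invocation, assuming an examples jar providing a
wordcount program is available (the jar name and paths are placeholders):

+---
hadoop jar hadoop-mapreduce-examples.jar wordcount /user/hadoop/input /user/hadoop/output
+---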
* <<<job>>>
Command to interact with Map Reduce Jobs.
Usage: <<<hadoop job [GENERIC_OPTIONS] [-submit <job-file>] | [-status <job-id>] | [-counter <job-id> <group-name> <counter-name>] | [-kill <job-id>] | [-events <job-id> <from-event-#> <#-of-events>] | [-history [all] <jobOutputDir>] | [-list [all]] | [-kill-task <task-id>] | [-fail-task <task-id>] | [-set-priority <job-id> <priority>]>>>
*------------------------------+---------------------------------------------+
|| COMMAND_OPTION || Description
*------------------------------+---------------------------------------------+
| -submit <job-file> | Submits the job.
*------------------------------+---------------------------------------------+
| -status <job-id> | Prints the map and reduce completion
| percentage and all job counters.
*------------------------------+---------------------------------------------+
| -counter <job-id> <group-name> <counter-name> | Prints the counter value.
*------------------------------+---------------------------------------------+
| -kill <job-id> | Kills the job.
*------------------------------+---------------------------------------------+
| -events <job-id> <from-event-#> <#-of-events> | Prints the events' details
| received by jobtracker for the given range.
*------------------------------+---------------------------------------------+
| -history [all] <jobOutputDir> | Prints job details, failed and killed tip
| details. More details about the job such as
| successful tasks and task attempts made for
| each task can be viewed by specifying the [all]
| option.
*------------------------------+---------------------------------------------+
| -list [all] | Displays jobs which are yet to complete.
| <<<-list all>>> displays all jobs.
*------------------------------+---------------------------------------------+
| -kill-task <task-id> | Kills the task. Killed tasks are NOT counted
| against failed attempts.
*------------------------------+---------------------------------------------+
| -fail-task <task-id> | Fails the task. Failed tasks are counted
| against failed attempts.
*------------------------------+---------------------------------------------+
| -set-priority <job-id> <priority> | Changes the priority of the job. Allowed
| priority values are VERY_HIGH, HIGH, NORMAL,
| LOW, VERY_LOW
*------------------------------+---------------------------------------------+
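A few illustrative invocations (the job identifier below is a placeholder):

+---
hadoop job -list all
hadoop job -status job_201301150945_0012
hadoop job -set-priority job_201301150945_0012 HIGH
+---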
* <<<pipes>>>
Runs a pipes job.
Usage: <<<hadoop pipes [-conf <path>] [-jobconf <key=value>, <key=value>,
...] [-input <path>] [-output <path>] [-jar <jar file>] [-inputformat
<class>] [-map <class>] [-partitioner <class>] [-reduce <class>] [-writer
<class>] [-program <executable>] [-reduces <num>]>>>
*----------------------------------------+------------------------------------+
|| COMMAND_OPTION || Description
*----------------------------------------+------------------------------------+
| -conf <path> | Configuration for job
*----------------------------------------+------------------------------------+
| -jobconf <key=value>, <key=value>, ... | Add/override configuration for job
*----------------------------------------+------------------------------------+
| -input <path> | Input directory
*----------------------------------------+------------------------------------+
| -output <path> | Output directory
*----------------------------------------+------------------------------------+
| -jar <jar file> | Jar filename
*----------------------------------------+------------------------------------+
| -inputformat <class> | InputFormat class
*----------------------------------------+------------------------------------+
| -map <class> | Java Map class
*----------------------------------------+------------------------------------+
| -partitioner <class> | Java Partitioner
*----------------------------------------+------------------------------------+
| -reduce <class> | Java Reduce class
*----------------------------------------+------------------------------------+
| -writer <class> | Java RecordWriter
*----------------------------------------+------------------------------------+
| -program <executable> | Executable URI
*----------------------------------------+------------------------------------+
| -reduces <num> | Number of reduces
*----------------------------------------+------------------------------------+
* <<<queue>>>
Command to interact with and view Job Queue information.
Usage: <<<hadoop queue [-list] | [-info <job-queue-name> [-showJobs]] | [-showacls]>>>
*-----------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*-----------------+-----------------------------------------------------------+
| -list | Gets the list of Job Queues configured in the system, along with
| the scheduling information associated with the job queues.
*-----------------+-----------------------------------------------------------+
| -info <job-queue-name> [-showJobs] | Displays the job queue information and
| associated scheduling information of particular job queue.
| If the <<<-showJobs>>> option is present, a list of jobs
| submitted to the particular job queue is displayed.
*-----------------+-----------------------------------------------------------+
| -showacls | Displays the queue name and associated queue operations
| allowed for the current user. The list consists of only
| those queues to which the user has access.
*-----------------+-----------------------------------------------------------+
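For example (the queue name below is a placeholder):

+---
hadoop queue -list
hadoop queue -info default -showJobs
hadoop queue -showacls
+---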
* <<<version>>>
Prints the version.
Usage: <<<hadoop version>>>
* <<<CLASSNAME>>>
The hadoop script can be used to invoke any class.
Usage: <<<hadoop CLASSNAME>>>
Runs the class named <<<CLASSNAME>>>.
* <<<classpath>>>
Prints the class path needed to get the Hadoop jar and the required
libraries.
Usage: <<<hadoop classpath>>>
Administration Commands
Commands useful for administrators of a hadoop cluster.
* <<<balancer>>>
Runs a cluster balancing utility. An administrator can simply press Ctrl-C
to stop the rebalancing process. See Rebalancer for more details.
Usage: <<<hadoop balancer [-threshold <threshold>]>>>
*------------------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*------------------------+-----------------------------------------------------------+
| -threshold <threshold> | Percentage of disk capacity. This overwrites the
| default threshold.
*------------------------+-----------------------------------------------------------+
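For example, to run the balancer with a 5 percent threshold instead of the
default:

+---
hadoop balancer -threshold 5
+---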
* <<<daemonlog>>>
Get/Set the log level for each daemon.
Usage: <<<hadoop daemonlog -getlevel <host:port> <name> >>>
Usage: <<<hadoop daemonlog -setlevel <host:port> <name> <level> >>>
*------------------------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*------------------------------+-----------------------------------------------------------+
| -getlevel <host:port> <name> | Prints the log level of the daemon running at
| <host:port>. This command internally connects
| to http://<host:port>/logLevel?log=<name>
*------------------------------+-----------------------------------------------------------+
| -setlevel <host:port> <name> <level> | Sets the log level of the daemon
| running at <host:port>. This command internally
| connects to http://<host:port>/logLevel?log=<name>
*------------------------------+-----------------------------------------------------------+
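For example (the host, HTTP port and logger name below are placeholders for
an actual daemon address and class):

+---
hadoop daemonlog -getlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode
hadoop daemonlog -setlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode DEBUG
+---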
* <<<datanode>>>
Runs an HDFS datanode.
Usage: <<<hadoop datanode [-rollback]>>>
*-----------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*-----------------+-----------------------------------------------------------+
| -rollback | Rolls back the datanode to the previous version. This should
| be used after stopping the datanode and distributing the old
| hadoop version.
*-----------------+-----------------------------------------------------------+
* <<<dfsadmin>>>
Runs an HDFS dfsadmin client.
Usage: <<<hadoop dfsadmin [GENERIC_OPTIONS] [-report] [-safemode enter | leave | get | wait] [-refreshNodes] [-finalizeUpgrade] [-upgradeProgress status | details | force] [-metasave filename] [-setQuota <quota> <dirname>...<dirname>] [-clrQuota <dirname>...<dirname>] [-help [cmd]]>>>
*-----------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*-----------------+-----------------------------------------------------------+
| -report | Reports basic filesystem information and statistics.
*-----------------+-----------------------------------------------------------+
| -safemode enter / leave / get / wait | Safe mode maintenance command. Safe
| mode is a Namenode state in which it \
| 1. does not accept changes to the name space (read-only) \
| 2. does not replicate or delete blocks. \
| Safe mode is entered automatically at Namenode startup, and
| leaves safe mode automatically when the configured minimum
| percentage of blocks satisfies the minimum replication
| condition. Safe mode can also be entered manually, but then
| it can only be turned off manually as well.
*-----------------+-----------------------------------------------------------+
| -refreshNodes | Re-read the hosts and exclude files to update the set of
| Datanodes that are allowed to connect to the Namenode and
| those that should be decommissioned or recommissioned.
*-----------------+-----------------------------------------------------------+
| -finalizeUpgrade| Finalize upgrade of HDFS. Datanodes delete their previous
| version working directories, followed by Namenode doing the
| same. This completes the upgrade process.
*-----------------+-----------------------------------------------------------+
| -upgradeProgress status / details / force | Request current distributed
| upgrade status, a detailed status or force the upgrade to
| proceed.
*-----------------+-----------------------------------------------------------+
| -metasave filename | Save Namenode's primary data structures to <filename> in
| the directory specified by hadoop.log.dir property.
| <filename> will contain one line for each of the following\
| 1. Datanodes heart beating with Namenode\
| 2. Blocks waiting to be replicated\
| 3. Blocks currently being replicated\
| 4. Blocks waiting to be deleted\
*-----------------+-----------------------------------------------------------+
| -setQuota <quota> <dirname>...<dirname> | Set the quota <quota> for each
| directory <dirname>. The directory quota is a long integer
| that puts a hard limit on the number of names in the
| directory tree. Best effort for the directory, with faults
| reported if \
| 1. N is not a positive integer, or \
| 2. user is not an administrator, or \
| 3. the directory does not exist or is a file, or \
| 4. the directory would immediately exceed the new quota. \
*-----------------+-----------------------------------------------------------+
| -clrQuota <dirname>...<dirname> | Clear the quota for each directory
| <dirname>. Best effort for the directory, with faults
| reported if \
| 1. the directory does not exist or is a file, or \
| 2. user is not an administrator. It does not fault if the
| directory has no quota.
*-----------------+-----------------------------------------------------------+
| -help [cmd] | Displays help for the given command or all commands if none
| is specified.
*-----------------+-----------------------------------------------------------+
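A few illustrative invocations (the quota value and directory name are
placeholders):

+---
hadoop dfsadmin -report
hadoop dfsadmin -safemode get
hadoop dfsadmin -setQuota 1000000 /user/hadoop/dir1
+---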
* <<<mradmin>>>
Runs the MR admin client.
Usage: <<<hadoop mradmin [ GENERIC_OPTIONS ] [-refreshQueueAcls]>>>
*-------------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*-------------------+-----------------------------------------------------------+
| -refreshQueueAcls | Refresh the queue acls used by hadoop, to check access
| during submissions and administration of the job by the
| user. The properties present in mapred-queue-acls.xml are
| reloaded by the queue manager.
*-------------------+-----------------------------------------------------------+
* <<<jobtracker>>>
Runs the MapReduce job Tracker node.
Usage: <<<hadoop jobtracker [-dumpConfiguration]>>>
*--------------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*--------------------+-----------------------------------------------------------+
| -dumpConfiguration | Dumps the configuration used by the JobTracker, along
| with the queue configuration, in JSON format to standard
| output, and then exits.
*--------------------+-----------------------------------------------------------+
* <<<namenode>>>
Runs the namenode. More info about the upgrade, rollback and finalize is
at Upgrade Rollback
Usage: <<<hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]>>>
*--------------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*--------------------+-----------------------------------------------------------+
| -format | Formats the namenode. It starts the namenode, formats
| it and then shuts it down.
*--------------------+-----------------------------------------------------------+
| -upgrade | Namenode should be started with upgrade option after
| the distribution of new hadoop version.
*--------------------+-----------------------------------------------------------+
| -rollback | Rolls back the namenode to the previous version. This
| should be used after stopping the cluster and
| distributing the old hadoop version.
*--------------------+-----------------------------------------------------------+
| -finalize | Finalize will remove the previous state of the file
| system. Recent upgrade will become permanent. Rollback
| option will not be available anymore. After finalization
| it shuts the namenode down.
*--------------------+-----------------------------------------------------------+
| -importCheckpoint | Loads image from a checkpoint directory and saves it
| into the current one. Checkpoint dir is read from
| property fs.checkpoint.dir
*--------------------+-----------------------------------------------------------+
* <<<secondarynamenode>>>
Runs the HDFS secondary namenode. See Secondary Namenode for more
info.
Usage: <<<hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]>>>
*----------------------+-----------------------------------------------------------+
|| COMMAND_OPTION || Description
*----------------------+-----------------------------------------------------------+
| -checkpoint [-force] | Checkpoints the Secondary namenode if EditLog size
| >= fs.checkpoint.size. If <<<-force>>> is used,
| checkpoint irrespective of EditLog size.
*----------------------+-----------------------------------------------------------+
| -geteditsize | Prints the EditLog size.
*----------------------+-----------------------------------------------------------+
* <<<tasktracker>>>
Runs a MapReduce task Tracker node.
Usage: <<<hadoop tasktracker>>>

View File

@ -0,0 +1,418 @@
~~ Licensed to the Apache Software Foundation (ASF) under one or more
~~ contributor license agreements. See the NOTICE file distributed with
~~ this work for additional information regarding copyright ownership.
~~ The ASF licenses this file to You under the Apache License, Version 2.0
~~ (the "License"); you may not use this file except in compliance with
~~ the License. You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.
---
File System Shell Guide
---
---
${maven.build.timestamp}
%{toc}
Overview
The File System (FS) shell includes various shell-like commands that
directly interact with the Hadoop Distributed File System (HDFS) as well as
other file systems that Hadoop supports, such as Local FS, HFTP FS, S3 FS,
and others. The FS shell is invoked by:
+---
bin/hadoop fs <args>
+---
All FS shell commands take path URIs as arguments. The URI format is
<<<scheme://authority/path>>>. For HDFS the scheme is <<<hdfs>>>, and for
the Local FS the scheme is <<<file>>>. The scheme and authority are
optional. If not specified, the default scheme specified in the
configuration is used. An HDFS file or directory such as /parent/child can
be specified as <<<hdfs://namenodehost/parent/child>>> or simply as
<<</parent/child>>> (given that your configuration is set to point to
<<<hdfs://namenodehost>>>).
Most of the commands in FS shell behave like corresponding Unix commands.
Differences are described with each of the commands. Error information is
sent to stderr and the output is sent to stdout.
cat
Usage: <<<hdfs dfs -cat URI [URI ...]>>>
Copies source paths to stdout.
Example:
* <<<hdfs dfs -cat hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2>>>
* <<<hdfs dfs -cat file:///file3 /user/hadoop/file4>>>
Exit Code:
Returns 0 on success and -1 on error.
chgrp
Usage: <<<hdfs dfs -chgrp [-R] GROUP URI [URI ...]>>>
Change group association of files. With -R, make the change recursively
through the directory structure. The user must be the owner of files, or
else a super-user. Additional information is in the
{{{betterurl}Permissions Guide}}.
chmod
Usage: <<<hdfs dfs -chmod [-R] <MODE[,MODE]... | OCTALMODE> URI [URI ...]>>>
Change the permissions of files. With -R, make the change recursively
through the directory structure. The user must be the owner of the file, or
else a super-user. Additional information is in the
{{{betterurl}Permissions Guide}}.
chown
Usage: <<<hdfs dfs -chown [-R] [OWNER][:[GROUP]] URI [URI ]>>>
Change the owner of files. With -R, make the change recursively through the
directory structure. The user must be a super-user. Additional information
is in the {{{betterurl}Permissions Guide}}.
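A few illustrative invocations of the chgrp, chmod and chown commands
described above (the group, mode, owner and paths are placeholders):

+---
hdfs dfs -chgrp -R hadoop /user/hadoop/dir1
hdfs dfs -chmod -R 755 /user/hadoop/dir1
hdfs dfs -chown -R hadoopuser:hadoop /user/hadoop/dir1
+---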
copyFromLocal
Usage: <<<hdfs dfs -copyFromLocal <localsrc> URI>>>
Similar to put command, except that the source is restricted to a local
file reference.
copyToLocal
Usage: <<<hdfs dfs -copyToLocal [-ignorecrc] [-crc] URI <localdst> >>>
Similar to get command, except that the destination is restricted to a
local file reference.
count
Usage: <<<hdfs dfs -count [-q] <paths> >>>
Count the number of directories, files and bytes under the paths that match
the specified file pattern. The output columns with -count are: DIR_COUNT,
FILE_COUNT, CONTENT_SIZE, FILE_NAME
The output columns with -count -q are: QUOTA, REMAINING_QUOTA, SPACE_QUOTA,
REMAINING_SPACE_QUOTA, DIR_COUNT, FILE_COUNT, CONTENT_SIZE, FILE_NAME
Example:
* <<<hdfs dfs -count hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2>>>
* <<<hdfs dfs -count -q hdfs://nn1.example.com/file1>>>
Exit Code:
Returns 0 on success and -1 on error.
cp
Usage: <<<hdfs dfs -cp URI [URI ...] <dest> >>>
Copy files from source to destination. This command allows multiple sources
as well in which case the destination must be a directory.
Example:
* <<<hdfs dfs -cp /user/hadoop/file1 /user/hadoop/file2>>>
* <<<hdfs dfs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir>>>
Exit Code:
Returns 0 on success and -1 on error.
du
Usage: <<<hdfs dfs -du [-s] [-h] URI [URI ...]>>>
Displays sizes of files and directories contained in the given directory or
the length of a file in case it's just a file.
Options:
* The -s option will result in an aggregate summary of file lengths being
displayed, rather than the individual files.
* The -h option will format file sizes in a "human-readable" fashion (e.g.
64.0m instead of 67108864)
Example:
* <<<hdfs dfs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://nn.example.com/user/hadoop/dir1>>>
Exit Code:
Returns 0 on success and -1 on error.
dus
Usage: <<<hdfs dfs -dus <args> >>>
Displays a summary of file lengths. This is an alternate form of hdfs dfs -du -s.
expunge
Usage: <<<hdfs dfs -expunge>>>
Empty the Trash. Refer to the {{{betterurl}HDFS Architecture Guide}} for
more information on the Trash feature.
get
Usage: <<<hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst> >>>
Copy files to the local file system. Files that fail the CRC check may be
copied with the -ignorecrc option. Files and CRCs may be copied using the
-crc option.
Example:
* <<<hdfs dfs -get /user/hadoop/file localfile>>>
* <<<hdfs dfs -get hdfs://nn.example.com/user/hadoop/file localfile>>>
Exit Code:
Returns 0 on success and -1 on error.
getmerge
Usage: <<<hdfs dfs -getmerge <src> <localdst> [addnl]>>>
Takes a source directory and a destination file as input and concatenates
files in src into the destination local file. Optionally addnl can be set to
enable adding a newline character at the
end of each file.
ls
Usage: <<<hdfs dfs -ls <args> >>>
For a file returns stat on the file with the following format:
+---+
permissions number_of_replicas userid groupid filesize modification_date modification_time filename
+---+
For a directory it returns the list of its direct children, as in Unix. A directory is listed as:
+---+
permissions userid groupid modification_date modification_time dirname
+---+
Example:
* <<<hdfs dfs -ls /user/hadoop/file1>>>
Exit Code:
Returns 0 on success and -1 on error.
lsr
Usage: <<<hdfs dfs -lsr <args> >>>
Recursive version of ls. Similar to Unix ls -R.
mkdir
Usage: <<<hdfs dfs -mkdir [-p] <paths> >>>
Takes path URIs as argument and creates directories. With -p the behavior
is much like Unix mkdir -p, creating parent directories along the path.
Example:
* <<<hdfs dfs -mkdir /user/hadoop/dir1 /user/hadoop/dir2>>>
* <<<hdfs dfs -mkdir hdfs://nn1.example.com/user/hadoop/dir hdfs://nn2.example.com/user/hadoop/dir>>>
Exit Code:
Returns 0 on success and -1 on error.
moveFromLocal
Usage: <<<hdfs dfs -moveFromLocal <localsrc> <dst> >>>
Similar to put command, except that the source localsrc is deleted after
it's copied.
moveToLocal
Usage: <<<hdfs dfs -moveToLocal [-crc] <src> <dst> >>>
Displays a "Not implemented yet" message.
mv
Usage: <<<hdfs dfs -mv URI [URI ...] <dest> >>>
Moves files from source to destination. This command allows multiple sources
as well in which case the destination needs to be a directory. Moving files
across file systems is not permitted.
Example:
* <<<hdfs dfs -mv /user/hadoop/file1 /user/hadoop/file2>>>
* <<<hdfs dfs -mv hdfs://nn.example.com/file1 hdfs://nn.example.com/file2 hdfs://nn.example.com/file3 hdfs://nn.example.com/dir1>>>
Exit Code:
Returns 0 on success and -1 on error.
put
Usage: <<<hdfs dfs -put <localsrc> ... <dst> >>>
Copy single src, or multiple srcs from local file system to the destination
file system. Also reads input from stdin and writes to destination file
system.
* <<<hdfs dfs -put localfile /user/hadoop/hadoopfile>>>
* <<<hdfs dfs -put localfile1 localfile2 /user/hadoop/hadoopdir>>>
* <<<hdfs dfs -put localfile hdfs://nn.example.com/hadoop/hadoopfile>>>
* <<<hdfs dfs -put - hdfs://nn.example.com/hadoop/hadoopfile>>>
Reads the input from stdin.
Exit Code:
Returns 0 on success and -1 on error.
rm
Usage: <<<hdfs dfs -rm [-skipTrash] URI [URI ...]>>>
Delete files specified as args. Only deletes non-empty directories and files.
If the -skipTrash option is specified, the trash, if enabled, will be
bypassed and the specified file(s) deleted immediately. This can be useful
when it is necessary to delete files from an over-quota directory. Refer to
rmr for recursive deletes.
Example:
* <<<hdfs dfs -rm hdfs://nn.example.com/file /user/hadoop/emptydir>>>
Exit Code:
Returns 0 on success and -1 on error.
rmr
Usage: <<<hdfs dfs -rmr [-skipTrash] URI [URI ...]>>>
Recursive version of delete. If the -skipTrash option is specified, the
trash, if enabled, will be bypassed and the specified file(s) deleted
immediately. This can be useful when it is necessary to delete files from an
over-quota directory.
Example:
* <<<hdfs dfs -rmr /user/hadoop/dir>>>
* <<<hdfs dfs -rmr hdfs://nn.example.com/user/hadoop/dir>>>
Exit Code:
Returns 0 on success and -1 on error.
setrep
Usage: <<<hdfs dfs -setrep [-R] [-w] <rep> <path> >>>
Changes the replication factor of a file. -R option is for recursively
increasing the replication factor of files within a directory.
Example:
* <<<hdfs dfs -setrep -w 3 -R /user/hadoop/dir1>>>
Exit Code:
Returns 0 on success and -1 on error.
stat
Usage: <<<hdfs dfs -stat URI [URI ...]>>>
Returns the stat information on the path.
Example:
* <<<hdfs dfs -stat path>>>
Exit Code:
Returns 0 on success and -1 on error.
tail
Usage: <<<hdfs dfs -tail [-f] URI>>>
Displays last kilobyte of the file to stdout. -f option can be used as in
Unix.
Example:
* <<<hdfs dfs -tail pathname>>>
Exit Code:
Returns 0 on success and -1 on error.
test
Usage: <<<hdfs dfs -test -[ezd] URI>>>
Options:
*----+------------+
| -e | check to see if the file exists. Return 0 if true.
*----+------------+
| -z | check to see if the file is zero length. Return 0 if true.
*----+------------+
| -d | check to see if the path is directory. Return 0 if true.
*----+------------+
Example:
* <<<hdfs dfs -test -e filename>>>
text
Usage: <<<hdfs dfs -text <src> >>>
Takes a source file and outputs the file in text format. The allowed formats
are zip and TextRecordInputStream.
touchz
Usage: <<<hdfs dfs -touchz URI [URI ...]>>>
Create a file of zero length.
Example:
* <<<hdfs dfs -touchz pathname>>>
Exit Code:
Returns 0 on success and -1 on error.

View File

@ -0,0 +1,99 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License. See accompanying LICENSE file.
---
Authentication for Hadoop HTTP web-consoles
---
---
${maven.build.timestamp}
Authentication for Hadoop HTTP web-consoles
%{toc|section=1|fromDepth=0}
* Introduction
This document describes how to configure Hadoop HTTP web-consoles to
require user authentication.
By default Hadoop HTTP web-consoles (JobTracker, NameNode, TaskTrackers
and DataNodes) allow access without any form of authentication.
Similarly to Hadoop RPC, Hadoop HTTP web-consoles can be configured to
require Kerberos authentication using HTTP SPNEGO protocol (supported
by browsers like Firefox and Internet Explorer).
In addition, Hadoop HTTP web-consoles support the equivalent of
Hadoop's Pseudo/Simple authentication. If this option is enabled, users
must specify their user name in the first browser interaction using the
user.name query string parameter. For example:
<<<http://localhost:50030/jobtracker.jsp?user.name=babu>>>.
If a custom authentication mechanism is required for the HTTP
web-consoles, it is possible to implement a plugin to support the
alternate authentication mechanism (refer to Hadoop hadoop-auth for details
on writing an <<<AuthenticatorHandler>>>).
The next section describes how to configure Hadoop HTTP web-consoles to
require user authentication.
* Configuration
The following properties should be in the <<<core-site.xml>>> of all the
nodes in the cluster.
<<<hadoop.http.filter.initializers>>>: add to this property the
<<<org.apache.hadoop.security.AuthenticationFilterInitializer>>> initializer
class.
<<<hadoop.http.authentication.type>>>: Defines authentication used for the
HTTP web-consoles. The supported values are: <<<simple>>> | <<<kerberos>>> |
<<<#AUTHENTICATION_HANDLER_CLASSNAME#>>>. The default value is <<<simple>>>.
<<<hadoop.http.authentication.token.validity>>>: Indicates how long (in
seconds) an authentication token is valid before it has to be renewed.
The default value is <<<36000>>>.
<<<hadoop.http.authentication.signature.secret.file>>>: The signature secret
file for signing the authentication tokens. If not set a random secret is
generated at startup time. The same secret should be used for all nodes
in the cluster, JobTracker, NameNode, DataNode and TaskTracker. The
default value is <<<${user.home}/hadoop-http-auth-signature-secret>>>.
IMPORTANT: This file should be readable only by the Unix user running the
daemons.
<<<hadoop.http.authentication.cookie.domain>>>: The domain to use for the
HTTP cookie that stores the authentication token. In order for
authentication to work correctly across all nodes in the cluster the
domain must be correctly set. There is no default value; in that case the
HTTP cookie will not have a domain and will work only with the hostname
issuing the HTTP cookie.
IMPORTANT: when using IP addresses, browsers ignore cookies with domain
settings. For this setting to work properly all nodes in the cluster
must be configured to generate URLs with <<<hostname.domain>>> names in them.
<<<hadoop.http.authentication.simple.anonymous.allowed>>>: Indicates if
anonymous requests are allowed when using 'simple' authentication. The
default value is <<<true>>>.
<<<hadoop.http.authentication.kerberos.principal>>>: Indicates the Kerberos
principal to be used for HTTP endpoint when using 'kerberos'
authentication. The principal short name must be <<<HTTP>>> per Kerberos HTTP
SPNEGO specification. The default value is <<<HTTP/_HOST@$LOCALHOST>>>,
where <<<_HOST>>> -if present- is replaced with bind address of the HTTP
server.
<<<hadoop.http.authentication.kerberos.keytab>>>: Location of the keytab file
with the credentials for the Kerberos principal used for the HTTP
endpoint. The default value is <<<${user.home}/hadoop.keytab>>>.
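As a sketch, the properties above could be combined in <<<core-site.xml>>>
roughly as follows to require Kerberos SPNEGO authentication; the realm,
keytab path and secret file location are placeholders for site-specific
values:

+---
<property>
  <name>hadoop.http.filter.initializers</name>
  <value>org.apache.hadoop.security.AuthenticationFilterInitializer</value>
</property>
<property>
  <name>hadoop.http.authentication.type</name>
  <value>kerberos</value>
</property>
<property>
  <name>hadoop.http.authentication.kerberos.principal</name>
  <value>HTTP/_HOST@EXAMPLE.COM</value>
</property>
<property>
  <name>hadoop.http.authentication.kerberos.keytab</name>
  <value>/etc/security/keytab/spnego.service.keytab</value>
</property>
<property>
  <name>hadoop.http.authentication.signature.secret.file</name>
  <value>/etc/security/http_secret</value>
</property>
+---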

View File

@ -103,4 +103,12 @@ public class TestDU extends TestCase {
duSize >= writtenSize &&
writtenSize <= (duSize + slack));
}
public void testDUGetUsedWillNotReturnNegative() throws IOException {
File file = new File(DU_DIR, "data");
assertTrue(file.createNewFile());
DU du = new DU(file, 10000);
du.decDfsUsed(Long.MAX_VALUE);
long duSize = du.getUsed();
assertTrue(String.valueOf(duSize), duSize >= 0L);
}
}

View File

@ -4,6 +4,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
@ -133,6 +134,8 @@ public class TestDelegationTokenRenewer {
InterruptedException {
TestFileSystem tfs = new TestFileSystem();
renewer.addRenewAction(tfs);
assertEquals("FileSystem not added to DelegationTokenRenewer", 1,
renewer.getRenewQueueLength());
for (int i = 0; i < 60; i++) {
Thread.sleep(RENEW_CYCLE);
@ -144,7 +147,8 @@ public class TestDelegationTokenRenewer {
assertTrue("Token not renewed even after 1 minute",
(tfs.testToken.renewCount > 0));
assertTrue("Token not removed", (tfs.testToken.renewCount < MAX_RENEWALS));
assertEquals("FileSystem not removed from DelegationTokenRenewer", 0,
renewer.getRenewQueueLength());
assertTrue("Token not cancelled", tfs.testToken.cancelled);
}
}

View File

@ -25,18 +25,34 @@ import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import org.junit.Test;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
public class TestFileStatus {
private static final Log LOG =
LogFactory.getLog(TestFileStatus.class);
/** Values for creating {@link FileStatus} in some tests */
static final int LENGTH = 1;
static final int REPLICATION = 2;
static final long BLKSIZE = 3;
static final long MTIME = 4;
static final long ATIME = 5;
static final String OWNER = "owner";
static final String GROUP = "group";
static final FsPermission PERMISSION = FsPermission.valueOf("-rw-rw-rw-");
static final Path PATH = new Path("path");
/**
* Check that the write and readField methods work correctly.
*/
@Test
public void testFileStatusWritable() throws Exception {
FileStatus[] tests = {
@ -68,4 +84,181 @@ public class TestFileStatus {
iterator++;
}
}
/**
* Check that the full parameter constructor works correctly.
*/
@Test
public void constructorFull() throws IOException {
boolean isdir = false;
Path symlink = new Path("symlink");
FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE,
MTIME, ATIME, PERMISSION, OWNER, GROUP, symlink, PATH);
validateAccessors(fileStatus, LENGTH, isdir, REPLICATION, BLKSIZE, MTIME,
ATIME, PERMISSION, OWNER, GROUP, symlink, PATH);
}
/**
* Check that the non-symlink constructor works correctly.
*/
@Test
public void constructorNoSymlink() throws IOException {
boolean isdir = true;
FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE,
MTIME, ATIME, PERMISSION, OWNER, GROUP, PATH);
validateAccessors(fileStatus, LENGTH, isdir, REPLICATION, BLKSIZE, MTIME,
ATIME, PERMISSION, OWNER, GROUP, null, PATH);
}
/**
* Check that the constructor without owner, group and permissions works
* correctly.
*/
@Test
public void constructorNoOwner() throws IOException {
boolean isdir = true;
FileStatus fileStatus = new FileStatus(LENGTH, isdir,
REPLICATION, BLKSIZE, MTIME, PATH);
validateAccessors(fileStatus, LENGTH, isdir, REPLICATION, BLKSIZE, MTIME,
0, FsPermission.getDefault(), "", "", null, PATH);
}
/**
* Check that the no parameter constructor works correctly.
*/
@Test
public void constructorBlank() throws IOException {
FileStatus fileStatus = new FileStatus();
validateAccessors(fileStatus, 0, false, 0, 0, 0,
0, FsPermission.getDefault(), "", "", null, null);
}
/**
* Check that FileStatus are equal if their paths are equal.
*/
@Test
public void testEquals() {
Path path = new Path("path");
FileStatus fileStatus1 = new FileStatus(1, true, 1, 1, 1, 1,
FsPermission.valueOf("-rw-rw-rw-"), "one", "one", null, path);
FileStatus fileStatus2 = new FileStatus(2, true, 2, 2, 2, 2,
FsPermission.valueOf("---x--x--x"), "two", "two", null, path);
assertEquals(fileStatus1, fileStatus2);
}
/**
* Check that FileStatus are not equal if their paths are not equal.
*/
@Test
public void testNotEquals() {
Path path1 = new Path("path1");
Path path2 = new Path("path2");
FileStatus fileStatus1 = new FileStatus(1, true, 1, 1, 1, 1,
FsPermission.valueOf("-rw-rw-rw-"), "one", "one", null, path1);
FileStatus fileStatus2 = new FileStatus(1, true, 1, 1, 1, 1,
FsPermission.valueOf("-rw-rw-rw-"), "one", "one", null, path2);
assertFalse(fileStatus1.equals(fileStatus2));
assertFalse(fileStatus2.equals(fileStatus1));
}
/**
* Check that toString produces the expected output for a file.
*/
@Test
public void toStringFile() throws IOException {
boolean isdir = false;
FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE,
MTIME, ATIME, PERMISSION, OWNER, GROUP, null, PATH);
validateToString(fileStatus);
}
/**
* Check that toString produces the expected output for a directory.
*/
@Test
public void toStringDir() throws IOException {
FileStatus fileStatus = new FileStatus(LENGTH, true, REPLICATION, BLKSIZE,
MTIME, ATIME, PERMISSION, OWNER, GROUP, null, PATH);
validateToString(fileStatus);
}
/**
* Check that toString produces the expected output for a symlink.
*/
@Test
public void toStringSymlink() throws IOException {
boolean isdir = false;
Path symlink = new Path("symlink");
FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE,
MTIME, ATIME, PERMISSION, OWNER, GROUP, symlink, PATH);
validateToString(fileStatus);
}
/**
* Validate the accessors for FileStatus.
* @param fileStatus FileStatus to checked
* @param length expected length
* @param isdir expected isDirectory
* @param replication expected replication
* @param blocksize expected blocksize
* @param mtime expected modification time
* @param atime expected access time
* @param permission expected permission
* @param owner expected owner
* @param group expected group
* @param symlink expected symlink
* @param path expected path
*/
private void validateAccessors(FileStatus fileStatus,
long length, boolean isdir, int replication, long blocksize, long mtime,
long atime, FsPermission permission, String owner, String group,
Path symlink, Path path) throws IOException {
assertEquals(length, fileStatus.getLen());
assertEquals(isdir, fileStatus.isDirectory());
assertEquals(replication, fileStatus.getReplication());
assertEquals(blocksize, fileStatus.getBlockSize());
assertEquals(mtime, fileStatus.getModificationTime());
assertEquals(atime, fileStatus.getAccessTime());
assertEquals(permission, fileStatus.getPermission());
assertEquals(owner, fileStatus.getOwner());
assertEquals(group, fileStatus.getGroup());
if(symlink == null) {
assertFalse(fileStatus.isSymlink());
} else {
assertTrue(fileStatus.isSymlink());
assertEquals(symlink, fileStatus.getSymlink());
}
assertEquals(path, fileStatus.getPath());
}
/**
* Validates the toString method for FileStatus.
* @param fileStatus FileStatus to be validated
*/
private void validateToString(FileStatus fileStatus) throws IOException {
StringBuilder expected = new StringBuilder();
expected.append("FileStatus{");
expected.append("path=").append(fileStatus.getPath()).append("; ");
expected.append("isDirectory=").append(fileStatus.isDirectory()).append("; ");
if(!fileStatus.isDirectory()) {
expected.append("length=").append(fileStatus.getLen()).append("; ");
expected.append("replication=").append(fileStatus.getReplication()).append("; ");
expected.append("blocksize=").append(fileStatus.getBlockSize()).append("; ");
}
expected.append("modification_time=").append(fileStatus.getModificationTime()).append("; ");
expected.append("access_time=").append(fileStatus.getAccessTime()).append("; ");
expected.append("owner=").append(fileStatus.getOwner()).append("; ");
expected.append("group=").append(fileStatus.getGroup()).append("; ");
expected.append("permission=").append(fileStatus.getPermission()).append("; ");
if(fileStatus.isSymlink()) {
expected.append("isSymlink=").append(true).append("; ");
expected.append("symlink=").append(fileStatus.getSymlink()).append("}");
} else {
expected.append("isSymlink=").append(false).append("}");
}
assertEquals(expected.toString(), fileStatus.toString());
}
}

View File

@ -0,0 +1,46 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.net.URL;
import org.junit.Test;
import static org.junit.Assert.*;
public class TestFileSystemInitialization {
/**
* Check if FileSystem can be properly initialized if URLStreamHandlerFactory
* is registered.
*/
@Test
public void testInitializationWithRegisteredStreamFactory() {
Configuration conf = new Configuration();
URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory(conf));
try {
FileSystem.getFileSystemClass("file", conf);
}
catch (IOException ok) {
// we might get an exception but this is not related to the infinite loop problem
assertFalse(false);
}
}
}

View File

@ -358,6 +358,66 @@ public class TestFsShellCopy {
assertEquals("f1\ndf1\ndf2\ndf3\nf2\n", readFile("out"));
}
@Test
public void testMoveFileFromLocal() throws Exception {
Path testRoot = new Path(testRootDir, "testPutFile");
lfs.delete(testRoot, true);
lfs.mkdirs(testRoot);
Path target = new Path(testRoot, "target");
Path srcFile = new Path(testRoot, new Path("srcFile"));
lfs.createNewFile(srcFile);
int exit = shell.run(new String[]{
"-moveFromLocal", srcFile.toString(), target.toString() });
assertEquals(0, exit);
assertFalse(lfs.exists(srcFile));
assertTrue(lfs.exists(target));
assertTrue(lfs.isFile(target));
}
@Test
public void testMoveDirFromLocal() throws Exception {
Path testRoot = new Path(testRootDir, "testPutDir");
lfs.delete(testRoot, true);
lfs.mkdirs(testRoot);
Path srcDir = new Path(testRoot, "srcDir");
lfs.mkdirs(srcDir);
Path targetDir = new Path(testRoot, "target");
int exit = shell.run(new String[]{
"-moveFromLocal", srcDir.toString(), targetDir.toString() });
assertEquals(0, exit);
assertFalse(lfs.exists(srcDir));
assertTrue(lfs.exists(targetDir));
}
@Test
public void testMoveDirFromLocalDestExists() throws Exception {
Path testRoot = new Path(testRootDir, "testPutDir");
lfs.delete(testRoot, true);
lfs.mkdirs(testRoot);
Path srcDir = new Path(testRoot, "srcDir");
lfs.mkdirs(srcDir);
Path targetDir = new Path(testRoot, "target");
lfs.mkdirs(targetDir);
int exit = shell.run(new String[]{
"-moveFromLocal", srcDir.toString(), targetDir.toString() });
assertEquals(0, exit);
assertFalse(lfs.exists(srcDir));
assertTrue(lfs.exists(new Path(targetDir, srcDir.getName())));
lfs.mkdirs(srcDir);
exit = shell.run(new String[]{
"-moveFromLocal", srcDir.toString(), targetDir.toString() });
assertEquals(1, exit);
assertTrue(lfs.exists(srcDir));
}
private void createFile(Path ... paths) throws IOException {
for (Path path : paths) {
FSDataOutputStream out = lfs.create(path);

View File

@ -19,12 +19,14 @@ package org.apache.hadoop.fs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.util.Shell;
import static org.apache.hadoop.fs.FileSystemTestHelper.*;
import java.io.*;
import static org.junit.Assert.*;
import static org.junit.Assume.assumeTrue;
import org.junit.Before;
import org.junit.Test;
@ -262,6 +264,7 @@ public class TestLocalFileSystem {
@Test
public void testListStatusWithColons() throws IOException {
assumeTrue(!Shell.WINDOWS);
Configuration conf = new Configuration();
LocalFileSystem fs = FileSystem.getLocal(conf);
File colonFile = new File(TEST_ROOT_DIR, "foo:bar");

View File

@ -25,6 +25,7 @@ import java.util.EnumSet;
import static org.apache.hadoop.fs.FileContextTestHelper.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.AbstractFileSystem;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileContextTestHelper;
@ -36,6 +37,7 @@ import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
public class TestChRootedFs {
FileContext fc; // The ChRootedFs
@ -307,4 +309,21 @@ public class TestChRootedFs {
fc.getDefaultFileSystem().resolvePath(new Path("/nonExisting"));
}
@Test
public void testIsValidNameValidInBaseFs() throws Exception {
AbstractFileSystem baseFs = Mockito.spy(fc.getDefaultFileSystem());
ChRootedFs chRootedFs = new ChRootedFs(baseFs, new Path("/chroot"));
Mockito.doReturn(true).when(baseFs).isValidName(Mockito.anyString());
Assert.assertTrue(chRootedFs.isValidName("/test"));
Mockito.verify(baseFs).isValidName("/chroot/test");
}
@Test
public void testIsValidNameInvalidInBaseFs() throws Exception {
AbstractFileSystem baseFs = Mockito.spy(fc.getDefaultFileSystem());
ChRootedFs chRootedFs = new ChRootedFs(baseFs, new Path("/chroot"));
Mockito.doReturn(false).when(baseFs).isValidName(Mockito.anyString());
Assert.assertFalse(chRootedFs.isValidName("/test"));
Mockito.verify(baseFs).isValidName("/chroot/test");
}
}

View File

@ -662,4 +662,15 @@ public class ViewFileSystemBaseTest {
public void testInternalSetOwner() throws IOException {
fsView.setOwner(new Path("/internalDir"), "foo", "bar");
}
@Test
public void testCreateNonRecursive() throws IOException {
Path path = FileSystemTestHelper.getTestRootPath(fsView, "/user/foo");
fsView.createNonRecursive(path, false, 1024, (short)1, 1024L, null);
FileStatus status = fsView.getFileStatus(new Path("/user/foo"));
Assert.assertTrue("Created file should be type file",
fsView.isFile(new Path("/user/foo")));
Assert.assertTrue("Target of created file should be type file",
fsTarget.isFile(new Path(targetTestRoot,"user/foo")));
}
}

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveNotFoundException;
import org.apache.hadoop.ha.HAZKUtil.ZKAuthInfo;
import org.apache.hadoop.test.GenericTestUtils;
public class TestActiveStandbyElector {
@ -56,7 +57,8 @@ public class TestActiveStandbyElector {
private int sleptFor = 0;
ActiveStandbyElectorTester(String hostPort, int timeout, String parent,
List<ACL> acl, ActiveStandbyElectorCallback app) throws IOException {
List<ACL> acl, ActiveStandbyElectorCallback app) throws IOException,
KeeperException {
super(hostPort, timeout, parent, acl,
Collections.<ZKAuthInfo>emptyList(), app);
}
@ -83,7 +85,7 @@ public class TestActiveStandbyElector {
ActiveStandbyElector.BREADCRUMB_FILENAME;
@Before
public void init() throws IOException {
public void init() throws IOException, KeeperException {
count = 0;
mockZK = Mockito.mock(ZooKeeper.class);
mockApp = Mockito.mock(ActiveStandbyElectorCallback.class);
@ -705,4 +707,18 @@ public class TestActiveStandbyElector {
Mockito.eq(ZK_PARENT_NAME), Mockito.<byte[]>any(),
Mockito.eq(Ids.OPEN_ACL_UNSAFE), Mockito.eq(CreateMode.PERSISTENT));
}
/**
* verify the zookeeper connection establishment
*/
@Test
public void testWithoutZKServer() throws Exception {
try {
new ActiveStandbyElector("127.0.0.1", 2000, ZK_PARENT_NAME,
Ids.OPEN_ACL_UNSAFE, Collections.<ZKAuthInfo> emptyList(), mockApp);
Assert.fail("Did not throw zookeeper connection loss exceptions!");
} catch (KeeperException ke) {
GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke);
}
}
}

View File

@ -68,6 +68,7 @@ public class TestIPC {
* of the various writables.
**/
static boolean WRITABLE_FAULTS_ENABLED = true;
static int WRITABLE_FAULTS_SLEEP = 0;
static {
Client.setPingInterval(conf, PING_INTERVAL);
@ -206,16 +207,27 @@ public class TestIPC {
static void maybeThrowIOE() throws IOException {
if (WRITABLE_FAULTS_ENABLED) {
maybeSleep();
throw new IOException("Injected fault");
}
}
static void maybeThrowRTE() {
if (WRITABLE_FAULTS_ENABLED) {
maybeSleep();
throw new RuntimeException("Injected fault");
}
}
private static void maybeSleep() {
if (WRITABLE_FAULTS_SLEEP > 0) {
try {
Thread.sleep(WRITABLE_FAULTS_SLEEP);
} catch (InterruptedException ie) {
}
}
}
@SuppressWarnings("unused")
private static class IOEOnReadWritable extends LongWritable {
public IOEOnReadWritable() {}
@ -370,6 +382,27 @@ public class TestIPC {
RTEOnReadWritable.class);
}
/**
* Test case that fails a write, but only after taking enough time
* that a ping should have been sent. This is a reproducer for a
* deadlock seen in one iteration of HADOOP-6762.
*/
@Test
public void testIOEOnWriteAfterPingClient() throws Exception {
// start server
Client.setPingInterval(conf, 100);
try {
WRITABLE_FAULTS_SLEEP = 1000;
doErrorTest(IOEOnWriteWritable.class,
LongWritable.class,
LongWritable.class,
LongWritable.class);
} finally {
WRITABLE_FAULTS_SLEEP = 0;
}
}
private static void assertExceptionContains(
Throwable t, String substring) {
String msg = StringUtils.stringifyException(t);

View File

@ -38,6 +38,10 @@ import java.net.ConnectException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import javax.net.SocketFactory;
@ -823,6 +827,96 @@ public class TestRPC {
}
}
@Test(timeout=90000)
public void testRPCInterruptedSimple() throws Exception {
final Configuration conf = new Configuration();
Server server = RPC.getServer(
TestProtocol.class, new TestImpl(), ADDRESS, 0, 5, true, conf, null
);
server.start();
InetSocketAddress addr = NetUtils.getConnectAddress(server);
final TestProtocol proxy = (TestProtocol) RPC.getProxy(
TestProtocol.class, TestProtocol.versionID, addr, conf);
// Connect to the server
proxy.ping();
// Interrupt self, try another call
Thread.currentThread().interrupt();
try {
proxy.ping();
fail("Interruption did not cause IPC to fail");
} catch (IOException ioe) {
if (!ioe.toString().contains("InterruptedException")) {
throw ioe;
}
// clear interrupt status for future tests
Thread.interrupted();
}
}
@Test(timeout=30000)
public void testRPCInterrupted() throws IOException, InterruptedException {
final Configuration conf = new Configuration();
Server server = RPC.getServer(
TestProtocol.class, new TestImpl(), ADDRESS, 0, 5, true, conf, null
);
server.start();
int numConcurrentRPC = 200;
InetSocketAddress addr = NetUtils.getConnectAddress(server);
final CyclicBarrier barrier = new CyclicBarrier(numConcurrentRPC);
final CountDownLatch latch = new CountDownLatch(numConcurrentRPC);
final AtomicBoolean leaderRunning = new AtomicBoolean(true);
final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
Thread leaderThread = null;
for (int i = 0; i < numConcurrentRPC; i++) {
final int num = i;
final TestProtocol proxy = (TestProtocol) RPC.getProxy(
TestProtocol.class, TestProtocol.versionID, addr, conf);
Thread rpcThread = new Thread(new Runnable() {
@Override
public void run() {
try {
barrier.await();
while (num == 0 || leaderRunning.get()) {
proxy.slowPing(false);
}
proxy.slowPing(false);
} catch (Exception e) {
if (num == 0) {
leaderRunning.set(false);
} else {
error.set(e);
}
LOG.error(e);
} finally {
latch.countDown();
}
}
});
rpcThread.start();
if (leaderThread == null) {
leaderThread = rpcThread;
}
}
// let threads get past the barrier
Thread.sleep(1000);
// stop a single thread
while (leaderRunning.get()) {
leaderThread.interrupt();
}
latch.await();
// should not cause any other thread to get an error
assertTrue("rpc got exception " + error.get(), error.get() == null);
}
public static void main(String[] args) throws Exception {
new TestRPC().testCallsInternal(conf);

View File

@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.security;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.junit.Test;
public class TestProxyUserFromEnv {
/** Test HADOOP_PROXY_USER for impersonation */
@Test
public void testProxyUserFromEnvironment() throws IOException {
String proxyUser = "foo.bar";
System.setProperty(UserGroupInformation.HADOOP_PROXY_USER, proxyUser);
UserGroupInformation ugi = UserGroupInformation.getLoginUser();
assertEquals(proxyUser, ugi.getUserName());
UserGroupInformation realUgi = ugi.getRealUser();
assertNotNull(realUgi);
// get the expected real user name
Process pp = Runtime.getRuntime().exec("whoami");
BufferedReader br = new BufferedReader
(new InputStreamReader(pp.getInputStream()));
String realUser = br.readLine().trim();
assertEquals(realUser, realUgi.getUserName());
}
}
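
For context, a minimal illustrative sketch (not part of the patch) of how a child client process might rely on this: it assumes the real login user is permitted to impersonate the proxy user on the cluster, and that the HADOOP_PROXY_USER value is in place (here set via the system property, mirroring the environment variable and the test above) before the first call to getLoginUser().

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxyUserSketch {
  public static void main(String[] args) throws Exception {
    // Assumption for illustration: "alice" is a user this process may impersonate.
    // Same key as UserGroupInformation.HADOOP_PROXY_USER used in the test above.
    System.setProperty("HADOOP_PROXY_USER", "alice");
    UserGroupInformation proxyUgi = UserGroupInformation.getLoginUser();
    proxyUgi.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run() throws Exception {
        // Hadoop client calls issued here run as "alice",
        // with the real login user recorded as the impersonator.
        return null;
      }
    });
  }
}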

View File

@ -0,0 +1,54 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import junit.framework.TestCase;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.junit.Test;
public class TestNativeLibraryChecker extends TestCase {
private void expectExit(String [] args) {
try {
// should throw exit exception
NativeLibraryChecker.main(args);
fail("should call exit");
} catch (ExitException e) {
// pass
ExitUtil.resetFirstExitException();
}
}
@Test
public void testNativeLibraryChecker() {
ExitUtil.disableSystemExit();
// help should return normally
NativeLibraryChecker.main(new String[] {"-h"});
// illegal arguments should exit
expectExit(new String[] {"-a", "-h"});
expectExit(new String[] {"aaa"});
if (NativeCodeLoader.isNativeCodeLoaded()) {
// no argument should return normally
NativeLibraryChecker.main(new String[0]);
} else {
// no argument should exit
expectExit(new String[0]);
}
}
}
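
Outside of tests, callers usually only need to know whether libhadoop was loaded at all; a minimal sketch of that check, using the same NativeCodeLoader method exercised above:

import org.apache.hadoop.util.NativeCodeLoader;

public class NativeCheckSketch {
  public static void main(String[] args) {
    if (NativeCodeLoader.isNativeCodeLoaded()) {
      System.out.println("libhadoop loaded; native code paths available");
    } else {
      System.out.println("libhadoop not found; falling back to pure-Java implementations");
    }
  }
}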

View File

@ -34,7 +34,7 @@
<description>Apache Hadoop HttpFS</description>
<properties>
<tomcat.version>6.0.32</tomcat.version>
<tomcat.version>6.0.36</tomcat.version>
<httpfs.source.repository>REPO NOT AVAIL</httpfs.source.repository>
<httpfs.source.repository>REPO NOT AVAIL</httpfs.source.repository>
<httpfs.source.revision>REVISION NOT AVAIL</httpfs.source.revision>
@ -531,7 +531,7 @@
<mkdir dir="downloads"/>
<get
src="${tomcat.download.url}"
dest="downloads/tomcat.tar.gz" verbose="true" skipexisting="true"/>
dest="downloads/apache-tomcat-${tomcat.version}.tar.gz" verbose="true" skipexisting="true"/>
<delete dir="${project.build.directory}/tomcat.exp"/>
<mkdir dir="${project.build.directory}/tomcat.exp"/>
@ -545,7 +545,7 @@
BUILD_DIR=`cygpath --unix '${project.build.directory}'`
fi
cd $BUILD_DIR/tomcat.exp
tar xzf ${basedir}/downloads/tomcat.tar.gz
tar xzf ${basedir}/downloads/apache-tomcat-${tomcat.version}.tar.gz
</echo>
<exec executable="sh" dir="${project.build.directory}" failonerror="true">
<arg line="./tomcat-untar.sh"/>

View File

@ -20,6 +20,8 @@ Trunk (Unreleased)
HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup.
(Junping Du via szetszwo)
HDFS-4296. Reserve layout version for release 1.2.0. (suresh)
IMPROVEMENTS
HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants ->
@ -269,6 +271,21 @@ Trunk (Unreleased)
HDFS-4260 Fix HDFS tests to set test dir to a valid HDFS path as opposed
to the local build path (Chris Nauroth via Sanjay)
HDFS-4269. Datanode rejects all datanode registrations from localhost
in single-node developer setup on Windows. (Chris Nauroth via suresh)
HADOOP-8957 HDFS tests for AbstractFileSystem#IsValidName should be overridden for
embedded file systems like ViewFs (Chris Nauroth via Sanjay Radia)
HDFS-4310. fix test org.apache.hadoop.hdfs.server.datanode.
TestStartSecureDataNode (Ivan A. Veselovsky via atm)
HDFS-4274. BlockPoolSliceScanner does not close verification log during
shutdown. (Chris Nauroth via suresh)
HDFS-4275. MiniDFSCluster-based tests fail on Windows due to failure
to delete test namenode directory. (Chris Nauroth via suresh)
Release 2.0.3-alpha - Unreleased
INCOMPATIBLE CHANGES
@ -410,6 +427,11 @@ Release 2.0.3-alpha - Unreleased
HDFS-3680. Allow customized audit logging in HDFS FSNamesystem. (Marcelo
Vanzin via atm)
HDFS-4130. BKJM: The reading for editlog at NN starting using bkjm is not efficient.
(Han Xiao via umamahesh)
HDFS-4326. bump up Tomcat version for HttpFS to 6.0.36. (tucu via acmurthy)
OPTIMIZATIONS
BUG FIXES
@ -581,6 +603,30 @@ Release 2.0.3-alpha - Unreleased
HDFS-4279. NameNode does not initialize generic conf keys when started
with -recover. (Colin Patrick McCabe via atm)
HDFS-4291. edit log unit tests leave stray test_edit_log_file around
(Colin Patrick McCabe via todd)
HDFS-4292. Sanity check not correct in RemoteBlockReader2.newBlockReader
(Binglin Chang via todd)
HDFS-4295. Using port 1023 should be valid when starting Secure DataNode
(Stephen Chu via todd)
HDFS-4294. Backwards compatibility is not maintained for TestVolumeId.
(Ivan A. Veselovsky and Robert Parker via atm)
HDFS-2264. NamenodeProtocol has the wrong value for clientPrincipal in
KerberosInfo annotation. (atm)
HDFS-4307. SocketCache should use monotonic time. (Colin Patrick McCabe
via atm)
HDFS-4315. DNs with multiple BPs can have BPOfferServices fail to start
due to unsynchronized map access. (atm)
HDFS-4140. fuse-dfs handles open(O_TRUNC) poorly. (Colin Patrick McCabe
via atm)
BREAKDOWN OF HDFS-3077 SUBTASKS
HDFS-3077. Quorum-based protocol for reading and writing edit logs.

View File

@ -500,16 +500,18 @@ public class BookKeeperJournalManager implements JournalManager {
}
}
EditLogInputStream getInputStream(long fromTxId, boolean inProgressOk)
throws IOException {
for (EditLogLedgerMetadata l : getLedgerList(inProgressOk)) {
@Override
public void selectInputStreams(Collection<EditLogInputStream> streams,
long fromTxId, boolean inProgressOk) throws IOException {
List<EditLogLedgerMetadata> currentLedgerList = getLedgerList(inProgressOk);
try {
BookKeeperEditLogInputStream elis = null;
for (EditLogLedgerMetadata l : currentLedgerList) {
long lastTxId = l.getLastTxId();
if (l.isInProgress()) {
lastTxId = recoverLastTxId(l, false);
}
if (fromTxId >= l.getFirstTxId() && fromTxId <= lastTxId) {
try {
LedgerHandle h;
if (l.isInProgress()) { // we don't want to fence the current journal
h = bkc.openLedgerNoRecovery(l.getLedgerId(),
@ -518,35 +520,9 @@ public class BookKeeperJournalManager implements JournalManager {
h = bkc.openLedger(l.getLedgerId(), BookKeeper.DigestType.MAC,
digestpw.getBytes());
}
BookKeeperEditLogInputStream s = new BookKeeperEditLogInputStream(h,
l);
s.skipTo(fromTxId);
return s;
} catch (BKException e) {
throw new IOException("Could not open ledger for " + fromTxId, e);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new IOException("Interrupted opening ledger for "
+ fromTxId, ie);
}
}
}
return null;
}
@Override
public void selectInputStreams(Collection<EditLogInputStream> streams,
long fromTxId, boolean inProgressOk) {
// NOTE: could probably be rewritten more efficiently
while (true) {
EditLogInputStream elis;
try {
elis = getInputStream(fromTxId, inProgressOk);
} catch (IOException e) {
LOG.error(e);
return;
}
if (elis == null) {
elis = new BookKeeperEditLogInputStream(h, l);
elis.skipTo(fromTxId);
} else {
return;
}
streams.add(elis);
@ -555,6 +531,12 @@ public class BookKeeperJournalManager implements JournalManager {
}
fromTxId = elis.getLastTxId() + 1;
}
} catch (BKException e) {
throw new IOException("Could not open ledger for " + fromTxId, e);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new IOException("Interrupted opening ledger for " + fromTxId, ie);
}
}
long getNumberOfTransactions(long fromTxId, boolean inProgressOk)

View File

@ -28,6 +28,7 @@ import org.mockito.Mockito;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.ArrayList;
import java.util.Random;
@ -315,13 +316,13 @@ public class TestBookKeeperJournalManager {
out.close();
bkjm.finalizeLogSegment(1, numTransactions);
EditLogInputStream in = bkjm.getInputStream(1, true);
List<EditLogInputStream> in = new ArrayList<EditLogInputStream>();
bkjm.selectInputStreams(in, 1, true);
try {
assertEquals(numTransactions,
FSEditLogTestUtil.countTransactionsInStream(in));
FSEditLogTestUtil.countTransactionsInStream(in.get(0)));
} finally {
in.close();
in.get(0).close();
}
}

View File

@ -1,536 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
HDFS Architecture Guide
</title>
<authors>
<person name="Dhruba Borthakur" email="dhruba@yahoo-inc.com"/>
</authors>
</header>
<body>
<section>
<title> Introduction </title>
<p>
The Hadoop Distributed File System (<acronym title="Hadoop Distributed File System">HDFS</acronym>) is a distributed file system
designed to run on commodity hardware. It has many similarities with existing distributed file systems. However, the differences from
other distributed file systems are significant. HDFS is highly fault-tolerant and is designed to be deployed on low-cost hardware.
HDFS provides high throughput access to application data and is suitable for applications that have large data sets. HDFS relaxes
a few POSIX requirements to enable streaming access to file system data. HDFS was originally built as infrastructure for the
Apache Nutch web search engine project. HDFS is now an Apache Hadoop subproject.
The project URL is <a href="http://hadoop.apache.org/hdfs/">http://hadoop.apache.org/hdfs/</a>.
</p>
</section>
<section>
<title> Assumptions and Goals </title>
<section>
<title> Hardware Failure </title>
<p>
Hardware failure is the norm rather than the exception. An HDFS instance may consist of hundreds or thousands of server machines,
each storing part of the file system&#x2019;s data. The fact that there are a huge number of components and that each component has
a non-trivial probability of failure means that some component of HDFS is always non-functional. Therefore, detection of faults and quick,
automatic recovery from them is a core architectural goal of HDFS.
</p>
</section>
<section>
<title> Streaming Data Access </title>
<p>
Applications that run on HDFS need streaming access to their data sets. They are not general purpose applications that typically run
on general purpose file systems. HDFS is designed more for batch processing rather than interactive use by users. The emphasis is on
high throughput of data access rather than low latency of data access. POSIX imposes many hard requirements that are not needed for
applications that are targeted for HDFS. POSIX semantics in a few key areas have been traded to increase data throughput rates.
</p>
</section>
<section>
<title> Large Data Sets </title>
<p>
Applications that run on HDFS have large data sets. A typical file in HDFS is gigabytes to terabytes in size. Thus, HDFS is tuned to
support large files. It should provide high aggregate data bandwidth and scale to thousands of nodes in a single cluster. It should support
tens of millions of files in a single instance.
</p>
</section>
<section>
<title> Appending-Writes and File Syncs </title>
<p>
Most HDFS applications need a write-once-read-many access model for files. HDFS provides two additional advanced features: hflush and
append. Hflush makes the last block of an unclosed file visible to readers while providing read consistency and data durability. Append
provides a mechanism for opening a closed file to add additional data.
</p>
<p>
For complete details of the hflush and append design, see the
<a href="https://issues.apache.org/jira/secure/attachment/12445209/appendDesign3.pdf">Append/Hflush/Read Design document</a> (PDF).
</p>
</section>
<section>
<title> &#x201c;Moving Computation is Cheaper than Moving Data&#x201d; </title>
<p>
A computation requested by an application is much more efficient if it is executed near the data it operates on. This is especially true
when the size of the data set is huge. This minimizes network congestion and increases the overall throughput of the system. The
assumption is that it is often better to migrate the computation closer to where the data is located rather than moving the data to where
the application is running. HDFS provides interfaces for applications to move themselves closer to where the data is located.
</p>
</section>
<section>
<title> Portability Across Heterogeneous Hardware and Software Platforms </title>
<p>
HDFS has been designed to be easily portable from one platform to another. This facilitates widespread adoption of HDFS as a
platform of choice for a large set of applications.
</p>
</section>
</section>
<section>
<title> NameNode and DataNodes </title>
<p>
HDFS has a master/slave architecture. An HDFS cluster consists of a single NameNode, a master server that manages the file
system namespace and regulates access to files by clients. In addition, there are a number of DataNodes, usually one per node
in the cluster, which manage storage attached to the nodes that they run on. HDFS exposes a file system namespace and allows
user data to be stored in files. Internally, a file is split into one or more blocks and these blocks are stored in a set of DataNodes.
The NameNode executes file system namespace operations like opening, closing, and renaming files and directories. It also
determines the mapping of blocks to DataNodes. The DataNodes are responsible for serving read and write requests from the file
system&#x2019;s clients. The DataNodes also perform block creation, deletion, and replication upon instruction from the NameNode.
</p>
<figure alt="HDFS Architecture" src="images/hdfsarchitecture.gif"/>
<p>
The NameNode and DataNode are pieces of software designed to run on commodity machines. These machines typically run a
GNU/Linux operating system (<acronym title="operating system">OS</acronym>). HDFS is built using the Java language; any
machine that supports Java can run the NameNode or the DataNode software. Usage of the highly portable Java language means
that HDFS can be deployed on a wide range of machines. A typical deployment has a dedicated machine that runs only the
NameNode software. Each of the other machines in the cluster runs one instance of the DataNode software. The architecture
does not preclude running multiple DataNodes on the same machine but in a real deployment that is rarely the case.
</p>
<p>
The existence of a single NameNode in a cluster greatly simplifies the architecture of the system. The NameNode is the arbitrator
and repository for all HDFS metadata. The system is designed in such a way that user data never flows through the NameNode.
</p>
</section>
<section>
<title> The File System Namespace </title>
<p>
HDFS supports a traditional hierarchical file organization. A user or an application can create directories and store files inside
these directories. The file system namespace hierarchy is similar to most other existing file systems; one can create and
remove files, move a file from one directory to another, or rename a file. HDFS implements user quotas for number of names and
amount of data stored in a particular directory (See
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_quota_admin_guide.html">HDFS Quota Admin Guide</a>). In addition, HDFS
supports <a href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/fs/FileContext.html#createSymlink(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, boolean)">symbolic links</a>.
</p>
<p>
The NameNode maintains the file system namespace. Any change to the file system namespace or its properties is
recorded by the NameNode. An application can specify the number of replicas of a file that should be maintained by
HDFS. The number of copies of a file is called the replication factor of that file. This information is stored by the NameNode.
</p>
</section>
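
As an illustration of the namespace operations described above, a short sketch against the Java FileSystem API (the paths are hypothetical; error handling is omitted):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NamespaceExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration()); // uses the configured default FS
    fs.mkdirs(new Path("/user/alice/logs"));             // create a directory
    fs.rename(new Path("/user/alice/logs"),              // move/rename within the namespace
              new Path("/user/alice/archive"));
    fs.delete(new Path("/user/alice/archive"), true);    // recursive remove
  }
}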
<section>
<title> Data Replication </title>
<p>
HDFS is designed to reliably store very large files across machines in a large cluster. It stores each file as a sequence
of blocks; all blocks in a file except the last block are the same size. The blocks of a file are replicated for fault tolerance.
The block size and replication factor are configurable per file. An application can specify the number of replicas of a file.
The replication factor can be specified at file creation time and can be changed later. Files in HDFS have strictly one writer at any
time.
</p>
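
For example (a sketch only, with hypothetical paths and values; it uses the standard FileSystem methods for per-file replication), the replication factor can be requested at creation time and adjusted afterwards:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplicationExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path p = new Path("/user/alice/data.bin");
    // Request 3 replicas at creation time.
    FSDataOutputStream out = fs.create(p, (short) 3);
    out.writeBytes("example payload");
    out.close();
    // Later, lower the replication factor to 2.
    fs.setReplication(p, (short) 2);
  }
}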
<p>
The NameNode makes all decisions regarding replication of blocks. It periodically receives a Heartbeat and a Blockreport
from each of the DataNodes in the cluster. Receipt of a Heartbeat implies that the DataNode is functioning properly. A
Blockreport contains a list of all blocks on a DataNode.
</p>
<figure alt="HDFS DataNodes" src="images/hdfsdatanodes.gif"/>
<section>
<title> Replica Placement: The First Baby Steps </title>
<p>
The placement of replicas is critical to HDFS reliability and performance. Optimizing replica placement distinguishes
HDFS from most other distributed file systems. This is a feature that needs lots of tuning and experience. The purpose
of a rack-aware replica placement policy is to improve data reliability, availability, and network bandwidth utilization.
The current implementation for the replica placement policy is a first effort in this direction. The short-term goals of
implementing this policy are to validate it on production systems, learn more about its behavior, and build a foundation
to test and research more sophisticated policies.
</p>
<p>
Large HDFS instances run on a cluster of computers that commonly spread across many racks. Communication
between two nodes in different racks has to go through switches. In most cases, network bandwidth between machines
in the same rack is greater than network bandwidth between machines in different racks.
</p>
<p>
The NameNode determines the rack id each DataNode belongs to via the process outlined in
<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Hadoop+Rack+Awareness">Hadoop Rack Awareness</a>.
A simple but non-optimal policy is to place replicas on unique racks. This prevents losing data when an entire rack
fails and allows use of bandwidth from multiple racks when reading data. This policy evenly distributes replicas in
the cluster which makes it easy to balance load on component failure. However, this policy increases the cost of
writes because a write needs to transfer blocks to multiple racks.
</p>
<p>
For the common case, when the replication factor is three, HDFS&#x2019;s placement policy is to put one replica
on one node in the local rack, another on a node in a different (remote) rack, and the last on a different node in the
same remote rack. This policy cuts the inter-rack write traffic which generally improves write performance. The
chance of rack failure is far less than that of node failure; this policy does not impact data reliability and availability
guarantees. However, it does reduce the aggregate network bandwidth used when reading data since a block is
placed in only two unique racks rather than three. With this policy, the replicas of a file do not evenly distribute
across the racks. One third of replicas are on one node, two thirds of replicas are on one rack, and the other third
are evenly distributed across the remaining racks. This policy improves write performance without compromising
data reliability or read performance.
</p>
<p>
In addition to the default placement policy described above, HDFS also provides a pluggable interface for block placement. See
<a href="http://hadoop.apache.org/hdfs/docs/current/api/org/apache/hadoop/hdfs/server/namenode/BlockPlacementPolicy.html">BlockPlacementPolicy</a>.
</p>
</section>
<section>
<title> Replica Selection </title>
<p>
To minimize global bandwidth consumption and read latency, HDFS tries to satisfy a read request from a replica
that is closest to the reader. If there exists a replica on the same rack as the reader node, then that replica is
preferred to satisfy the read request. If an HDFS cluster spans multiple data centers, then a replica that is
resident in the local data center is preferred over any remote replica.
</p>
</section>
<section>
<title> Safemode </title>
<p>
On startup, the NameNode enters a special state called Safemode. Replication of data blocks does not occur
when the NameNode is in the Safemode state. The NameNode receives Heartbeat and Blockreport messages
from the DataNodes. A Blockreport contains the list of data blocks that a DataNode is hosting. Each block
has a specified minimum number of replicas. A block is considered safely replicated when the minimum number
of replicas of that data block has checked in with the NameNode. After a configurable percentage of safely
replicated data blocks checks in with the NameNode (plus an additional 30 seconds), the NameNode exits
the Safemode state. It then determines the list of data blocks (if any) that still have fewer than the specified
number of replicas. The NameNode then replicates these blocks to other DataNodes.
</p>
</section>
</section>
<section>
<title> The Persistence of File System Metadata </title>
<p>
The HDFS namespace is stored by the NameNode. The NameNode uses a transaction log called the EditLog
to persistently record every change that occurs to file system metadata. For example, creating a new file in
HDFS causes the NameNode to insert a record into the EditLog indicating this. Similarly, changing the
replication factor of a file causes a new record to be inserted into the EditLog. The NameNode uses a file
in its local host OS file system to store the EditLog. The entire file system namespace, including the mapping
of blocks to files and file system properties, is stored in a file called the FsImage. The FsImage is stored as
a file in the NameNode&#x2019;s local file system too.
</p>
<p>
The NameNode keeps an image of the entire file system namespace and file Blockmap in memory. This key
metadata item is designed to be compact, such that a NameNode with 4 GB of RAM is plenty to support a
huge number of files and directories. When the NameNode starts up, it reads the FsImage and EditLog from
disk, applies all the transactions from the EditLog to the in-memory representation of the FsImage, and flushes
out this new version into a new FsImage on disk. It can then truncate the old EditLog because its transactions
have been applied to the persistent FsImage. This process is called a checkpoint. The
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Checkpoint+Node">Checkpoint Node</a> is a
separate daemon that can be configured to periodically build checkpoints from the FsImage and EditLog which are
uploaded to the NameNode. The
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Backup+Node">Backup Node</a> builds
checkpoints like the Checkpoint Node and also maintains an up-to-date copy of the FsImage in memory.
</p>
<p>
The DataNode stores HDFS data in files in its local file system. The DataNode has no knowledge about HDFS files.
It stores each block of HDFS data in a separate file in its local file system. The DataNode does not create all files
in the same directory. Instead, it uses a heuristic to determine the optimal number of files per directory and creates
subdirectories appropriately. It is not optimal to create all local files in the same directory because the local file
system might not be able to efficiently support a huge number of files in a single directory. When a DataNode starts
up, it scans through its local file system, generates a list of all HDFS data blocks that correspond to each of these
local files and sends this report to the NameNode: this is the Blockreport.
</p>
</section>
<section>
<title> The Communication Protocols </title>
<p>
All HDFS communication protocols are layered on top of the TCP/IP protocol. A client establishes a connection to
a configurable <acronym title="Transmission Control Protocol">TCP</acronym> port on the NameNode machine.
It talks the ClientProtocol with the NameNode. The DataNodes talk to the NameNode using the DataNode Protocol.
A Remote Procedure Call (<acronym title="Remote Procedure Call">RPC</acronym>) abstraction wraps both the
Client Protocol and the DataNode Protocol. By design, the NameNode never initiates any RPCs. Instead, it only
responds to RPC requests issued by DataNodes or clients.
</p>
</section>
<section>
<title> Robustness </title>
<p>
The primary objective of HDFS is to store data reliably even in the presence of failures. The three common types
of failures are NameNode failures, DataNode failures and network partitions.
</p>
<section>
<title> Data Disk Failure, Heartbeats and Re-Replication </title>
<p>
Each DataNode sends a Heartbeat message to the NameNode periodically. A network partition can cause a
subset of DataNodes to lose connectivity with the NameNode. The NameNode detects this condition by the
absence of a Heartbeat message. The NameNode marks DataNodes without recent Heartbeats as dead and
does not forward any new <acronym title="Input/Output">IO</acronym> requests to them. Any data that was
registered to a dead DataNode is not available to HDFS any more. DataNode death may cause the replication
factor of some blocks to fall below their specified value. The NameNode constantly tracks which blocks need
to be replicated and initiates replication whenever necessary. The necessity for re-replication may arise due
to many reasons: a DataNode may become unavailable, a replica may become corrupted, a hard disk on a
DataNode may fail, or the replication factor of a file may be increased.
</p>
</section>
<section>
<title> Cluster Rebalancing </title>
<p>
The HDFS architecture is compatible with data rebalancing schemes. A scheme might automatically move
data from one DataNode to another if the free space on a DataNode falls below a certain threshold. In the
event of a sudden high demand for a particular file, a scheme might dynamically create additional replicas
and rebalance other data in the cluster. These types of data rebalancing schemes are not yet implemented.
</p>
</section>
<section>
<title> Data Integrity </title>
<p>
<!-- XXX "checksum checking" sounds funny -->
It is possible that a block of data fetched from a DataNode arrives corrupted. This corruption can occur
because of faults in a storage device, network faults, or buggy software. The HDFS client software
implements checksum checking on the contents of HDFS files. When a client creates an HDFS file,
it computes a checksum of each block of the file and stores these checksums in a separate hidden
file in the same HDFS namespace. When a client retrieves file contents it verifies that the data it
received from each DataNode matches the checksum stored in the associated checksum file. If not,
then the client can opt to retrieve that block from another DataNode that has a replica of that block.
</p>
</section>
<section>
<title> Metadata Disk Failure </title>
<p>
The FsImage and the EditLog are central data structures of HDFS. A corruption of these files can
cause the HDFS instance to be non-functional. For this reason, the NameNode can be configured
to support maintaining multiple copies of the FsImage and EditLog. Any update to either the FsImage
or EditLog causes each of the FsImages and EditLogs to get updated synchronously. This
synchronous updating of multiple copies of the FsImage and EditLog may degrade the rate of
namespace transactions per second that a NameNode can support. However, this degradation is
acceptable because even though HDFS applications are very data intensive in nature, they are not
metadata intensive. When a NameNode restarts, it selects the latest consistent FsImage and EditLog to use.
</p>
<p>
The NameNode machine is a single point of failure for an HDFS cluster. If the NameNode machine fails,
manual intervention is necessary. Currently, automatic restart and failover of the NameNode software to
another machine is not supported.
</p>
</section>
<section>
<title> Snapshots </title>
<p>
Snapshots support storing a copy of data at a particular instant of time. One usage of the snapshot
feature may be to roll back a corrupted HDFS instance to a previously known good point in time.
HDFS does not currently support snapshots but will in a future release.
</p>
</section>
</section>
<section>
<!-- XXX Better name -->
<title> Data Organization </title>
<section>
<title> Data Blocks </title>
<p>
HDFS is designed to support very large files. Applications that are compatible with HDFS are those
that deal with large data sets. These applications write their data only once but they read it one or
more times and require these reads to be satisfied at streaming speeds. HDFS supports
write-once-read-many semantics on files. A typical block size used by HDFS is 64 MB. Thus,
an HDFS file is chopped up into 64 MB chunks, and if possible, each chunk will reside on a different DataNode.
</p>
</section>
<section>
<title> Replication Pipelining </title>
<p>
When a client is writing data to an HDFS file with a replication factor of 3, the NameNode retrieves a list of DataNodes using a replication target choosing algorithm.
This list contains the DataNodes that will host a replica of that block. The client then writes to the first DataNode. The first DataNode starts receiving the data in small portions (64 KB, configurable),
writes each portion to its local repository and transfers that portion to the second DataNode in the list.
The second DataNode, in turn, starts receiving each portion of the data block, writes that portion to its
repository and then flushes that portion to the third DataNode. Finally, the third DataNode writes the
data to its local repository. Thus, a DataNode can be receiving data from the previous one in the pipeline
and at the same time forwarding data to the next one in the pipeline. Thus, the data is pipelined from
one DataNode to the next.
</p>
</section>
</section>
<section>
<!-- XXX "Accessibility" sounds funny - "Interfaces" ? -->
<title> Accessibility </title>
<!-- XXX Make an API section ? (HTTP is "web service" API?) -->
<p>
HDFS can be accessed from applications in many different ways. Natively, HDFS provides a
<a href="http://hadoop.apache.org/core/docs/current/api/">Java API</a> for applications to
use. A C language wrapper for this Java API is also available. In addition, an HTTP browser
can also be used to browse the files of an HDFS instance. Work is in progress to expose
HDFS through the <acronym title="Web-based Distributed Authoring and Versioning">WebDAV</acronym> protocol.
</p>
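
A minimal read through the Java API might look like the following sketch (the file name reuses the /foodir/myfile.txt example from the FS shell table below; error handling is omitted):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());   // reads core-site.xml / hdfs-site.xml
    BufferedReader in = new BufferedReader(
        new InputStreamReader(fs.open(new Path("/foodir/myfile.txt"))));
    System.out.println(in.readLine());                     // first line of the file
    in.close();
  }
}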
<section>
<title> FS Shell </title>
<p>
HDFS allows user data to be organized in the form of files and directories. It provides a commandline
interface called FS shell that lets a user interact with the data in HDFS. The syntax of this command
set is similar to other shells (e.g. bash, csh) that users are already familiar with. Here are some sample
action/command pairs:
</p>
<table>
<tr>
<th> Action </th><th> Command </th>
</tr>
<tr>
<td> Create a directory named <code>/foodir</code> </td>
<td> <code>bin/hadoop dfs -mkdir /foodir</code> </td>
</tr>
<tr>
<td> Remove a directory named <code>/foodir</code> </td>
<td> <code>bin/hadoop dfs -rmr /foodir</code> </td>
</tr>
<tr>
<td> View the contents of a file named <code>/foodir/myfile.txt</code> </td>
<td> <code>bin/hadoop dfs -cat /foodir/myfile.txt</code> </td>
</tr>
</table>
<p>
FS shell is targeted for applications that need a scripting language to interact with the stored data.
</p>
</section>
<section>
<title> DFSAdmin </title>
<p>
The DFSAdmin command set is used for administering an HDFS cluster. These are commands that are
used only by an HDFS administrator. Here are some sample action/command pairs:
</p>
<table>
<tr>
<th> Action </th><th> Command </th>
</tr>
<tr>
<td> Put the cluster in Safemode </td> <td> <code>bin/hadoop dfsadmin -safemode enter</code> </td>
</tr>
<tr>
<td> Generate a list of DataNodes </td> <td> <code>bin/hadoop dfsadmin -report</code> </td>
</tr>
<tr>
<td> Recommission or decommission DataNode(s) </td>
<td> <code>bin/hadoop dfsadmin -refreshNodes</code> </td>
</tr>
</table>
</section>
<section>
<title> Browser Interface </title>
<p>
A typical HDFS install configures a web server to expose the HDFS namespace through
a configurable TCP port. This allows a user to navigate the HDFS namespace and view
the contents of its files using a web browser.
</p>
</section>
</section>
<section>
<title> Space Reclamation </title>
<section>
<title> File Deletes and Undeletes </title>
<p>
When a file is deleted by a user or an application, it is not immediately removed from HDFS. Instead,
HDFS first renames it to a file in the <code>/trash</code> directory. The file can be restored quickly
as long as it remains in <code>/trash</code>. A file remains in <code>/trash</code> for a configurable
amount of time. After the expiry of its life in <code>/trash</code>, the NameNode deletes the file from
the HDFS namespace. The deletion of a file causes the blocks associated with the file to be freed.
Note that there could be an appreciable time delay between the time a file is deleted by a user and
the time of the corresponding increase in free space in HDFS.
</p>
<p>
A user can Undelete a file after deleting it as long as it remains in the <code>/trash</code> directory.
If a user wants to undelete a file that he/she has deleted, he/she can navigate the <code>/trash</code>
directory and retrieve the file. The <code>/trash</code> directory contains only the latest copy of the file
that was deleted. The <code>/trash</code> directory is just like any other directory with one special
feature: HDFS applies specified policies to automatically delete files from this directory.
By default, the trash feature is disabled. It can be enabled by setting the <em>fs.trash.interval</em> property in core-site.xml to a non-zero value (the retention period in minutes). The property needs to exist in both the client-side and server-side configurations.
</p>
</section>
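
For illustration only, the same property can also be set programmatically on a client Configuration (the value shown is hypothetical; the text above recommends core-site.xml so that client and server agree):

import org.apache.hadoop.conf.Configuration;

public class TrashConfigExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("fs.trash.interval", "1440");   // keep deleted files in /trash for one day
    System.out.println("fs.trash.interval = " + conf.get("fs.trash.interval"));
  }
}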
<section>
<title> Decrease Replication Factor </title>
<p>
When the replication factor of a file is reduced, the NameNode selects excess replicas that can be deleted.
The next Heartbeat transfers this information to the DataNode. The DataNode then removes the corresponding
blocks and the corresponding free space appears in the cluster. Once again, there might be a time delay
between the completion of the <code>setReplication</code> API call and the appearance of free space in the cluster.
</p>
</section>
</section>
<section>
<title> References </title>
<p>
HDFS Java API:
<a href="http://hadoop.apache.org/core/docs/current/api/">
http://hadoop.apache.org/core/docs/current/api/
</a>
</p>
<p>
HDFS source code:
<a href= "http://hadoop.apache.org/hdfs/version_control.html">
http://hadoop.apache.org/hdfs/version_control.html
</a>
</p>
</section>
</body>
</document>

View File

@ -404,7 +404,7 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
long firstChunkOffset = checksumInfo.getChunkOffset();
if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) {
firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
throw new IOException("BlockReader: error in first chunk offset (" +
firstChunkOffset + ") startOffset is " +
startOffset + " for file " + file);

View File

@ -413,7 +413,7 @@ public class RemoteBlockReader2 implements BlockReader {
long firstChunkOffset = checksumInfo.getChunkOffset();
if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) {
firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
throw new IOException("BlockReader: error in first chunk offset (" +
firstChunkOffset + ") startOffset is " +
startOffset + " for file " + file);
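
The corrected condition (applied identically in RemoteBlockReader and RemoteBlockReader2) accepts a first chunk offset only when it lies in the half-open range (startOffset - bytesPerChecksum, startOffset]. A small worked illustration with hypothetical numbers:

public class ChunkOffsetCheck {
  public static void main(String[] args) {
    int bytesPerChecksum = 512;                    // hypothetical values
    long startOffset = 1000;
    long firstChunkOffset = startOffset - (startOffset % bytesPerChecksum);  // 512
    boolean valid = firstChunkOffset >= 0
        && firstChunkOffset <= startOffset                      // not past the requested offset
        && firstChunkOffset > startOffset - bytesPerChecksum;   // within one chunk of it
    System.out.println(valid);                     // true; 0, 488, or 1001 would be rejected
  }
}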

View File

@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
/**
* A cache of input stream sockets to Data Node.
@ -53,7 +54,7 @@ class SocketCache {
public SocketAndStreams(Socket s, IOStreamPair ioStreams) {
this.sock = s;
this.ioStreams = ioStreams;
this.createTime = System.currentTimeMillis();
this.createTime = Time.monotonicNow();
}
@Override
@ -205,7 +206,7 @@ class SocketCache {
Entry<SocketAddress, SocketAndStreams> entry = iter.next();
// if oldest socket expired, remove it
if (entry == null ||
System.currentTimeMillis() - entry.getValue().getCreateTime() <
Time.monotonicNow() - entry.getValue().getCreateTime() <
expiryPeriod) {
break;
}
@ -236,13 +237,13 @@ class SocketCache {
* older than expiryPeriod minutes
*/
private void run() throws InterruptedException {
for(long lastExpiryTime = System.currentTimeMillis();
for(long lastExpiryTime = Time.monotonicNow();
!Thread.interrupted();
Thread.sleep(expiryPeriod)) {
final long elapsed = System.currentTimeMillis() - lastExpiryTime;
final long elapsed = Time.monotonicNow() - lastExpiryTime;
if (elapsed >= expiryPeriod) {
evictExpired(expiryPeriod);
lastExpiryTime = System.currentTimeMillis();
lastExpiryTime = Time.monotonicNow();
}
}
clear();
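
The switch to Time.monotonicNow() matters because System.currentTimeMillis() can jump when the wall clock is adjusted (for example by NTP), making a cache entry look far older or newer than it really is. A minimal sketch of the intended pattern, reusing the same utility class and an assumed expiry window:

import org.apache.hadoop.util.Time;

public class ExpirySketch {
  public static void main(String[] args) throws InterruptedException {
    long expiryPeriodMs = 100;                 // hypothetical expiry window
    long createTime = Time.monotonicNow();     // monotonic, unaffected by clock changes
    Thread.sleep(150);
    boolean expired = Time.monotonicNow() - createTime >= expiryPeriodMs;
    System.out.println("expired = " + expired);    // true
  }
}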

View File

@ -82,10 +82,11 @@ public class LayoutVersion {
EDITS_CHESKUM(-28, "Support checksum for editlog"),
UNUSED(-29, "Skipped version"),
FSIMAGE_NAME_OPTIMIZATION(-30, "Store only last part of path in fsimage"),
RESERVED_REL20_203(-31, -19, "Reserved for release 0.20.203"),
RESERVED_REL20_204(-32, "Reserved for release 0.20.204"),
RESERVED_REL22(-33, -27, "Reserved for release 0.22"),
RESERVED_REL23(-34, -30, "Reserved for release 0.23"),
RESERVED_REL20_203(-31, -19, "Reserved for release 0.20.203", true,
DELEGATION_TOKEN),
RESERVED_REL20_204(-32, -31, "Reserved for release 0.20.204", true),
RESERVED_REL22(-33, -27, "Reserved for release 0.22", true),
RESERVED_REL23(-34, -30, "Reserved for release 0.23", true),
FEDERATION(-35, "Support for namenode federation"),
LEASE_REASSIGNMENT(-36, "Support for persisting lease holder reassignment"),
STORED_TXIDS(-37, "Transaction IDs are stored in edits log and image files"),
@ -95,33 +96,40 @@ public class LayoutVersion {
OPTIMIZE_PERSIST_BLOCKS(-40,
"Serialize block lists with delta-encoded variable length ints, " +
"add OP_UPDATE_BLOCKS"),
SNAPSHOT(-41, "Support for snapshot feature");
RESERVED_REL1_2_0(-41, -32, "Reserved for release 1.2.0", true, CONCAT),
SNAPSHOT(-41, -39, "Support for snapshot feature", false);
final int lv;
final int ancestorLV;
final String description;
final boolean reserved;
final Feature[] specialFeatures;
/**
* Feature that is added at {@code currentLV}.
* Feature that is added at layout version {@code lv} - 1.
* @param lv new layout version with the addition of this feature
* @param description description of the feature
*/
Feature(final int lv, final String description) {
this(lv, lv + 1, description);
this(lv, lv + 1, description, false);
}
/**
* Feature that is added at {@code currentLV}.
* Feature that is added at layout version {@code ancestorLV}.
* @param lv new layout version with the addition of this feature
* @param ancestorLV layout version from which the new lv is derived
* from.
* @param ancestorLV layout version from which the new lv is derived from.
* @param description description of the feature
* @param reserved true when this is a layout version reserved for previous
* versions
* @param features set of features that are to be enabled for this version
*/
Feature(final int lv, final int ancestorLV,
final String description) {
Feature(final int lv, final int ancestorLV, final String description,
boolean reserved, Feature... features) {
this.lv = lv;
this.ancestorLV = ancestorLV;
this.description = description;
this.reserved = reserved;
specialFeatures = features;
}
/**
@ -147,6 +155,10 @@ public class LayoutVersion {
public String getDescription() {
return description;
}
public boolean isReservedForOldRelease() {
return reserved;
}
}
// Build layout version and corresponding feature matrix
@ -172,19 +184,14 @@ public class LayoutVersion {
map.put(f.ancestorLV, ancestorSet);
}
EnumSet<Feature> featureSet = EnumSet.copyOf(ancestorSet);
if (f.specialFeatures != null) {
for (Feature specialFeature : f.specialFeatures) {
featureSet.add(specialFeature);
}
}
featureSet.add(f);
map.put(f.lv, featureSet);
}
// Special initialization for 0.20.203 and 0.20.204
// to add Feature#DELEGATION_TOKEN
specialInit(Feature.RESERVED_REL20_203.lv, Feature.DELEGATION_TOKEN);
specialInit(Feature.RESERVED_REL20_204.lv, Feature.DELEGATION_TOKEN);
}
private static void specialInit(int lv, Feature f) {
EnumSet<Feature> set = map.get(lv);
set.add(f);
}
/**
@ -223,6 +230,11 @@ public class LayoutVersion {
*/
public static int getCurrentLayoutVersion() {
Feature[] values = Feature.values();
return values[values.length - 1].lv;
for (int i = values.length -1; i >= 0; i--) {
if (!values[i].isReservedForOldRelease()) {
return values[i].lv;
}
}
throw new AssertionError("All layout versions are reserved.");
}
}
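
With reserved release entries now skipped by getCurrentLayoutVersion(), feature support can still be queried per layout version. A hedged sketch (it assumes the class's existing supports() lookup, which is not shown in this hunk):

import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;

public class LayoutVersionSketch {
  public static void main(String[] args) {
    int current = LayoutVersion.getCurrentLayoutVersion();  // skips reserved release versions
    System.out.println("current layout version: " + current);
    System.out.println("snapshots supported: "
        + LayoutVersion.supports(Feature.SNAPSHOT, current));
  }
}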

View File

@ -633,7 +633,9 @@ public class DatanodeManager {
// Mostly called inside an RPC, update ip and peer hostname
String hostname = dnAddress.getHostName();
String ip = dnAddress.getHostAddress();
if (hostname.equals(ip)) {
if (!isNameResolved(dnAddress)) {
// Reject registration of unresolved datanode to prevent performance
// impact of repetitive DNS lookups later.
LOG.warn("Unresolved datanode registration from " + ip);
throw new DisallowedDatanodeException(nodeReg);
}
@ -1062,6 +1064,22 @@ public class DatanodeManager {
return names;
}
/**
* Checks if name resolution was successful for the given address. If IP
* address and host name are the same, then it means name resolution has
* failed. As a special case, the loopback address is also considered
* acceptable. This is particularly important on Windows, where 127.0.0.1 does
* not resolve to "localhost".
*
* @param address InetAddress to check
* @return boolean true if name resolution successful or address is loopback
*/
private static boolean isNameResolved(InetAddress address) {
String hostname = address.getHostName();
String ip = address.getHostAddress();
return !hostname.equals(ip) || address.isLoopbackAddress();
}
private void setDatanodeDead(DatanodeDescriptor node) {
node.setLastUpdate(0);
}
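
The helper relies on two java.net.InetAddress behaviours: getHostName() returns the bare IP text when reverse resolution fails, and isLoopbackAddress() identifies 127.0.0.1/::1 even when, as on Windows, they do not reverse-resolve to "localhost". A standalone sketch of the same check, for illustration:

import java.net.InetAddress;

public class NameResolutionCheck {
  static boolean isNameResolved(InetAddress address) {
    // Resolution failed if the "host name" is just the IP text, unless it is loopback.
    return !address.getHostName().equals(address.getHostAddress())
        || address.isLoopbackAddress();
  }

  public static void main(String[] args) throws Exception {
    System.out.println(isNameResolved(InetAddress.getByName("127.0.0.1")));  // true (loopback)
  }
}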

View File

@ -603,6 +603,15 @@ class BlockPoolSliceScanner {
}
}
/**
* Shuts down this BlockPoolSliceScanner and releases any internal resources.
*/
void shutdown() {
if (verificationLog != null) {
verificationLog.close();
}
}
private void scan() {
if (LOG.isDebugEnabled()) {
LOG.debug("Starting to scan blockpool: " + blockPoolId);
@ -610,7 +619,8 @@ class BlockPoolSliceScanner {
try {
adjustThrottler();
while (datanode.shouldRun && !Thread.interrupted()
while (datanode.shouldRun
&& !datanode.blockScanner.blockScannerThread.isInterrupted()
&& datanode.isBPServiceAlive(blockPoolId)) {
long now = Time.now();
synchronized (this) {

View File

@ -100,6 +100,11 @@ public class DataBlockScanner implements Runnable {
}
bpScanner.scanBlockPoolSlice();
}
// Call shutdown for each allocated BlockPoolSliceScanner.
for (BlockPoolSliceScanner bpss: blockPoolScannerMap.values()) {
bpss.shutdown();
}
}
// Wait for at least one block pool to be up
@ -232,12 +237,24 @@ public class DataBlockScanner implements Runnable {
}
}
public synchronized void shutdown() {
public void shutdown() {
synchronized (this) {
if (blockScannerThread != null) {
blockScannerThread.interrupt();
}
}
// We cannot join within the synchronized block, because it would create a
// deadlock situation. blockScannerThread calls other synchronized methods.
if (blockScannerThread != null) {
try {
blockScannerThread.join();
} catch (InterruptedException e) {
// shutting down anyway
}
}
}
public synchronized void addBlockPool(String blockPoolId) {
if (blockPoolScannerMap.get(blockPoolId) != null) {
return;

View File

@ -26,6 +26,7 @@ import java.io.RandomAccessFile;
import java.nio.channels.FileLock;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -78,7 +79,7 @@ public class DataStorage extends Storage {
// BlockPoolStorage is map of <Block pool Id, BlockPoolStorage>
private Map<String, BlockPoolSliceStorage> bpStorageMap
= new HashMap<String, BlockPoolSliceStorage>();
= Collections.synchronizedMap(new HashMap<String, BlockPoolSliceStorage>());
DataStorage() {

View File

@ -140,7 +140,7 @@ public class SecureDataNodeStarter implements Daemon {
System.err.println("Successfully obtained privileged resources (streaming port = "
+ ss + " ) (http listener port = " + listener.getConnection() +")");
if ((ss.getLocalPort() >= 1023 || listener.getPort() >= 1023) &&
if ((ss.getLocalPort() > 1023 || listener.getPort() > 1023) &&
UserGroupInformation.isSecurityEnabled()) {
throw new RuntimeException("Cannot start secure datanode with unprivileged ports");
}
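
The boundary the corrected check encodes: ports 1-1023 are privileged, so a secure DataNode bound to 1023 should be accepted, while 1024 and above should not. A tiny illustrative sketch (not the starter's actual code):

public class PrivilegedPortCheck {
  static boolean isPrivileged(int port) {
    return port > 0 && port < 1024;    // 1023 is the highest privileged port
  }

  public static void main(String[] args) {
    System.out.println(isPrivileged(1023));  // true  -> acceptable for a secure DataNode
    System.out.println(isPrivileged(1024));  // false -> rejected when security is enabled
  }
}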

View File

@ -339,11 +339,13 @@ class NameNodeRpcServer implements NamenodeProtocols {
"Unexpected not positive size: "+size);
}
namesystem.checkOperation(OperationCategory.READ);
namesystem.checkSuperuserPrivilege();
return namesystem.getBlockManager().getBlocks(datanode, size);
}
@Override // NamenodeProtocol
public ExportedBlockKeys getBlockKeys() throws IOException {
namesystem.checkSuperuserPrivilege();
return namesystem.getBlockManager().getBlockKeys();
}
@ -352,6 +354,7 @@ class NameNodeRpcServer implements NamenodeProtocols {
int errorCode,
String msg) throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
namesystem.checkSuperuserPrivilege();
verifyRequest(registration);
LOG.info("Error report from " + registration + ": " + msg);
if (errorCode == FATAL) {
@ -362,6 +365,7 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // NamenodeProtocol
public NamenodeRegistration register(NamenodeRegistration registration)
throws IOException {
namesystem.checkSuperuserPrivilege();
verifyLayoutVersion(registration.getVersion());
NamenodeRegistration myRegistration = nn.setRegistration();
namesystem.registerBackupNode(registration, myRegistration);
@ -371,6 +375,7 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // NamenodeProtocol
public NamenodeCommand startCheckpoint(NamenodeRegistration registration)
throws IOException {
namesystem.checkSuperuserPrivilege();
verifyRequest(registration);
if(!nn.isRole(NamenodeRole.NAMENODE))
throw new IOException("Only an ACTIVE node can invoke startCheckpoint.");
@ -380,6 +385,7 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // NamenodeProtocol
public void endCheckpoint(NamenodeRegistration registration,
CheckpointSignature sig) throws IOException {
namesystem.checkSuperuserPrivilege();
namesystem.endCheckpoint(registration, sig);
}
@ -756,17 +762,20 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // NamenodeProtocol
public long getTransactionID() throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
namesystem.checkSuperuserPrivilege();
return namesystem.getFSImage().getLastAppliedOrWrittenTxId();
}
@Override // NamenodeProtocol
public long getMostRecentCheckpointTxId() throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
namesystem.checkSuperuserPrivilege();
return namesystem.getFSImage().getMostRecentCheckpointTxId();
}
@Override // NamenodeProtocol
public CheckpointSignature rollEditLog() throws IOException {
namesystem.checkSuperuserPrivilege();
return namesystem.rollEditLog();
}
@ -774,6 +783,7 @@ class NameNodeRpcServer implements NamenodeProtocols {
public RemoteEditLogManifest getEditLogManifest(long sinceTxId)
throws IOException {
namesystem.checkOperation(OperationCategory.READ);
namesystem.checkSuperuserPrivilege();
return namesystem.getEditLog().getEditLogManifest(sinceTxId);
}
@ -950,6 +960,7 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // DatanodeProtocol, NamenodeProtocol
public NamespaceInfo versionRequest() throws IOException {
namesystem.checkSuperuserPrivilege();
return namesystem.getNamespaceInfo();
}

View File

@ -32,8 +32,7 @@ import org.apache.hadoop.security.KerberosInfo;
* It's used to get part of the name node state
*****************************************************************************/
@KerberosInfo(
serverPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY,
clientPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY)
serverPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY)
@InterfaceAudience.Private
public interface NamenodeProtocol {
/**

View File

@ -131,7 +131,6 @@ static enum authConf discoverAuthConf(void)
int fuseConnectInit(const char *nnUri, int port)
{
const char *timerPeriod;
int ret;
gTimerPeriod = FUSE_CONN_DEFAULT_TIMER_PERIOD;

View File

@ -24,12 +24,77 @@
#include <stdio.h>
#include <stdlib.h>
static int get_hdfs_open_flags_from_info(hdfsFS fs, const char *path,
int flags, int *outflags, const hdfsFileInfo *info);
/**
* Given a set of FUSE flags, determine the libhdfs flags we need.
*
* This is complicated by two things:
* 1. libhdfs doesn't support O_RDWR at all;
* 2. when given O_WRONLY, libhdfs will truncate the file unless O_APPEND is
* also given. In other words, there is an implicit O_TRUNC.
*
* Probably the next iteration of the libhdfs interface should not use the POSIX
* flags at all, since, as you can see, they don't really match up very closely
* to the POSIX meaning. However, for the time being, this is the API.
*
* @param fs The libhdfs object
* @param path The path we're opening
* @param flags The FUSE flags
*
* @return negative error code on failure; flags otherwise.
*/
static int64_t get_hdfs_open_flags(hdfsFS fs, const char *path, int flags)
{
int hasContent;
int64_t ret;
hdfsFileInfo *info;
if ((flags & O_ACCMODE) == O_RDONLY) {
return O_RDONLY;
}
if (flags & O_TRUNC) {
/* If we're opening for write or read/write, O_TRUNC means we should blow
* away the file which is there and create our own file.
* */
return O_WRONLY;
}
info = hdfsGetPathInfo(fs, path);
if (info) {
if (info->mSize == 0) {
// If the file has zero length, we shouldn't feel bad about blowing it
// away.
ret = O_WRONLY;
} else if ((flags & O_ACCMODE) == O_RDWR) {
// HACK: translate O_RDWR requests into O_RDONLY if the file already
// exists and has non-zero length.
ret = O_RDONLY;
} else { // O_WRONLY
// HACK: translate O_WRONLY requests into append if the file already
// exists.
ret = O_WRONLY | O_APPEND;
}
} else { // !info
if (flags & O_CREAT) {
ret = O_WRONLY;
} else {
ret = -ENOENT;
}
}
if (info) {
hdfsFreeFileInfo(info, 1);
}
return ret;
}
int dfs_open(const char *path, struct fuse_file_info *fi)
{
hdfsFS fs = NULL;
dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
dfs_fh *fh = NULL;
int mutexInit = 0, ret;
int mutexInit = 0, ret, flags = 0;
int64_t flagRet;
TRACE1("open", path)
@ -38,10 +103,6 @@ int dfs_open(const char *path, struct fuse_file_info *fi)
assert('/' == *path);
assert(dfs);
// 0x8000 is always passed in and hadoop doesn't like it, so killing it here
// bugbug figure out what this flag is and report problem to Hadoop JIRA
int flags = (fi->flags & 0x7FFF);
// retrieve dfs specific data
fh = (dfs_fh*)calloc(1, sizeof (dfs_fh));
if (!fh) {
@ -57,22 +118,12 @@ int dfs_open(const char *path, struct fuse_file_info *fi)
goto error;
}
fs = hdfsConnGetFs(fh->conn);
if (flags & O_RDWR) {
hdfsFileInfo *info = hdfsGetPathInfo(fs, path);
if (info == NULL) {
// File does not exist (maybe?); interpret it as a O_WRONLY
// If the actual error was something else, we'll get it again when
// we try to open the file.
flags ^= O_RDWR;
flags |= O_WRONLY;
} else {
// File exists; open this as read only.
flags ^= O_RDWR;
flags |= O_RDONLY;
flagRet = get_hdfs_open_flags(fs, path, fi->flags);
if (flagRet < 0) {
ret = -flagRet;
goto error;
}
}
flags = flagRet;
if ((fh->hdfsFH = hdfsOpenFile(fs, path, flags, 0, 0, 0)) == NULL) {
ERROR("Could not open file %s (errno=%d)", path, errno);
if (errno == 0 || errno == EINTERNAL) {
@ -91,7 +142,7 @@ int dfs_open(const char *path, struct fuse_file_info *fi)
}
mutexInit = 1;
if (fi->flags & O_WRONLY || fi->flags & O_CREAT) {
if ((flags & O_ACCMODE) == O_WRONLY) {
fh->buf = NULL;
} else {
assert(dfs->rdbuffer_size > 0);

View File

@ -98,7 +98,7 @@ static void dfsPrintOptions(FILE *fp, const struct options *o)
o->attribute_timeout, o->rdbuffer_size, o->direct_io);
}
void *dfs_init(void)
void *dfs_init(struct fuse_conn_info *conn)
{
int ret;
@ -143,6 +143,45 @@ void *dfs_init(void)
exit(EXIT_FAILURE);
}
}
#ifdef FUSE_CAP_ATOMIC_O_TRUNC
// If FUSE_CAP_ATOMIC_O_TRUNC is set, open("foo", O_CREAT | O_TRUNC) will
// result in dfs_open being called with O_TRUNC.
//
// If this capability is not present, fuse will try to use multiple
// operations to "simulate" open(O_TRUNC). This doesn't work very well with
// HDFS.
// Unfortunately, this capability is only implemented on Linux 2.6.29 or so.
// See HDFS-4140 for details.
if (conn->capable & FUSE_CAP_ATOMIC_O_TRUNC) {
conn->want |= FUSE_CAP_ATOMIC_O_TRUNC;
}
#endif
#ifdef FUSE_CAP_ASYNC_READ
// We're OK with doing reads at the same time as writes.
if (conn->capable & FUSE_CAP_ASYNC_READ) {
conn->want |= FUSE_CAP_ASYNC_READ;
}
#endif
#ifdef FUSE_CAP_BIG_WRITES
// Yes, we can read more than 4kb at a time. In fact, please do!
if (conn->capable & FUSE_CAP_BIG_WRITES) {
conn->want |= FUSE_CAP_BIG_WRITES;
}
#endif
#ifdef FUSE_CAP_DONT_MASK
if ((options.no_permissions) && (conn->capable & FUSE_CAP_DONT_MASK)) {
// If we're handling permissions ourselves, we don't want the kernel
// applying its own umask. HDFS already implements its own per-user
// umasks! Sadly, this only actually does something on kernels 2.6.31 and
// later.
conn->want |= FUSE_CAP_DONT_MASK;
}
#endif
return (void*)dfs;
}

View File

@ -19,13 +19,15 @@
#ifndef __FUSE_INIT_H__
#define __FUSE_INIT_H__
struct fuse_conn_info;
/**
* These are responsible for initializing connections to dfs and internal
* data structures and then freeing them.
* i.e., what happens on mount and unmount.
*
*/
void *dfs_init();
void *dfs_init(struct fuse_conn_info *conn);
void dfs_destroy (void *ptr);
#endif

View File

@ -16,6 +16,8 @@
* limitations under the License.
*/
#define FUSE_USE_VERSION 26
#include "fuse-dfs/test/fuse_workload.h"
#include "libhdfs/expect.h"
#include "util/posix_util.h"
@ -23,6 +25,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <fuse.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@ -138,13 +141,89 @@ static int safeRead(int fd, void *buf, int c)
return amt;
}
/* Bug: HDFS-2551.
* When a program writes a file, closes it, and immediately re-opens it,
* it might not appear to have the correct length. This is because FUSE
* invokes the release() callback asynchronously.
*
* To work around this, we keep retrying until the file length is what we
* expect.
*/
static int closeWorkaroundHdfs2551(int fd, const char *path, off_t expectedSize)
{
int ret, try;
struct stat stBuf;
RETRY_ON_EINTR_GET_ERRNO(ret, close(fd));
EXPECT_ZERO(ret);
for (try = 0; try < MAX_TRIES; try++) {
EXPECT_ZERO(stat(path, &stBuf));
EXPECT_NONZERO(S_ISREG(stBuf.st_mode));
if (stBuf.st_size == expectedSize) {
return 0;
}
sleepNoSig(1);
}
fprintf(stderr, "FUSE_WORKLOAD: error: expected file %s to have length "
"%lld; instead, it had length %lld\n",
path, (long long)expectedSize, (long long)stBuf.st_size);
return -EIO;
}
#ifdef FUSE_CAP_ATOMIC_O_TRUNC
/**
* Test that we can create a file, write some contents to it, close that file,
* and then successfully re-open with O_TRUNC.
*/
static int testOpenTrunc(const char *base)
{
int fd, err;
char path[PATH_MAX];
const char * const SAMPLE1 = "this is the first file that we wrote.";
const char * const SAMPLE2 = "this is the second file that we wrote. "
"It's #2!";
snprintf(path, sizeof(path), "%s/trunc.txt", base);
fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644);
if (fd < 0) {
err = errno;
fprintf(stderr, "TEST_ERROR: testOpenTrunc(%s): first open "
"failed with error %d\n", path, err);
return -err;
}
EXPECT_ZERO(safeWrite(fd, SAMPLE1, strlen(SAMPLE1)));
EXPECT_ZERO(closeWorkaroundHdfs2551(fd, path, strlen(SAMPLE1)));
fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644);
if (fd < 0) {
err = errno;
fprintf(stderr, "TEST_ERROR: testOpenTrunc(%s): second open "
"failed with error %d\n", path, err);
return -err;
}
EXPECT_ZERO(safeWrite(fd, SAMPLE2, strlen(SAMPLE2)));
EXPECT_ZERO(closeWorkaroundHdfs2551(fd, path, strlen(SAMPLE2)));
return 0;
}
#else
static int testOpenTrunc(const char *base)
{
fprintf(stderr, "FUSE_WORKLOAD: We lack FUSE_CAP_ATOMIC_O_TRUNC support. "
"Not testing open(O_TRUNC).\n");
return 0;
}
#endif
int runFuseWorkloadImpl(const char *root, const char *pcomp,
struct fileCtx *ctx)
{
char base[PATH_MAX], tmp[PATH_MAX], *tmpBuf;
char src[PATH_MAX], dst[PATH_MAX];
struct stat stBuf;
int ret, i, try;
int ret, i;
struct utimbuf tbuf;
struct statvfs stvBuf;
@ -241,34 +320,9 @@ int runFuseWorkloadImpl(const char *root, const char *pcomp,
EXPECT_ZERO(safeWrite(ctx[i].fd, ctx[i].str, ctx[i].strLen));
}
for (i = 0; i < NUM_FILE_CTX; i++) {
RETRY_ON_EINTR_GET_ERRNO(ret, close(ctx[i].fd));
EXPECT_ZERO(ret);
EXPECT_ZERO(closeWorkaroundHdfs2551(ctx[i].fd, ctx[i].path, ctx[i].strLen));
ctx[i].fd = -1;
}
for (i = 0; i < NUM_FILE_CTX; i++) {
/* Bug: HDFS-2551.
* When a program writes a file, closes it, and immediately re-opens it,
* it might not appear to have the correct length. This is because FUSE
* invokes the release() callback asynchronously.
*
* To work around this, we keep retrying until the file length is what we
* expect.
*/
for (try = 0; try < MAX_TRIES; try++) {
EXPECT_ZERO(stat(ctx[i].path, &stBuf));
EXPECT_NONZERO(S_ISREG(stBuf.st_mode));
if (ctx[i].strLen == stBuf.st_size) {
break;
}
sleepNoSig(1);
}
if (try == MAX_TRIES) {
fprintf(stderr, "FUSE_WORKLOAD: error: expected file %s to have length "
"%d; instead, it had length %lld\n",
ctx[i].path, ctx[i].strLen, (long long)stBuf.st_size);
return -EIO;
}
}
for (i = 0; i < NUM_FILE_CTX; i++) {
ctx[i].fd = open(ctx[i].path, O_RDONLY);
if (ctx[i].fd < 0) {
@ -308,6 +362,7 @@ int runFuseWorkloadImpl(const char *root, const char *pcomp,
for (i = 0; i < NUM_FILE_CTX; i++) {
free(ctx[i].path);
}
EXPECT_ZERO(testOpenTrunc(base));
EXPECT_ZERO(recursiveDelete(base));
return 0;
}

View File

@ -0,0 +1,512 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License. See accompanying LICENSE file.
---
HDFS Architecture
---
Dhruba Borthakur
---
${maven.build.timestamp}
%{toc|section=1|fromDepth=0}
HDFS Architecture
Introduction
The Hadoop Distributed File System (HDFS) is a distributed file system
designed to run on commodity hardware. It has many similarities with
existing distributed file systems. However, the differences from other
distributed file systems are significant. HDFS is highly fault-tolerant
and is designed to be deployed on low-cost hardware. HDFS provides high
throughput access to application data and is suitable for applications
that have large data sets. HDFS relaxes a few POSIX requirements to
enable streaming access to file system data. HDFS was originally built
as infrastructure for the Apache Nutch web search engine project. HDFS
is part of the Apache Hadoop Core project. The project URL is
{{http://hadoop.apache.org/}}.
Assumptions and Goals
Hardware Failure
Hardware failure is the norm rather than the exception. An HDFS
instance may consist of hundreds or thousands of server machines, each
storing part of the file system's data. The fact that there are a huge
number of components and that each component has a non-trivial
probability of failure means that some component of HDFS is always
non-functional. Therefore, detection of faults and quick, automatic
recovery from them is a core architectural goal of HDFS.
Streaming Data Access
Applications that run on HDFS need streaming access to their data sets.
They are not general purpose applications that typically run on general
purpose file systems. HDFS is designed more for batch processing rather
than interactive use by users. The emphasis is on high throughput of
data access rather than low latency of data access. POSIX imposes many
hard requirements that are not needed for applications that are
targeted for HDFS. POSIX semantics in a few key areas have been traded
to increase data throughput rates.
Large Data Sets
Applications that run on HDFS have large data sets. A typical file in
HDFS is gigabytes to terabytes in size. Thus, HDFS is tuned to support
large files. It should provide high aggregate data bandwidth and scale
to hundreds of nodes in a single cluster. It should support tens of
millions of files in a single instance.
Simple Coherency Model
HDFS applications need a write-once-read-many access model for files. A
file once created, written, and closed need not be changed. This
assumption simplifies data coherency issues and enables high throughput
data access. A Map/Reduce application or a web crawler application fits
perfectly with this model. There is a plan to support appending-writes
to files in the future.
“Moving Computation is Cheaper than Moving Data”
A computation requested by an application is much more efficient if it
is executed near the data it operates on. This is especially true when
the size of the data set is huge. This minimizes network congestion and
increases the overall throughput of the system. The assumption is that
it is often better to migrate the computation closer to where the data
is located rather than moving the data to where the application is
running. HDFS provides interfaces for applications to move themselves
closer to where the data is located.
Portability Across Heterogeneous Hardware and Software Platforms
HDFS has been designed to be easily portable from one platform to
another. This facilitates widespread adoption of HDFS as a platform of
choice for a large set of applications.
NameNode and DataNodes
HDFS has a master/slave architecture. An HDFS cluster consists of a
single NameNode, a master server that manages the file system namespace
and regulates access to files by clients. In addition, there are a
number of DataNodes, usually one per node in the cluster, which manage
storage attached to the nodes that they run on. HDFS exposes a file
system namespace and allows user data to be stored in files.
Internally, a file is split into one or more blocks and these blocks
are stored in a set of DataNodes. The NameNode executes file system
namespace operations like opening, closing, and renaming files and
directories. It also determines the mapping of blocks to DataNodes. The
DataNodes are responsible for serving read and write requests from the
file system's clients. The DataNodes also perform block creation,
deletion, and replication upon instruction from the NameNode.
[images/hdfsarchitecture.png] HDFS Architecture
The NameNode and DataNode are pieces of software designed to run on
commodity machines. These machines typically run a GNU/Linux operating
system (OS). HDFS is built using the Java language; any machine that
supports Java can run the NameNode or the DataNode software. Usage of
the highly portable Java language means that HDFS can be deployed on a
wide range of machines. A typical deployment has a dedicated machine
that runs only the NameNode software. Each of the other machines in the
cluster runs one instance of the DataNode software. The architecture
does not preclude running multiple DataNodes on the same machine but in
a real deployment that is rarely the case.
The existence of a single NameNode in a cluster greatly simplifies the
architecture of the system. The NameNode is the arbitrator and
repository for all HDFS metadata. The system is designed in such a way
that user data never flows through the NameNode.
The File System Namespace
HDFS supports a traditional hierarchical file organization. A user or
an application can create directories and store files inside these
directories. The file system namespace hierarchy is similar to most
other existing file systems; one can create and remove files, move a
file from one directory to another, or rename a file. HDFS does not yet
implement user quotas or access permissions. HDFS does not support hard
links or soft links. However, the HDFS architecture does not preclude
implementing these features.
The NameNode maintains the file system namespace. Any change to the
file system namespace or its properties is recorded by the NameNode. An
application can specify the number of replicas of a file that should be
maintained by HDFS. The number of copies of a file is called the
replication factor of that file. This information is stored by the
NameNode.
Data Replication
HDFS is designed to reliably store very large files across machines in
a large cluster. It stores each file as a sequence of blocks; all
blocks in a file except the last block are the same size. The blocks of
a file are replicated for fault tolerance. The block size and
replication factor are configurable per file. An application can
specify the number of replicas of a file. The replication factor can be
specified at file creation time and can be changed later. Files in HDFS
are write-once and have strictly one writer at any time.
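The sketch below is not part of the original design document; it is a minimal illustration, using the public FileSystem Java API, of specifying a replication factor at file creation time. The path <<</foodir/sample.txt>>> and the replication value of 3 are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplicationAtCreate {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path("/foodir/sample.txt");      // illustrative path
    // Ask for 3 replicas at create time; buffer size and block size fall
    // back to the configured defaults.
    FSDataOutputStream out = fs.create(p, true,
        conf.getInt("io.file.buffer.size", 4096),
        (short) 3, fs.getDefaultBlockSize());
    out.writeBytes("replicated three times\n");
    out.close();
    fs.close();
  }
}

The replication factor requested here is only a target; as the surrounding text explains, the NameNode decides where the replicas actually land.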
The NameNode makes all decisions regarding replication of blocks. It
periodically receives a Heartbeat and a Blockreport from each of the
DataNodes in the cluster. Receipt of a Heartbeat implies that the
DataNode is functioning properly. A Blockreport contains a list of all
blocks on a DataNode.
[images/hdfsdatanodes.png] HDFS DataNodes
Replica Placement: The First Baby Steps
The placement of replicas is critical to HDFS reliability and
performance. Optimizing replica placement distinguishes HDFS from most
other distributed file systems. This is a feature that needs lots of
tuning and experience. The purpose of a rack-aware replica placement
policy is to improve data reliability, availability, and network
bandwidth utilization. The current implementation for the replica
placement policy is a first effort in this direction. The short-term
goals of implementing this policy are to validate it on production
systems, learn more about its behavior, and build a foundation to test
and research more sophisticated policies.
Large HDFS instances run on a cluster of computers that commonly spread
across many racks. Communication between two nodes in different racks
has to go through switches. In most cases, network bandwidth between
machines in the same rack is greater than network bandwidth between
machines in different racks.
The NameNode determines the rack id each DataNode belongs to via the
process outlined in {{{../hadoop-common/ClusterSetup.html#Hadoop+Rack+Awareness}Hadoop Rack Awareness}}. A simple but non-optimal policy
is to place replicas on unique racks. This prevents losing data when an
entire rack fails and allows use of bandwidth from multiple racks when
reading data. This policy evenly distributes replicas in the cluster
which makes it easy to balance load on component failure. However, this
policy increases the cost of writes because a write needs to transfer
blocks to multiple racks.
For the common case, when the replication factor is three, HDFS's
placement policy is to put one replica on one node in the local rack,
another on a different node in the local rack, and the last on a
different node in a different rack. This policy cuts the inter-rack
write traffic which generally improves write performance. The chance of
rack failure is far less than that of node failure; this policy does
not impact data reliability and availability guarantees. However, it
does reduce the aggregate network bandwidth used when reading data
since a block is placed in only two unique racks rather than three.
With this policy, the replicas of a file do not evenly distribute
across the racks. One third of replicas are on one node, two thirds of
replicas are on one rack, and the other third are evenly distributed
across the remaining racks. This policy improves write performance
without compromising data reliability or read performance.
The current, default replica placement policy described here is a work
in progress.
Replica Selection
To minimize global bandwidth consumption and read latency, HDFS tries
to satisfy a read request from a replica that is closest to the reader.
If there exists a replica on the same rack as the reader node, then
that replica is preferred to satisfy the read request. If an HDFS
cluster spans multiple data centers, then a replica that is resident in
the local data center is preferred over any remote replica.
Safemode
On startup, the NameNode enters a special state called Safemode.
Replication of data blocks does not occur when the NameNode is in the
Safemode state. The NameNode receives Heartbeat and Blockreport
messages from the DataNodes. A Blockreport contains the list of data
blocks that a DataNode is hosting. Each block has a specified minimum
number of replicas. A block is considered safely replicated when the
minimum number of replicas of that data block has checked in with the
NameNode. After a configurable percentage of safely replicated data
blocks checks in with the NameNode (plus an additional 30 seconds), the
NameNode exits the Safemode state. It then determines the list of data
blocks (if any) that still have fewer than the specified number of
replicas. The NameNode then replicates these blocks to other DataNodes.
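As an aside that is not in the original text, the fragment below sketches one way a client could query the Safemode state programmatically through DistributedFileSystem; it assumes the default configuration resolves to an HDFS cluster and that the SAFEMODE_GET action is available in the deployed release.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;

public class SafemodeCheck {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    if (fs instanceof DistributedFileSystem) {
      DistributedFileSystem dfs = (DistributedFileSystem) fs;
      // SAFEMODE_GET only queries the current state; it does not change it.
      boolean inSafeMode = dfs.setSafeMode(SafeModeAction.SAFEMODE_GET);
      System.out.println("NameNode in safemode: " + inSafeMode);
    }
    fs.close();
  }
}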
The Persistence of File System Metadata
The HDFS namespace is stored by the NameNode. The NameNode uses a
transaction log called the EditLog to persistently record every change
that occurs to file system metadata. For example, creating a new file
in HDFS causes the NameNode to insert a record into the EditLog
indicating this. Similarly, changing the replication factor of a file
causes a new record to be inserted into the EditLog. The NameNode uses
a file in its local host OS file system to store the EditLog. The
entire file system namespace, including the mapping of blocks to files
and file system properties, is stored in a file called the FsImage. The
FsImage is stored as a file in the NameNode's local file system too.
The NameNode keeps an image of the entire file system namespace and
file Blockmap in memory. This key metadata item is designed to be
compact, such that a NameNode with 4 GB of RAM is plenty to support a
huge number of files and directories. When the NameNode starts up, it
reads the FsImage and EditLog from disk, applies all the transactions
from the EditLog to the in-memory representation of the FsImage, and
flushes out this new version into a new FsImage on disk. It can then
truncate the old EditLog because its transactions have been applied to
the persistent FsImage. This process is called a checkpoint. In the
current implementation, a checkpoint only occurs when the NameNode
starts up. Work is in progress to support periodic checkpointing in the
near future.
The DataNode stores HDFS data in files in its local file system. The
DataNode has no knowledge about HDFS files. It stores each block of
HDFS data in a separate file in its local file system. The DataNode
does not create all files in the same directory. Instead, it uses a
heuristic to determine the optimal number of files per directory and
creates subdirectories appropriately. It is not optimal to create all
local files in the same directory because the local file system might
not be able to efficiently support a huge number of files in a single
directory. When a DataNode starts up, it scans through its local file
system, generates a list of all HDFS data blocks that correspond to
each of these local files and sends this report to the NameNode: this
is the Blockreport.
The Communication Protocols
All HDFS communication protocols are layered on top of the TCP/IP
protocol. A client establishes a connection to a configurable TCP port
on the NameNode machine. It talks the ClientProtocol with the NameNode.
The DataNodes talk to the NameNode using the DataNode Protocol. A
Remote Procedure Call (RPC) abstraction wraps both the Client Protocol
and the DataNode Protocol. By design, the NameNode never initiates any
RPCs. Instead, it only responds to RPC requests issued by DataNodes or
clients.
Robustness
The primary objective of HDFS is to store data reliably even in the
presence of failures. The three common types of failures are NameNode
failures, DataNode failures and network partitions.
Data Disk Failure, Heartbeats and Re-Replication
Each DataNode sends a Heartbeat message to the NameNode periodically. A
network partition can cause a subset of DataNodes to lose connectivity
with the NameNode. The NameNode detects this condition by the absence
of a Heartbeat message. The NameNode marks DataNodes without recent
Heartbeats as dead and does not forward any new IO requests to them.
Any data that was registered to a dead DataNode is not available to
HDFS any more. DataNode death may cause the replication factor of some
blocks to fall below their specified value. The NameNode constantly
tracks which blocks need to be replicated and initiates replication
whenever necessary. The necessity for re-replication may arise due to
many reasons: a DataNode may become unavailable, a replica may become
corrupted, a hard disk on a DataNode may fail, or the replication
factor of a file may be increased.
Cluster Rebalancing
The HDFS architecture is compatible with data rebalancing schemes. A
scheme might automatically move data from one DataNode to another if
the free space on a DataNode falls below a certain threshold. In the
event of a sudden high demand for a particular file, a scheme might
dynamically create additional replicas and rebalance other data in the
cluster. These types of data rebalancing schemes are not yet
implemented.
Data Integrity
It is possible that a block of data fetched from a DataNode arrives
corrupted. This corruption can occur because of faults in a storage
device, network faults, or buggy software. The HDFS client software
implements checksum checking on the contents of HDFS files. When a
client creates an HDFS file, it computes a checksum of each block of
the file and stores these checksums in a separate hidden file in the
same HDFS namespace. When a client retrieves file contents it verifies
that the data it received from each DataNode matches the checksum
stored in the associated checksum file. If not, then the client can opt
to retrieve that block from another DataNode that has a replica of that
block.
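The per-block verification described above happens automatically inside the client during reads. As a hedged, illustrative addition (not part of the original document), a client can also request the file-level checksum HDFS exposes; the path below is an assumption borrowed from the FS Shell examples later in this document.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumLookup {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path("/foodir/myfile.txt");   // illustrative path
    // Ask HDFS for the file-level checksum it maintains for this file.
    FileChecksum sum = fs.getFileChecksum(p);
    if (sum != null) {
      System.out.println(sum.getAlgorithmName() + " : " + sum);
    }
    fs.close();
  }
}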
Metadata Disk Failure
The FsImage and the EditLog are central data structures of HDFS. A
corruption of these files can cause the HDFS instance to be
non-functional. For this reason, the NameNode can be configured to
support maintaining multiple copies of the FsImage and EditLog. Any
update to either the FsImage or EditLog causes each of the FsImages and
EditLogs to get updated synchronously. This synchronous updating of
multiple copies of the FsImage and EditLog may degrade the rate of
namespace transactions per second that a NameNode can support. However,
this degradation is acceptable because even though HDFS applications
are very data intensive in nature, they are not metadata intensive.
When a NameNode restarts, it selects the latest consistent FsImage and
EditLog to use.
The NameNode machine is a single point of failure for an HDFS cluster.
If the NameNode machine fails, manual intervention is necessary.
Currently, automatic restart and failover of the NameNode software to
another machine is not supported.
Snapshots
Snapshots support storing a copy of data at a particular instant of
time. One usage of the snapshot feature may be to roll back a corrupted
HDFS instance to a previously known good point in time. HDFS does not
currently support snapshots but will in a future release.
Data Organization
Data Blocks
HDFS is designed to support very large files. Applications that are
compatible with HDFS are those that deal with large data sets. These
applications write their data only once but they read it one or more
times and require these reads to be satisfied at streaming speeds. HDFS
supports write-once-read-many semantics on files. A typical block size
used by HDFS is 64 MB. Thus, an HDFS file is chopped up into 64 MB
chunks, and if possible, each chunk will reside on a different
DataNode.
Staging
A client request to create a file does not reach the NameNode
immediately. In fact, initially the HDFS client caches the file data
into a temporary local file. Application writes are transparently
redirected to this temporary local file. When the local file
accumulates data worth over one HDFS block size, the client contacts
the NameNode. The NameNode inserts the file name into the file system
hierarchy and allocates a data block for it. The NameNode responds to
the client request with the identity of the DataNode and the
destination data block. Then the client flushes the block of data from
the local temporary file to the specified DataNode. When a file is
closed, the remaining un-flushed data in the temporary local file is
transferred to the DataNode. The client then tells the NameNode that
the file is closed. At this point, the NameNode commits the file
creation operation into a persistent store. If the NameNode dies before
the file is closed, the file is lost.
The above approach has been adopted after careful consideration of
target applications that run on HDFS. These applications need streaming
writes to files. If a client writes to a remote file directly without
any client side buffering, the network speed and the congestion in the
network impacts throughput considerably. This approach is not without
precedent. Earlier distributed file systems, e.g. AFS, have used client
side caching to improve performance. A POSIX requirement has been
relaxed to achieve higher performance of data uploads.
Replication Pipelining
When a client is writing data to an HDFS file, its data is first
written to a local file as explained in the previous section. Suppose
the HDFS file has a replication factor of three. When the local file
accumulates a full block of user data, the client retrieves a list of
DataNodes from the NameNode. This list contains the DataNodes that will
host a replica of that block. The client then flushes the data block to
the first DataNode. The first DataNode starts receiving the data in
small portions (4 KB), writes each portion to its local repository and
transfers that portion to the second DataNode in the list. The second
DataNode, in turn starts receiving each portion of the data block,
writes that portion to its repository and then flushes that portion to
the third DataNode. Finally, the third DataNode writes the data to its
local repository. Thus, a DataNode can be receiving data from the
previous one in the pipeline and at the same time forwarding data to
the next one in the pipeline. Thus, the data is pipelined from one
DataNode to the next.
Accessibility
HDFS can be accessed from applications in many different ways.
Natively, HDFS provides a
{{{http://hadoop.apache.org/docs/current/api/}FileSystem Java API}}
for applications to use. A C language wrapper for this Java API is also
available. In addition, an HTTP browser can also be used to browse the files
of an HDFS instance. Work is in progress to expose HDFS through the WebDAV
protocol.
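A minimal, hedged sketch of using the FileSystem Java API mentioned above follows; the path <<</foodir/myfile.txt>>> is an illustrative assumption and the configuration is expected to resolve to an HDFS cluster.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemApiExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();     // picks up core-site.xml / hdfs-site.xml
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path("/foodir/myfile.txt");      // illustrative path
    FSDataOutputStream out = fs.create(p, true);  // write a small file
    out.writeBytes("hello from the FileSystem API\n");
    out.close();
    BufferedReader in = new BufferedReader(
        new InputStreamReader(fs.open(p)));       // read it back
    System.out.println(in.readLine());
    in.close();
    fs.close();
  }
}

Compiling and running a sketch like this requires the Hadoop client jars on the classpath.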
FS Shell
HDFS allows user data to be organized in the form of files and
directories. It provides a commandline interface called FS shell that
lets a user interact with the data in HDFS. The syntax of this command
set is similar to other shells (e.g. bash, csh) that users are already
familiar with. Here are some sample action/command pairs:
*---------+---------+
|| Action | Command
*---------+---------+
| Create a directory named <<</foodir>>> | <<<bin/hadoop dfs -mkdir /foodir>>>
*---------+---------+
| Remove a directory named <<</foodir>>> | <<<bin/hadoop dfs -rmr /foodir>>>
*---------+---------+
| View the contents of a file named <<</foodir/myfile.txt>>> | <<<bin/hadoop dfs -cat /foodir/myfile.txt>>>
*---------+---------+
FS shell is targeted for applications that need a scripting language to
interact with the stored data.
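Not part of the original text: one hedged way an application could drive the same shell commands programmatically is through the FsShell class and ToolRunner, as sketched below; the paths mirror the illustrative <<</foodir>>> examples in the table above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.util.ToolRunner;

public class FsShellFromJava {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Run the same commands the table above shows, but from Java.
    int rc = ToolRunner.run(conf, new FsShell(), new String[] {"-mkdir", "/foodir"});
    System.out.println("mkdir exit code: " + rc);
    rc = ToolRunner.run(conf, new FsShell(), new String[] {"-cat", "/foodir/myfile.txt"});
    System.out.println("cat exit code: " + rc);
  }
}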
DFSAdmin
The DFSAdmin command set is used for administering an HDFS cluster.
These are commands that are used only by an HDFS administrator. Here
are some sample action/command pairs:
*---------+---------+
|| Action | Command
*---------+---------+
|Put the cluster in Safemode | <<<bin/hadoop dfsadmin -safemode enter>>>
*---------+---------+
|Generate a list of DataNodes | <<<bin/hadoop dfsadmin -report>>>
*---------+---------+
|Recommission or decommission DataNode(s) | <<<bin/hadoop dfsadmin -refreshNodes>>>
*---------+---------+
Browser Interface
A typical HDFS install configures a web server to expose the HDFS
namespace through a configurable TCP port. This allows a user to
navigate the HDFS namespace and view the contents of its files using a
web browser.
Space Reclamation
File Deletes and Undeletes
When a file is deleted by a user or an application, it is not
immediately removed from HDFS. Instead, HDFS first renames it to a file
in the <<</trash>>> directory. The file can be restored quickly as long as it
remains in <<</trash>>>. A file remains in <<</trash>>> for a configurable amount
of time. After the expiry of its life in <<</trash>>>, the NameNode deletes
the file from the HDFS namespace. The deletion of a file causes the
blocks associated with the file to be freed. Note that there could be
an appreciable time delay between the time a file is deleted by a user
and the time of the corresponding increase in free space in HDFS.
A user can undelete a file after deleting it as long as it remains in
the <<</trash>>> directory. To undelete a file, the user can navigate the
<<</trash>>> directory and retrieve it. The <<</trash>>> directory contains
only the latest copy of the file
that was deleted. The <<</trash>>> directory is just like any other directory
with one special feature: HDFS applies specified policies to
automatically delete files from this directory. The current default
policy is to delete files from <<</trash>>> that are more than 6 hours old.
In the future, this policy will be configurable through a well defined
interface.
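As an illustrative, hedged addition (not in the original document), the org.apache.hadoop.fs.Trash helper sketched below moves a file into the trash from Java rather than through the shell; it assumes trash is enabled (a positive <<<fs.trash.interval>>>) and uses an assumed path.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class MoveToTrashExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();    // fs.trash.interval must be > 0
    Trash trash = new Trash(conf);
    Path p = new Path("/foodir/obsolete.txt");   // illustrative path
    // Rename the file into the current user's trash instead of deleting it.
    boolean moved = trash.moveToTrash(p);
    System.out.println("moved to trash: " + moved);
  }
}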
Decrease Replication Factor
When the replication factor of a file is reduced, the NameNode selects
excess replicas that can be deleted. The next Heartbeat transfers this
information to the DataNode. The DataNode then removes the
corresponding blocks and the corresponding free space appears in the
cluster. Once again, there might be a time delay between the completion
of the setReplication API call and the appearance of free space in the
cluster.
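The setReplication call referred to above can be issued through the FileSystem API; the sketch below is not part of the original document and lowers an assumed file's replication factor to 2.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DecreaseReplication {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path("/foodir/myfile.txt");   // illustrative path
    // Lower the replication factor to 2; the NameNode schedules removal of
    // the excess replicas asynchronously, as described above.
    boolean accepted = fs.setReplication(p, (short) 2);
    System.out.println("setReplication accepted: " + accepted);
    fs.close();
  }
}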
References
Hadoop {{{http://hadoop.apache.org/docs/current/api/}JavaDoc API}}.
HDFS source code: {{http://hadoop.apache.org/version_control.html}}

View File

@ -256,6 +256,21 @@ public class TestHDFSFileContextMainOperations extends
Assert.assertTrue(fs.exists(dst1)); // ensure rename dst exists
}
@Test
public void testIsValidNameInvalidNames() {
String[] invalidNames = {
"/foo/../bar",
"/foo/./bar",
"/foo/:/bar",
"/foo:bar"
};
for (String invalidName: invalidNames) {
Assert.assertFalse(invalidName + " is not valid",
fc.getDefaultFileSystem().isValidName(invalidName));
}
}
private void oldRename(Path src, Path dst, boolean renameSucceeds,
boolean exception) throws Exception {
DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

View File

@ -51,7 +51,7 @@ public class TestVolumeId {
}
@SuppressWarnings("unchecked")
private <T> void testEq(final boolean eq, Comparable<? super T> id1, Comparable<? super T> id2) {
private <T> void testEq(final boolean eq, Comparable<T> id1, Comparable<T> id2) {
final int h1 = id1.hashCode();
final int h2 = id2.hashCode();
@ -99,8 +99,8 @@ public class TestVolumeId {
}
@SuppressWarnings("unchecked")
private <T> void testEqMany(final boolean eq, Comparable<? super T>... volumeIds) {
Comparable<? super T> vidNext;
private <T> void testEqMany(final boolean eq, Comparable<T>... volumeIds) {
Comparable<T> vidNext;
int sum = 0;
for (int i=0; i<volumeIds.length; i++) {
if (i == volumeIds.length - 1) {

View File

@ -624,8 +624,11 @@ public class TestDFSUtil {
@Test
public void testIsValidName() {
assertFalse(DFSUtil.isValidName("/foo/../bar"));
assertFalse(DFSUtil.isValidName("/foo/./bar"));
assertFalse(DFSUtil.isValidName("/foo//bar"));
assertTrue(DFSUtil.isValidName("/"));
assertTrue(DFSUtil.isValidName("/bar/"));
assertFalse(DFSUtil.isValidName("/foo/:/bar"));
assertFalse(DFSUtil.isValidName("/foo:bar"));
}
}

View File

@ -59,6 +59,15 @@ public class TestLayoutVersion {
Feature.RESERVED_REL20_204.lv));
}
/**
* Test to make sure release 1.2.0 support CONCAT
*/
@Test
public void testRelease1_2_0() {
assertTrue(LayoutVersion.supports(Feature.CONCAT,
Feature.RESERVED_REL1_2_0.lv));
}
/**
* Given feature {@code f}, ensures the layout version of that feature
* supports all the features supported by it's ancestor.
@ -69,7 +78,9 @@ public class TestLayoutVersion {
EnumSet<Feature> ancestorSet = LayoutVersion.map.get(ancestorLV);
assertNotNull(ancestorSet);
for (Feature feature : ancestorSet) {
assertTrue(LayoutVersion.supports(feature, lv));
assertTrue("LV " + lv + " does nto support " + feature
+ " supported by the ancestor LV " + f.ancestorLV,
LayoutVersion.supports(feature, lv));
}
}
}

View File

@ -97,6 +97,7 @@ public class TestBlockRecovery {
MiniDFSCluster.getBaseDirectory() + "data";
private DataNode dn;
private Configuration conf;
private boolean tearDownDone;
private final static long RECOVERY_ID = 3000L;
private final static String CLUSTER_ID = "testClusterID";
private final static String POOL_ID = "BP-TEST";
@ -121,6 +122,7 @@ public class TestBlockRecovery {
*/
@Before
public void startUp() throws IOException {
tearDownDone = false;
conf = new HdfsConfiguration();
conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, DATA_DIR);
conf.set(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, "0.0.0.0:0");
@ -177,7 +179,7 @@ public class TestBlockRecovery {
*/
@After
public void tearDown() throws IOException {
if (dn != null) {
if (!tearDownDone && dn != null) {
try {
dn.shutdown();
} catch(Exception e) {
@ -188,6 +190,7 @@ public class TestBlockRecovery {
Assert.assertTrue(
"Cannot delete data-node dirs", FileUtil.fullyDelete(dir));
}
tearDownDone = true;
}
}

View File

@ -17,24 +17,14 @@
package org.apache.hadoop.hdfs.server.datanode;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import static org.apache.hadoop.security.SecurityUtilTestHelper.isExternalKdcRunning;
import org.junit.Assume;
import org.junit.Before;
@ -67,7 +57,7 @@ public class TestStartSecureDataNode {
}
@Test
public void testSecureNameNode() throws IOException, InterruptedException {
public void testSecureNameNode() throws Exception {
MiniDFSCluster cluster = null;
try {
String nnPrincipal =
@ -105,9 +95,9 @@ public class TestStartSecureDataNode {
.build();
cluster.waitActive();
assertTrue(cluster.isDataNodeUp());
} catch (Exception ex) {
ex.printStackTrace();
throw ex;
} finally {
if (cluster != null) {
cluster.shutdown();

View File

@ -1209,22 +1209,19 @@ public class TestEditLog {
*
*/
static void validateNoCrash(byte garbage[]) throws IOException {
final String TEST_LOG_NAME = "test_edit_log";
final File TEST_LOG_NAME = new File(TEST_DIR, "test_edit_log");
EditLogFileOutputStream elfos = null;
File file = null;
EditLogFileInputStream elfis = null;
try {
file = new File(TEST_LOG_NAME);
elfos = new EditLogFileOutputStream(file, 0);
elfos = new EditLogFileOutputStream(TEST_LOG_NAME, 0);
elfos.create();
elfos.writeRaw(garbage, 0, garbage.length);
elfos.setReadyToFlush();
elfos.flushAndSync(true);
elfos.close();
elfos = null;
file = new File(TEST_LOG_NAME);
elfis = new EditLogFileInputStream(file);
elfis = new EditLogFileInputStream(TEST_LOG_NAME);
// verify that we can read everything without killing the JVM or
// throwing an exception other than IOException

View File

@ -59,6 +59,8 @@ import com.google.common.collect.Sets;
public class TestNameNodeRecovery {
private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class);
private static StartupOption recoverStartOpt = StartupOption.RECOVER;
private static final File TEST_DIR = new File(
System.getProperty("test.build.data","build/test/data"));
static {
recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL);
@ -66,15 +68,13 @@ public class TestNameNodeRecovery {
}
static void runEditLogTest(EditLogTestSetup elts) throws IOException {
final String TEST_LOG_NAME = "test_edit_log";
final File TEST_LOG_NAME = new File(TEST_DIR, "test_edit_log");
final OpInstanceCache cache = new OpInstanceCache();
EditLogFileOutputStream elfos = null;
File file = null;
EditLogFileInputStream elfis = null;
try {
file = new File(TEST_LOG_NAME);
elfos = new EditLogFileOutputStream(file, 0);
elfos = new EditLogFileOutputStream(TEST_LOG_NAME, 0);
elfos.create();
elts.addTransactionsToLog(elfos, cache);
@ -82,8 +82,7 @@ public class TestNameNodeRecovery {
elfos.flushAndSync(true);
elfos.close();
elfos = null;
file = new File(TEST_LOG_NAME);
elfis = new EditLogFileInputStream(file);
elfis = new EditLogFileInputStream(TEST_LOG_NAME);
// reading through normally will get you an exception
Set<Long> validTxIds = elts.getValidTxIds();

View File

@ -56,7 +56,6 @@ public class TestWebHdfsWithMultipleNameNodes {
((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.OFF);
((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.OFF);
((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.OFF);
((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.OFF);
}
private static final Configuration conf = new HdfsConfiguration();

View File

@ -14,6 +14,11 @@ Trunk (Unreleased)
MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins.
(Avner BenHanoch via acmurthy)
MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu)
MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions
with poor implementations of Object#hashCode(). (Radim Kolar via cutting)
IMPROVEMENTS
MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for
@ -71,8 +76,14 @@ Trunk (Unreleased)
MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive.
(Brandon Li via suresh)
MAPREDUCE-4809. Change visibility of classes for pluggable sort changes.
(masokan via tucu)
BUG FIXES
MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
(Yu Gao via llu)
MAPREDUCE-4356. [Rumen] Provide access to the method
ParsedTask.obtainTaskAttempts(). (ravigummadi)
@ -171,6 +182,16 @@ Release 2.0.3-alpha - Unreleased
MAPREDUCE-4723. Fix warnings found by findbugs 2. (Sandy Ryza via eli)
MAPREDUCE-4703. Add the ability to start the MiniMRClientCluster using
the configurations used before it is being stopped. (ahmed.radwan via tucu)
MAPREDUCE-4845. ClusterStatus.getMaxMemory() and getUsedMemory() exist in
MR1 but not MR2. (Sandy Ryza via tomwhite)
MAPREDUCE-4899. Implemented a MR specific plugin for tracking finished
applications that YARN's ResourceManager doesn't keep track off anymore
(Derek Dagit via vinodkv)
OPTIMIZATIONS
BUG FIXES
@ -205,6 +226,12 @@ Release 2.0.3-alpha - Unreleased
MAPREDUCE-4800. Cleanup o.a.h.mapred.MapTaskStatus - remove unused
code. (kkambatl via tucu)
MAPREDUCE-4861. Cleanup: Remove unused mapreduce.security.token.DelegationTokenRenewal.
(kkambatl via tucu)
MAPREDUCE-4856. TestJobOutputCommitter uses same directory as
TestJobCleanup. (Sandy Ryza via tomwhite)
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES
@ -610,6 +637,20 @@ Release 0.23.6 - UNRELEASED
MAPREDUCE-4836. Elapsed time for running tasks on AM web UI tasks page is 0
(Ravi Prakash via jeagles)
MAPREDUCE-4842. Shuffle race can hang reducer (Mariappan Asokan via jlowe)
MAPREDUCE-4833. Task can get stuck in FAIL_CONTAINER_CLEANUP (Robert
Parker via jlowe)
MAPREDUCE-4793. Problem with adding resources when using both -files and
-file to hadoop streaming (jlowe)
MAPREDUCE-4890. Invalid TaskImpl state transitions when task fails while
speculating (jlowe)
MAPREDUCE-4902. Fix typo "receievd" should be "received" in log output
(Albert Chu via jlowe)
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -137,11 +137,6 @@
<Class name="org.apache.hadoop.mapred.Task$TaskReporter" />
<Method name="run" />
<Bug pattern="DM_EXIT" />
</Match>
<Match>
<Class name="org.apache.hadoop.mapreduce.security.token.DelegationTokenRenewal$DelegationTokenCancelThread" />
<Method name="run" />
<Bug pattern="DM_EXIT" />
</Match>
<!--
We need to cast objects between old and new api objects

View File

@ -231,7 +231,12 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
// Transitions from FAILED state
.addTransition(TaskStateInternal.FAILED, TaskStateInternal.FAILED,
EnumSet.of(TaskEventType.T_KILL,
TaskEventType.T_ADD_SPEC_ATTEMPT))
TaskEventType.T_ADD_SPEC_ATTEMPT,
TaskEventType.T_ATTEMPT_COMMIT_PENDING,
TaskEventType.T_ATTEMPT_FAILED,
TaskEventType.T_ATTEMPT_KILLED,
TaskEventType.T_ATTEMPT_LAUNCHED,
TaskEventType.T_ATTEMPT_SUCCEEDED))
// Transitions from KILLED state
.addTransition(TaskStateInternal.KILLED, TaskStateInternal.KILLED,
@ -942,6 +947,13 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
taskAttemptId,
TaskAttemptCompletionEventStatus.TIPFAILED);
// issue kill to all non finished attempts
for (TaskAttempt taskAttempt : task.attempts.values()) {
task.killUnfinishedAttempt
(taskAttempt, "Task has failed. Killing attempt!");
}
task.inProgressAttempts.clear();
if (task.historyTaskStartGenerated) {
TaskFailedEvent taskFailedEvent = createTaskFailedEvent(task, attempt.getDiagnostics(),
TaskStateInternal.FAILED, taskAttemptId);

View File

@ -191,12 +191,9 @@ public class ContainerLauncherImpl extends AbstractService implements
@SuppressWarnings("unchecked")
public synchronized void kill() {
if(isCompletelyDone()) {
return;
}
if(this.state == ContainerState.PREP) {
this.state = ContainerState.KILLED_BEFORE_LAUNCH;
} else {
} else if (!isCompletelyDone()) {
LOG.info("KILLING " + taskAttemptID);
ContainerManager proxy = null;

View File

@ -602,4 +602,73 @@ public class TestTaskImpl {
assertTaskScheduledState();
assertEquals(3, taskAttempts.size());
}
@Test
public void testFailedTransitions() {
mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(),
remoteJobConfFile, conf, taskAttemptListener, committer, jobToken,
credentials, clock,
completedTasksFromPreviousRun, startCount,
metrics, appContext, TaskType.MAP) {
@Override
protected int getMaxAttempts() {
return 1;
}
};
TaskId taskId = getNewTaskID();
scheduleTaskAttempt(taskId);
launchTaskAttempt(getLastAttempt().getAttemptId());
// add three more speculative attempts
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
TaskEventType.T_ADD_SPEC_ATTEMPT));
launchTaskAttempt(getLastAttempt().getAttemptId());
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
TaskEventType.T_ADD_SPEC_ATTEMPT));
launchTaskAttempt(getLastAttempt().getAttemptId());
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
TaskEventType.T_ADD_SPEC_ATTEMPT));
launchTaskAttempt(getLastAttempt().getAttemptId());
assertEquals(4, taskAttempts.size());
// have the first attempt fail, verify task failed due to no retries
MockTaskAttemptImpl taskAttempt = taskAttempts.get(0);
taskAttempt.setState(TaskAttemptState.FAILED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
TaskEventType.T_ATTEMPT_FAILED));
assertEquals(TaskState.FAILED, mockTask.getState());
// verify task can no longer be killed
mockTask.handle(new TaskEvent(taskId, TaskEventType.T_KILL));
assertEquals(TaskState.FAILED, mockTask.getState());
// verify speculative doesn't launch new tasks
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
TaskEventType.T_ADD_SPEC_ATTEMPT));
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
TaskEventType.T_ATTEMPT_LAUNCHED));
assertEquals(TaskState.FAILED, mockTask.getState());
assertEquals(4, taskAttempts.size());
// verify attempt events from active tasks don't knock task out of FAILED
taskAttempt = taskAttempts.get(1);
taskAttempt.setState(TaskAttemptState.COMMIT_PENDING);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
TaskEventType.T_ATTEMPT_COMMIT_PENDING));
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt.setState(TaskAttemptState.FAILED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
TaskEventType.T_ATTEMPT_FAILED));
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt = taskAttempts.get(2);
taskAttempt.setState(TaskAttemptState.SUCCEEDED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
TaskEventType.T_ATTEMPT_SUCCEEDED));
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt = taskAttempts.get(3);
taskAttempt.setState(TaskAttemptState.KILLED);
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
TaskEventType.T_ATTEMPT_KILLED));
assertEquals(TaskState.FAILED, mockTask.getState());
}
}

View File

@ -6,8 +6,12 @@ import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.atLeast;
import org.mockito.ArgumentCaptor;
import java.net.InetSocketAddress;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -18,15 +22,21 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher.EventType;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.api.ContainerManager;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.ipc.YarnRPC;
@ -272,6 +282,150 @@ public class TestContainerLauncherImpl {
} finally {
ut.stop();
verify(mockCM).stopContainer(any(StopContainerRequest.class));
}
}
}
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testContainerCleaned() throws Exception {
LOG.info("STARTING testContainerCleaned");
CyclicBarrier startLaunchBarrier = new CyclicBarrier(2);
CyclicBarrier completeLaunchBarrier = new CyclicBarrier(2);
YarnRPC mockRpc = mock(YarnRPC.class);
AppContext mockContext = mock(AppContext.class);
EventHandler mockEventHandler = mock(EventHandler.class);
when(mockContext.getEventHandler()).thenReturn(mockEventHandler);
ContainerManager mockCM = new ContainerManagerForTest(startLaunchBarrier, completeLaunchBarrier);
when(mockRpc.getProxy(eq(ContainerManager.class),
any(InetSocketAddress.class), any(Configuration.class)))
.thenReturn(mockCM);
ContainerLauncherImplUnderTest ut =
new ContainerLauncherImplUnderTest(mockContext, mockRpc);
Configuration conf = new Configuration();
ut.init(conf);
ut.start();
try {
ContainerId contId = makeContainerId(0l, 0, 0, 1);
TaskAttemptId taskAttemptId = makeTaskAttemptId(0l, 0, 0, TaskType.MAP, 0);
String cmAddress = "127.0.0.1:8000";
StartContainerResponse startResp =
recordFactory.newRecordInstance(StartContainerResponse.class);
startResp.setServiceResponse(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID,
ShuffleHandler.serializeMetaData(80));
LOG.info("inserting launch event");
ContainerRemoteLaunchEvent mockLaunchEvent =
mock(ContainerRemoteLaunchEvent.class);
when(mockLaunchEvent.getType())
.thenReturn(EventType.CONTAINER_REMOTE_LAUNCH);
when(mockLaunchEvent.getContainerID())
.thenReturn(contId);
when(mockLaunchEvent.getTaskAttemptID()).thenReturn(taskAttemptId);
when(mockLaunchEvent.getContainerMgrAddress()).thenReturn(cmAddress);
ut.handle(mockLaunchEvent);
startLaunchBarrier.await();
LOG.info("inserting cleanup event");
ContainerLauncherEvent mockCleanupEvent =
mock(ContainerLauncherEvent.class);
when(mockCleanupEvent.getType())
.thenReturn(EventType.CONTAINER_REMOTE_CLEANUP);
when(mockCleanupEvent.getContainerID())
.thenReturn(contId);
when(mockCleanupEvent.getTaskAttemptID()).thenReturn(taskAttemptId);
when(mockCleanupEvent.getContainerMgrAddress()).thenReturn(cmAddress);
ut.handle(mockCleanupEvent);
completeLaunchBarrier.await();
ut.waitForPoolToIdle();
ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
verify(mockEventHandler, atLeast(2)).handle(arg.capture());
boolean containerCleaned = false;
for (int i =0; i < arg.getAllValues().size(); i++) {
LOG.info(arg.getAllValues().get(i).toString());
Event currentEvent = arg.getAllValues().get(i);
if (currentEvent.getType() == TaskAttemptEventType.TA_CONTAINER_CLEANED) {
containerCleaned = true;
}
}
assert(containerCleaned);
} finally {
ut.stop();
}
}
private static class ContainerManagerForTest implements ContainerManager {
private CyclicBarrier startLaunchBarrier;
private CyclicBarrier completeLaunchBarrier;
ContainerManagerForTest (CyclicBarrier startLaunchBarrier, CyclicBarrier completeLaunchBarrier) {
this.startLaunchBarrier = startLaunchBarrier;
this.completeLaunchBarrier = completeLaunchBarrier;
}
@Override
public StartContainerResponse startContainer(StartContainerRequest request)
throws YarnRemoteException {
try {
startLaunchBarrier.await();
completeLaunchBarrier.await();
//To ensure the kill is started before the launch
Thread.sleep(100);
} catch (InterruptedException e) {
e.printStackTrace();
} catch (BrokenBarrierException e) {
e.printStackTrace();
}
throw new ContainerException("Force fail CM");
}
@Override
public StopContainerResponse stopContainer(StopContainerRequest request)
throws YarnRemoteException {
return null;
}
@Override
public GetContainerStatusResponse getContainerStatus(
GetContainerStatusRequest request) throws YarnRemoteException {
return null;
}
}
@SuppressWarnings("serial")
private static class ContainerException extends YarnRemoteException {
public ContainerException(String message) {
super(message);
}
@Override
public String getRemoteTrace() {
return null;
}
@Override
public YarnRemoteException getCause() {
return null;
}
}
}

Some files were not shown because too many files have changed in this diff.