Merge r1440222 through r1441205 from trunk.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1441206 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
40df526bd3
|
@ -325,6 +325,9 @@ Trunk (Unreleased)
|
|||
HADOOP-9249. hadoop-maven-plugins version-info goal causes build failure
|
||||
when running with Clover. (Chris Nauroth via suresh)
|
||||
|
||||
HADOOP-9264. Port change to use Java untar API on Windows from
|
||||
branch-1-win to trunk. (Chris Nauroth via suresh)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-7761. Improve the performance of raw comparisons. (todd)
|
||||
|
@ -586,6 +589,10 @@ Release 2.0.3-alpha - Unreleased
|
|||
HADOOP-8857. hadoop.http.authentication.signature.secret.file docs
|
||||
should not state that secret is randomly generated. (tucu)
|
||||
|
||||
HADOOP-9221. Convert remaining xdocs to APT. (Andy Isaacson via atm)
|
||||
|
||||
HADOOP-8981. TestMetricsSystemImpl fails on Windows. (Xuan Gong via suresh)
|
||||
|
||||
Release 2.0.2-alpha - 2012-09-07
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -241,6 +241,11 @@
|
|||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
@ -381,6 +386,23 @@
|
|||
</target>
|
||||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>copy-test-tarballs</id>
|
||||
<phase>process-test-resources</phase>
|
||||
<goals>
|
||||
<goal>run</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<target>
|
||||
<copy toDir="${test.cache.data}">
|
||||
<fileset dir="${basedir}/src/test/java/org/apache/hadoop/fs">
|
||||
<include name="test-untar.tar"/>
|
||||
<include name="test-untar.tgz"/>
|
||||
</fileset>
|
||||
</copy>
|
||||
</target>
|
||||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<phase>pre-site</phase>
|
||||
<goals>
|
||||
|
@ -485,6 +507,7 @@
|
|||
<exclude>src/test/all-tests</exclude>
|
||||
<exclude>src/test/resources/kdc/ldif/users.ldif</exclude>
|
||||
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c</exclude>
|
||||
<exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
|
|
@ -1,109 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Copyright 2002-2004 The Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
|
||||
"http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>
|
||||
Superusers Acting On Behalf Of Other Users
|
||||
</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
<section>
|
||||
<title> Introduction </title>
|
||||
<p>
|
||||
This document describes how a superuser can submit jobs or access hdfs on behalf of another user in a secured way.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Use Case </title>
|
||||
<p>
|
||||
The code example described in the next section is applicable for the following use case.
|
||||
</p>
|
||||
<p>
|
||||
A superuser with username 'super' wants to submit job and access hdfs on behalf of a user joe. The superuser has kerberos credentials but user joe doesn't have any. The tasks are required to run as user joe and any file accesses on namenode are required to be done as user joe. It is required that user joe can connect to the namenode or job tracker on a connection authenticated with super's kerberos credentials. In other words super is impersonating the user joe.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section>
|
||||
<title> Code example </title>
|
||||
<p>
|
||||
In this example super's kerberos credentials are used for login and a proxy user ugi object is created for joe. The operations are performed within the doAs method of this proxy user ugi object.
|
||||
</p>
|
||||
<source>
|
||||
...
|
||||
//Create ugi for joe. The login user is 'super'.
|
||||
UserGroupInformation ugi =
|
||||
UserGroupInformation.createProxyUser("joe", UserGroupInformation.getLoginUser());
|
||||
ugi.doAs(new PrivilegedExceptionAction<Void>() {
|
||||
public Void run() throws Exception {
|
||||
//Submit a job
|
||||
JobClient jc = new JobClient(conf);
|
||||
jc.submitJob(conf);
|
||||
//OR access hdfs
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
fs.mkdir(someFilePath);
|
||||
}
|
||||
}
|
||||
</source>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Configurations </title>
|
||||
<p>
|
||||
The superuser must be configured on namenode and jobtracker to be allowed to impersonate another user. Following configurations are required.
|
||||
</p>
|
||||
<source>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.super.groups</name>
|
||||
<value>group1,group2</value>
|
||||
<description>Allow the superuser super to impersonate any members of the group group1 and group2</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.super.hosts</name>
|
||||
<value>host1,host2</value>
|
||||
<description>The superuser can connect only from host1 and host2 to impersonate a user</description>
|
||||
</property>
|
||||
</source>
|
||||
<p>
|
||||
If these configurations are not present, impersonation will not be allowed and connection will fail.
|
||||
</p>
|
||||
<p>
|
||||
If more lax security is preferred, the wildcard value <code>*</code> may be used to allow impersonation from any host or of any user.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section>
|
||||
<title> Caveats </title>
|
||||
<p>
|
||||
The superuser must have kerberos credentials to be able to impersonate another user. It cannot use delegation tokens for this feature. It would be wrong if superuser adds its own delegation token to the proxy user ugi, as it will allow the proxy user to connect to the service with the privileges of the superuser.
|
||||
</p>
|
||||
<p>
|
||||
However, if the superuser does want to give a delegation token to joe, it must first impersonate joe and get a delegation token for joe, in the same way as the code example above, and add it to the ugi of joe. In this way the delegation token will have the owner as joe.
|
||||
</p>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
||||
|
|
@ -1,147 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Copyright 2002-2004 The Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
|
||||
"http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>
|
||||
Hadoop Deployment Layout
|
||||
</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
<section>
|
||||
<title> Introduction </title>
|
||||
<p>
|
||||
This document describes the standard deployment layout for Hadoop. With increased complexity and evolving Hadoop ecosystem, having standard deployment layout ensures better integration between Hadoop sub-projects. By making the installation process easier, we can lower the barrier to entry and increase Hadoop adoption.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Packages </title>
|
||||
<p>
|
||||
We need to divide Hadoop up into packages that can be independently upgraded. The list of packages should include:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Hadoop Common - Common including the native code and required jar files.</li>
|
||||
<li>HDFS Client - HDFS jars, scripts, and shared libraries.</li>
|
||||
<li>HDFS Server - jsvc executable</li>
|
||||
<li>Yarn Client - Yarn client jars and scripts</li>
|
||||
<li>Yarn Server - Yarn server jars and scripts</li>
|
||||
<li>MapReduce - MapReduce jars, scripts, and shared libraries</li>
|
||||
<li>LZO - LZO codec from github.com/omally/hadoop-gpl-compression</li>
|
||||
<li>Metrics - Plugins for Chukwa and Ganglia</li>
|
||||
</ul>
|
||||
<p>Packages from other teams will include:</p>
|
||||
<ul>
|
||||
<li>Pig</li>
|
||||
<li>Hive</li>
|
||||
<li>Oozie client</li>
|
||||
<li>Oozie server</li>
|
||||
<li>Howl client</li>
|
||||
<li>Howl server</li>
|
||||
</ul>
|
||||
<p>These packages should be deployable with RPM on RedHat. We also need a package that depends on a version of each of these packages. In general, we can generate tarballs in the new deployment layout.</p>
|
||||
<p>Note that some packages, like Pig, which are user facing, will have 2 versions installed in a given deployment. This will be accomplished by modifying the package name and the associated binaries to include the version number.</p>
|
||||
<p>All of the following paths are based on a prefix directory that is the root of the installation. Our packages must support having multiple Hadoop stack installations on a computer at the same time. For RPMs, this means that the packages must be relocatable and honor the --prefix option.</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section>
|
||||
<title> Deployment </title>
|
||||
<p>It is important to have a standard deployment that results from installing the packages regardless of the package manager. Here are the top level directories and a sample of what would be under each. Note that all of the packages are installed "flattened" into the prefix directory. For compatibility reasons, we should create "share/hadoop" that matches the old HADOOP_PREFIX and set the HADOOP_PREFIX variable to that.</p>
|
||||
<source>
|
||||
$PREFIX/ bin / hadoop
|
||||
| | mapred
|
||||
| | pig -> pig7
|
||||
| | pig6
|
||||
| + pig7
|
||||
|
|
||||
+ etc / hadoop / core-site.xml
|
||||
| | hdfs-site.xml
|
||||
| + mapred-site.xml
|
||||
|
|
||||
+ include / hadoop / Pipes.hh
|
||||
| | + TemplateFactory.hh
|
||||
| + hdfs.h
|
||||
|
|
||||
+ lib / jni / hadoop-common / libhadoop.so.0.20.0
|
||||
| |
|
||||
| | libhdfs.so -> libhdfs.so.0.20.0
|
||||
| + libhdfs.so.0.20.0
|
||||
|
|
||||
+ libexec / task-controller
|
||||
|
|
||||
+ man / man1 / hadoop.1
|
||||
| | mapred.1
|
||||
| | pig6.1
|
||||
| + pig7.1
|
||||
|
|
||||
+ share / hadoop-common
|
||||
| | hadoop-hdfs
|
||||
| | hadoop-mapreduce
|
||||
| | pig6
|
||||
| + pig7
|
||||
|
|
||||
+ sbin / hdfs-admin
|
||||
| | mapred-admin
|
||||
|
|
||||
+ src / hadoop-common
|
||||
| | hadoop-hdfs
|
||||
| + hadoop-mapreduce
|
||||
|
|
||||
+ var / lib / data-node
|
||||
| + task-tracker
|
||||
|
|
||||
| log / hadoop-datanode
|
||||
| + hadoop-tasktracker
|
||||
|
|
||||
+ run / hadoop-datanode.pid
|
||||
+ hadoop-tasktracker.pid
|
||||
</source>
|
||||
<p>Note that we must continue to honor HADOOP_CONF_DIR to override the configuration location, but that it should default to $prefix/etc. User facing binaries and scripts go into bin. Configuration files go into etc with multiple configuration files having a directory. JNI shared libraries go into lib/jni/$tool since Java does not allow to specify the version of the library to load. Libraries that aren't loaded via System.loadLibrary are placed directly under lib. 64 bit versions of the libraries for platforms that support them should be placed in lib64. All of the architecture-independent pieces, including the jars for each tool will be placed in share/$tool. The default location for all the run time information will be in var. The storage will be in var/lib, the logs in var/log and the pid files in var/run.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Path Configurations </title>
|
||||
<p>Path can be configured at compile phase or installation phase. For RPM, it takes advantage of the --relocate directive to allow path reconfiguration at install phase. For Debian package, path is configured at compile phase.
|
||||
</p>
|
||||
<p>Build phase parameter:</p>
|
||||
<ul>
|
||||
<li>package.prefix - Location of package prefix (Default /usr)</li>
|
||||
<li>package.conf.dir - Location of configuration directory (Default /etc/hadoop)</li>
|
||||
<li>package.log.dir - Location of log directory (Default /var/log/hadoop)</li>
|
||||
<li>package.pid.dir - Location of pid directory (Default /var/run/hadoop)</li>
|
||||
</ul>
|
||||
|
||||
<p>Install phase parameter:</p>
|
||||
<source>
|
||||
rpm -i hadoop-[version]-[rev].[arch].rpm \
|
||||
--relocate /usr=/usr/local/hadoop \
|
||||
--relocate /etc/hadoop=/usr/local/etc/hadoop \
|
||||
--relocate /var/log/hadoop=/opt/logs/hadoop \
|
||||
--relocate /var/run/hadoop=/opt/run/hadoop
|
||||
</source>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
|
@ -1,232 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>Native Libraries Guide</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
|
||||
<section>
|
||||
<title>Overview</title>
|
||||
|
||||
<p>This guide describes the native hadoop library and includes a small discussion about native shared libraries.</p>
|
||||
|
||||
<p><strong>Note:</strong> Depending on your environment, the term "native libraries" <em>could</em>
|
||||
refer to all *.so's you need to compile; and, the term "native compression" <em>could</em> refer to all *.so's
|
||||
you need to compile that are specifically related to compression.
|
||||
Currently, however, this document only addresses the native hadoop library (<em>libhadoop.so</em>).</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Native Hadoop Library </title>
|
||||
|
||||
<p>Hadoop has native implementations of certain components for
|
||||
performance reasons and for non-availability of Java implementations. These
|
||||
components are available in a single, dynamically-linked native library called
|
||||
the native hadoop library. On the *nix platforms the library is named <em>libhadoop.so</em>. </p>
|
||||
|
||||
<section>
|
||||
<title>Usage</title>
|
||||
|
||||
<p>It is fairly easy to use the native hadoop library:</p>
|
||||
|
||||
<ol>
|
||||
<li>
|
||||
Review the <a href="#Components">components</a>.
|
||||
</li>
|
||||
<li>
|
||||
Review the <a href="#Supported+Platforms">supported platforms</a>.
|
||||
</li>
|
||||
<li>
|
||||
Either <a href="#Download">download</a> a hadoop release, which will
|
||||
include a pre-built version of the native hadoop library, or
|
||||
<a href="#Build">build</a> your own version of the
|
||||
native hadoop library. Whether you download or build, the name for the library is
|
||||
the same: <em>libhadoop.so</em>
|
||||
</li>
|
||||
<li>
|
||||
Install the compression codec development packages
|
||||
(<strong>>zlib-1.2</strong>, <strong>>gzip-1.2</strong>):
|
||||
<ul>
|
||||
<li>If you download the library, install one or more development packages -
|
||||
whichever compression codecs you want to use with your deployment.</li>
|
||||
<li>If you build the library, it is <strong>mandatory</strong>
|
||||
to install both development packages.</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
Check the <a href="#Runtime">runtime</a> log files.
|
||||
</li>
|
||||
</ol>
|
||||
</section>
|
||||
<section>
|
||||
<title>Components</title>
|
||||
<p>The native hadoop library includes two components, the zlib and gzip
|
||||
<a href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/compress/CompressionCodec.html">
|
||||
compression codecs</a>:
|
||||
</p>
|
||||
<ul>
|
||||
<li><a href="ext:zlib">zlib</a></li>
|
||||
<li><a href="ext:gzip">gzip</a></li>
|
||||
</ul>
|
||||
<p>The native hadoop library is imperative for gzip to work.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Supported Platforms</title>
|
||||
|
||||
<p>The native hadoop library is supported on *nix platforms only.
|
||||
The library does not work with <a href="ext:cygwin">Cygwin</a>
|
||||
or the <a href="ext:osx">Mac OS X</a> platform.</p>
|
||||
|
||||
<p>The native hadoop library is mainly used on the GNU/Linux platform and
|
||||
has been tested on these distributions:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="http://www.redhat.com/rhel/">RHEL4</a>/<a href="http://fedora.redhat.com/">Fedora</a>
|
||||
</li>
|
||||
<li><a href="http://www.ubuntu.com/">Ubuntu</a></li>
|
||||
<li><a href="http://www.gentoo.org/">Gentoo</a></li>
|
||||
</ul>
|
||||
|
||||
<p>On all the above distributions a 32/64 bit native hadoop library will work
|
||||
with a respective 32/64 bit jvm.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Download</title>
|
||||
|
||||
<p>The pre-built 32-bit i386-Linux native hadoop library is available as part of the
|
||||
hadoop distribution and is located in the <code>lib/native</code> directory. You can download the
|
||||
hadoop distribution from <a href="ext:releases/download">Hadoop Common Releases</a>.</p>
|
||||
|
||||
<p>Be sure to install the zlib and/or gzip development packages - whichever compression
|
||||
codecs you want to use with your deployment.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Build</title>
|
||||
|
||||
<p>The native hadoop library is written in <a href="http://en.wikipedia.org/wiki/ANSI_C">ANSI C</a>
|
||||
and is built using the GNU autotools-chain (autoconf, autoheader, automake, autoscan, libtool).
|
||||
This means it should be straight-forward to build the library on any platform with a standards-compliant
|
||||
C compiler and the GNU autotools-chain (see the <a href="#Supported+Platforms">supported platforms</a>).</p>
|
||||
|
||||
<p>The packages you need to install on the target platform are:</p>
|
||||
<ul>
|
||||
<li>
|
||||
C compiler (e.g. <a href="http://gcc.gnu.org/">GNU C Compiler</a>)
|
||||
</li>
|
||||
<li>
|
||||
GNU Autotools Chain:
|
||||
<a href="http://www.gnu.org/software/autoconf/">autoconf</a>,
|
||||
<a href="http://www.gnu.org/software/automake/">automake</a>,
|
||||
<a href="http://www.gnu.org/software/libtool/">libtool</a>
|
||||
</li>
|
||||
<li>
|
||||
zlib-development package (stable version >= 1.2.0)
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>Once you have installed the prerequisite packages, use the standard hadoop <code>build.xml</code>
|
||||
file and pass along the <code>compile.native</code> flag (set to <code>true</code>) to build the native hadoop library:</p>
|
||||
|
||||
<p><code>$ ant -Dcompile.native=true <target></code></p>
|
||||
|
||||
<p>You should see the newly-built library in:</p>
|
||||
|
||||
<p><code>$ build/native/<platform>/lib</code></p>
|
||||
|
||||
<p>where <<code>platform</code>> is a combination of the system-properties:
|
||||
<code>${os.name}-${os.arch}-${sun.arch.data.model}</code> (for example, Linux-i386-32).</p>
|
||||
|
||||
<p>Please note the following:</p>
|
||||
<ul>
|
||||
<li>
|
||||
It is <strong>mandatory</strong> to install both the zlib and gzip
|
||||
development packages on the target platform in order to build the
|
||||
native hadoop library; however, for deployment it is sufficient to
|
||||
install just one package if you wish to use only one codec.
|
||||
</li>
|
||||
<li>
|
||||
It is necessary to have the correct 32/64 libraries for zlib,
|
||||
depending on the 32/64 bit jvm for the target platform, in order to
|
||||
build and deploy the native hadoop library.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Runtime</title>
|
||||
<p>The <code>bin/hadoop</code> script ensures that the native hadoop
|
||||
library is on the library path via the system property: <br/>
|
||||
<em>-Djava.library.path=<path></em></p>
|
||||
|
||||
<p>During runtime, check the hadoop log files for your MapReduce tasks.</p>
|
||||
|
||||
<ul>
|
||||
<li>If everything is all right, then:<br/><br/>
|
||||
<code> DEBUG util.NativeCodeLoader - Trying to load the custom-built native-hadoop library... </code><br/>
|
||||
<code> INFO util.NativeCodeLoader - Loaded the native-hadoop library </code><br/>
|
||||
</li>
|
||||
|
||||
<li>If something goes wrong, then:<br/><br/>
|
||||
<code>
|
||||
INFO util.NativeCodeLoader - Unable to load native-hadoop library for
|
||||
your platform... using builtin-java classes where applicable
|
||||
</code>
|
||||
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Native Shared Libraries</title>
|
||||
<p>You can load <strong>any</strong> native shared library using
|
||||
<a href="http://hadoop.apache.org/mapreduce/docs/current/mapred_tutorial.html#DistributedCache">DistributedCache</a>
|
||||
for <em>distributing</em> and <em>symlinking</em> the library files.</p>
|
||||
|
||||
<p>This example shows you how to distribute a shared library, <code>mylib.so</code>,
|
||||
and load it from a MapReduce task.</p>
|
||||
<ol>
|
||||
<li> First copy the library to the HDFS: <br/>
|
||||
<code>bin/hadoop fs -copyFromLocal mylib.so.1 /libraries/mylib.so.1</code>
|
||||
</li>
|
||||
<li> The job launching program should contain the following: <br/>
|
||||
<code> DistributedCache.createSymlink(conf); </code> <br/>
|
||||
<code> DistributedCache.addCacheFile("hdfs://host:port/libraries/mylib.so.1#mylib.so", conf);
|
||||
</code>
|
||||
</li>
|
||||
<li> The MapReduce task can contain: <br/>
|
||||
<code> System.loadLibrary("mylib.so"); </code>
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<p><br/><strong>Note:</strong> If you downloaded or built the native hadoop library, you don’t need to use DistributedCache to
|
||||
make the library available to your MapReduce tasks.</p>
|
||||
</section>
|
||||
</body>
|
||||
|
||||
</document>
|
|
@ -1,222 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>Service Level Authorization Guide</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
|
||||
<section>
|
||||
<title>Purpose</title>
|
||||
|
||||
<p>This document describes how to configure and manage <em>Service Level
|
||||
Authorization</em> for Hadoop.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Prerequisites</title>
|
||||
|
||||
<p>Make sure Hadoop is installed, configured and setup correctly. For more information see: </p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="single_node_setup.html">Single Node Setup</a> for first-time users.
|
||||
</li>
|
||||
<li>
|
||||
<a href="cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Overview</title>
|
||||
|
||||
<p>Service Level Authorization is the initial authorization mechanism to
|
||||
ensure clients connecting to a particular Hadoop <em>service</em> have the
|
||||
necessary, pre-configured, permissions and are authorized to access the given
|
||||
service. For example, a MapReduce cluster can use this mechanism to allow a
|
||||
configured list of users/groups to submit jobs.</p>
|
||||
|
||||
<p>The <code>${HADOOP_CONF_DIR}/hadoop-policy.xml</code> configuration file
|
||||
is used to define the access control lists for various Hadoop services.</p>
|
||||
|
||||
<p>Service Level Authorization is performed well before other access
|
||||
control checks such as file-permission checks, access control on job queues
|
||||
etc.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Configuration</title>
|
||||
|
||||
<p>This section describes how to configure service-level authorization
|
||||
via the configuration file <code>${HADOOP_CONF_DIR}/hadoop-policy.xml</code>.
|
||||
</p>
|
||||
|
||||
<section>
|
||||
<title>Enable Service Level Authorization</title>
|
||||
|
||||
<p>By default, service-level authorization is disabled for Hadoop. To
|
||||
enable it set the configuration property
|
||||
<code>hadoop.security.authorization</code> to <strong>true</strong>
|
||||
in <code>${HADOOP_CONF_DIR}/core-site.xml</code>.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Hadoop Services and Configuration Properties</title>
|
||||
|
||||
<p>This section lists the various Hadoop services and their configuration
|
||||
knobs:</p>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Property</th>
|
||||
<th>Service</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.client.protocol.acl</code></td>
|
||||
<td>ACL for ClientProtocol, which is used by user code via the
|
||||
DistributedFileSystem.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.client.datanode.protocol.acl</code></td>
|
||||
<td>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
|
||||
for block recovery.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.datanode.protocol.acl</code></td>
|
||||
<td>ACL for DatanodeProtocol, which is used by datanodes to
|
||||
communicate with the namenode.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.inter.datanode.protocol.acl</code></td>
|
||||
<td>ACL for InterDatanodeProtocol, the inter-datanode protocol
|
||||
for updating generation timestamp.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.namenode.protocol.acl</code></td>
|
||||
<td>ACL for NamenodeProtocol, the protocol used by the secondary
|
||||
namenode to communicate with the namenode.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.refresh.policy.protocol.acl</code></td>
|
||||
<td>ACL for RefreshAuthorizationPolicyProtocol, used by the
|
||||
dfsadmin and mradmin commands to refresh the security policy in-effect.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>security.ha.service.protocol.acl</code></td>
|
||||
<td>ACL for HAService protocol used by HAAdmin to manage the
|
||||
active and stand-by states of namenode.
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Access Control Lists</title>
|
||||
|
||||
<p><code>${HADOOP_CONF_DIR}/hadoop-policy.xml</code> defines an access
|
||||
control list for each Hadoop service. Every access control list has a
|
||||
simple format:</p>
|
||||
|
||||
<p>The lists of users and groups are both comma-separated lists of names.
|
||||
The two lists are separated by a space.</p>
|
||||
|
||||
<p>Example: <code>user1,user2 group1,group2</code>.</p>
|
||||
|
||||
<p>Add a blank at the beginning of the line if only a list of groups
|
||||
is to be provided, equivalently a comma-separated list of users followed
|
||||
by a space or nothing implies only a set of given users.</p>
|
||||
|
||||
<p>A special value of <strong>*</strong> implies that all users are
|
||||
allowed to access the service.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Refreshing Service Level Authorization Configuration</title>
|
||||
|
||||
<p>The service-level authorization configuration for the NameNode and
|
||||
JobTracker can be changed without restarting either of the Hadoop master
|
||||
daemons. The cluster administrator can change
|
||||
<code>${HADOOP_CONF_DIR}/hadoop-policy.xml</code> on the master nodes and
|
||||
instruct the NameNode and JobTracker to reload their respective
|
||||
configurations via the <em>-refreshServiceAcl</em> switch to
|
||||
<em>dfsadmin</em> and <em>mradmin</em> commands respectively.</p>
|
||||
|
||||
<p>Refresh the service-level authorization configuration for the
|
||||
NameNode:</p>
|
||||
<p>
|
||||
<code>$ bin/hadoop dfsadmin -refreshServiceAcl</code>
|
||||
</p>
|
||||
|
||||
<p>Refresh the service-level authorization configuration for the
|
||||
JobTracker:</p>
|
||||
<p>
|
||||
<code>$ bin/hadoop mradmin -refreshServiceAcl</code>
|
||||
</p>
|
||||
|
||||
<p>Of course, one can use the
|
||||
<code>security.refresh.policy.protocol.acl</code> property in
|
||||
<code>${HADOOP_CONF_DIR}/hadoop-policy.xml</code> to restrict access to
|
||||
the ability to refresh the service-level authorization configuration to
|
||||
certain users/groups.</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Examples</title>
|
||||
|
||||
<p>Allow only users <code>alice</code>, <code>bob</code> and users in the
|
||||
<code>mapreduce</code> group to submit jobs to the MapReduce cluster:</p>
|
||||
|
||||
<source>
|
||||
<property>
|
||||
<name>security.job.submission.protocol.acl</name>
|
||||
<value>alice,bob mapreduce</value>
|
||||
</property>
|
||||
</source>
|
||||
|
||||
<p></p><p>Allow only DataNodes running as the users who belong to the
|
||||
group <code>datanodes</code> to communicate with the NameNode:</p>
|
||||
|
||||
<source>
|
||||
<property>
|
||||
<name>security.datanode.protocol.acl</name>
|
||||
<value>datanodes</value>
|
||||
</property>
|
||||
</source>
|
||||
|
||||
<p></p><p>Allow any user to talk to the HDFS cluster as a DFSClient:</p>
|
||||
|
||||
<source>
|
||||
<property>
|
||||
<name>security.client.protocol.acl</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
</source>
|
||||
|
||||
</section>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
|
||||
</document>
|
|
@ -1,293 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>Single Node Setup</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
|
||||
<section>
|
||||
<title>Purpose</title>
|
||||
|
||||
<p>This document describes how to set up and configure a single-node Hadoop
|
||||
installation so that you can quickly perform simple operations using Hadoop
|
||||
MapReduce and the Hadoop Distributed File System (HDFS).</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section id="PreReqs">
|
||||
<title>Prerequisites</title>
|
||||
|
||||
<section>
|
||||
<title>Supported Platforms</title>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
GNU/Linux is supported as a development and production platform.
|
||||
Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes.
|
||||
</li>
|
||||
<li>
|
||||
Win32 is supported as a <em>development platform</em>. Distributed
|
||||
operation has not been well tested on Win32, so it is not
|
||||
supported as a <em>production platform</em>.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Required Software</title>
|
||||
<p>Required software for Linux and Windows includes:</p>
|
||||
<ol>
|
||||
<li>
|
||||
Java<sup>TM</sup> 1.6.x, preferably from Sun, must be installed.
|
||||
</li>
|
||||
<li>
|
||||
<strong>ssh</strong> must be installed and <strong>sshd</strong> must
|
||||
be running to use the Hadoop scripts that manage remote Hadoop
|
||||
daemons.
|
||||
</li>
|
||||
</ol>
|
||||
<p>Additional requirements for Windows include:</p>
|
||||
<ol>
|
||||
<li>
|
||||
<a href="http://www.cygwin.com/">Cygwin</a> - Required for shell
|
||||
support in addition to the required software above.
|
||||
</li>
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Installing Software</title>
|
||||
|
||||
<p>If your cluster doesn't have the requisite software you will need to
|
||||
install it.</p>
|
||||
|
||||
<p>For example on Ubuntu Linux:</p>
|
||||
<p>
|
||||
<code>$ sudo apt-get install ssh</code><br/>
|
||||
<code>$ sudo apt-get install rsync</code>
|
||||
</p>
|
||||
|
||||
<p>On Windows, if you did not install the required software when you
|
||||
installed cygwin, start the cygwin installer and select the packages:</p>
|
||||
<ul>
|
||||
<li>openssh - the <em>Net</em> category</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
<section id="Download">
|
||||
<title>Download</title>
|
||||
|
||||
<p>
|
||||
To get a Hadoop distribution, download a recent
|
||||
<a href="ext:releases">stable release</a> from one of the Apache Download
|
||||
Mirrors.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Prepare to Start the Hadoop Cluster</title>
|
||||
<p>
|
||||
Unpack the downloaded Hadoop distribution. In the distribution, edit the
|
||||
file <code>conf/hadoop-env.sh</code> to define at least
|
||||
<code>JAVA_HOME</code> to be the root of your Java installation.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Try the following command:<br/>
|
||||
<code>$ bin/hadoop</code><br/>
|
||||
This will display the usage documentation for the <strong>hadoop</strong>
|
||||
script.
|
||||
</p>
|
||||
|
||||
<p>Now you are ready to start your Hadoop cluster in one of the three supported
|
||||
modes:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Local (Standalone) Mode</li>
|
||||
<li>Pseudo-Distributed Mode</li>
|
||||
<li>Fully-Distributed Mode</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section id="Local">
|
||||
<title>Standalone Operation</title>
|
||||
|
||||
<p>By default, Hadoop is configured to run in a non-distributed
|
||||
mode, as a single Java process. This is useful for debugging.</p>
|
||||
|
||||
<p>
|
||||
The following example copies the unpacked <code>conf</code> directory to
|
||||
use as input and then finds and displays every match of the given regular
|
||||
expression. Output is written to the given <code>output</code> directory.
|
||||
<br/>
|
||||
<code>$ mkdir input</code><br/>
|
||||
<code>$ cp conf/*.xml input</code><br/>
|
||||
<code>
|
||||
$ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
|
||||
</code><br/>
|
||||
<code>$ cat output/*</code>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section id="PseudoDistributed">
|
||||
<title>Pseudo-Distributed Operation</title>
|
||||
|
||||
<p>Hadoop can also be run on a single-node in a pseudo-distributed mode
|
||||
where each Hadoop daemon runs in a separate Java process.</p>
|
||||
|
||||
<section>
|
||||
<title>Configuration</title>
|
||||
<p>Use the following:
|
||||
<br/><br/>
|
||||
<code>conf/core-site.xml</code>:</p>
|
||||
|
||||
<source>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://localhost:9000</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</source>
|
||||
|
||||
<p><br/><code>conf/hdfs-site.xml</code>:</p>
|
||||
<source>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>1</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</source>
|
||||
|
||||
|
||||
<p><br/><code>conf/mapred-site.xml</code>:</p>
|
||||
<source>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapred.job.tracker</name>
|
||||
<value>localhost:9001</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</source>
|
||||
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Setup passphraseless <em>ssh</em></title>
|
||||
|
||||
<p>
|
||||
Now check that you can ssh to the localhost without a passphrase:<br/>
|
||||
<code>$ ssh localhost</code>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If you cannot ssh to localhost without a passphrase, execute the
|
||||
following commands:<br/>
|
||||
<code>$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa</code><br/>
|
||||
<code>$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys</code>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Execution</title>
|
||||
|
||||
<p>
|
||||
Format a new distributed-filesystem:<br/>
|
||||
<code>$ bin/hadoop namenode -format</code>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Start the hadoop daemons:<br/>
|
||||
<code>$ bin/start-all.sh</code>
|
||||
</p>
|
||||
|
||||
<p>The hadoop daemon log output is written to the
|
||||
<code>${HADOOP_LOG_DIR}</code> directory (defaults to
|
||||
<code>${HADOOP_PREFIX}/logs</code>).</p>
|
||||
|
||||
<p>Browse the web interface for the NameNode and the JobTracker; by
|
||||
default they are available at:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>NameNode</code> -
|
||||
<a href="http://localhost:50070/">http://localhost:50070/</a>
|
||||
</li>
|
||||
<li>
|
||||
<code>JobTracker</code> -
|
||||
<a href="http://localhost:50030/">http://localhost:50030/</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
Copy the input files into the distributed filesystem:<br/>
|
||||
<code>$ bin/hadoop fs -put conf input</code>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Run some of the examples provided:<br/>
|
||||
<code>
|
||||
$ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
|
||||
</code>
|
||||
</p>
|
||||
|
||||
<p>Examine the output files:</p>
|
||||
<p>
|
||||
Copy the output files from the distributed filesystem to the local
|
||||
filesystem and examine them:<br/>
|
||||
<code>$ bin/hadoop fs -get output output</code><br/>
|
||||
<code>$ cat output/*</code>
|
||||
</p>
|
||||
<p> or </p>
|
||||
<p>
|
||||
View the output files on the distributed filesystem:<br/>
|
||||
<code>$ bin/hadoop fs -cat output/*</code>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
When you're done, stop the daemons with:<br/>
|
||||
<code>$ bin/stop-all.sh</code>
|
||||
</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section id="FullyDistributed">
|
||||
<title>Fully-Distributed Operation</title>
|
||||
|
||||
<p>For information on setting up fully-distributed, non-trivial clusters
|
||||
see <a href="cluster_setup.html">Cluster Setup</a>.</p>
|
||||
</section>
|
||||
|
||||
<p>
|
||||
<em>Java and JNI are trademarks or registered trademarks of
|
||||
Sun Microsystems, Inc. in the United States and other countries.</em>
|
||||
</p>
|
||||
|
||||
</body>
|
||||
|
||||
</document>
|
|
@ -1128,6 +1128,17 @@ public abstract class FileSystem extends Configured implements Closeable {
|
|||
public abstract FSDataOutputStream append(Path f, int bufferSize,
|
||||
Progressable progress) throws IOException;
|
||||
|
||||
/**
|
||||
* Concat existing files together.
|
||||
* @param trg the path to the target destination.
|
||||
* @param psrcs the paths to the sources to use for the concatenation.
|
||||
* @throws IOException
|
||||
*/
|
||||
public void concat(final Path trg, final Path [] psrcs) throws IOException {
|
||||
throw new UnsupportedOperationException("Not implemented by the " +
|
||||
getClass().getSimpleName() + " FileSystem implementation");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get replication.
|
||||
*
|
||||
|
|
|
@ -21,9 +21,12 @@ package org.apache.hadoop.fs;
|
|||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Enumeration;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -624,14 +627,28 @@ public class FileUtil {
|
|||
* @throws IOException
|
||||
*/
|
||||
public static void unTar(File inFile, File untarDir) throws IOException {
|
||||
if (!untarDir.mkdirs()) {
|
||||
if (!untarDir.mkdirs()) {
|
||||
if (!untarDir.isDirectory()) {
|
||||
throw new IOException("Mkdirs failed to create " + untarDir);
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder untarCommand = new StringBuilder();
|
||||
boolean gzipped = inFile.toString().endsWith("gz");
|
||||
if(Shell.WINDOWS) {
|
||||
// Tar is not native to Windows. Use simple Java based implementation for
|
||||
// tests and simple tar archives
|
||||
unTarUsingJava(inFile, untarDir, gzipped);
|
||||
}
|
||||
else {
|
||||
// spawn tar utility to untar archive for full fledged unix behavior such
|
||||
// as resolving symlinks in tar archives
|
||||
unTarUsingTar(inFile, untarDir, gzipped);
|
||||
}
|
||||
}
|
||||
|
||||
private static void unTarUsingTar(File inFile, File untarDir,
|
||||
boolean gzipped) throws IOException {
|
||||
StringBuffer untarCommand = new StringBuffer();
|
||||
if (gzipped) {
|
||||
untarCommand.append(" gzip -dc '");
|
||||
untarCommand.append(FileUtil.makeShellPath(inFile));
|
||||
|
@ -656,7 +673,62 @@ public class FileUtil {
|
|||
". Tar process exited with exit code " + exitcode);
|
||||
}
|
||||
}
|
||||
|
||||
private static void unTarUsingJava(File inFile, File untarDir,
|
||||
boolean gzipped) throws IOException {
|
||||
InputStream inputStream = null;
|
||||
if (gzipped) {
|
||||
inputStream = new BufferedInputStream(new GZIPInputStream(
|
||||
new FileInputStream(inFile)));
|
||||
} else {
|
||||
inputStream = new BufferedInputStream(new FileInputStream(inFile));
|
||||
}
|
||||
|
||||
TarArchiveInputStream tis = new TarArchiveInputStream(inputStream);
|
||||
|
||||
for (TarArchiveEntry entry = tis.getNextTarEntry(); entry != null;) {
|
||||
unpackEntries(tis, entry, untarDir);
|
||||
entry = tis.getNextTarEntry();
|
||||
}
|
||||
}
|
||||
|
||||
private static void unpackEntries(TarArchiveInputStream tis,
|
||||
TarArchiveEntry entry, File outputDir) throws IOException {
|
||||
if (entry.isDirectory()) {
|
||||
File subDir = new File(outputDir, entry.getName());
|
||||
if (!subDir.mkdir() && !subDir.isDirectory()) {
|
||||
throw new IOException("Mkdirs failed to create tar internal dir "
|
||||
+ outputDir);
|
||||
}
|
||||
|
||||
for (TarArchiveEntry e : entry.getDirectoryEntries()) {
|
||||
unpackEntries(tis, e, subDir);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
File outputFile = new File(outputDir, entry.getName());
|
||||
if (!outputDir.exists()) {
|
||||
if (!outputDir.mkdirs()) {
|
||||
throw new IOException("Mkdirs failed to create tar internal dir "
|
||||
+ outputDir);
|
||||
}
|
||||
}
|
||||
|
||||
int count;
|
||||
byte data[] = new byte[2048];
|
||||
BufferedOutputStream outputStream = new BufferedOutputStream(
|
||||
new FileOutputStream(outputFile));
|
||||
|
||||
while ((count = tis.read(data)) != -1) {
|
||||
outputStream.write(data, 0, count);
|
||||
}
|
||||
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Class for creating hardlinks.
|
||||
* Supports Unix, Cygwin, WindXP.
|
||||
|
|
|
@ -159,6 +159,11 @@ public class FilterFileSystem extends FileSystem {
|
|||
return fs.append(f, bufferSize, progress);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void concat(Path f, Path[] psrcs) throws IOException {
|
||||
fs.concat(f, psrcs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, FsPermission permission,
|
||||
boolean overwrite, int bufferSize, short replication, long blockSize,
|
||||
|
|
|
@ -55,6 +55,9 @@ public enum Errno {
|
|||
EPIPE,
|
||||
EDOM,
|
||||
ERANGE,
|
||||
ELOOP,
|
||||
ENAMETOOLONG,
|
||||
ENOTEMPTY,
|
||||
|
||||
UNKNOWN;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.io.nativeio;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileDescriptor;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
@ -293,4 +294,35 @@ public class NativeIO {
|
|||
stat.group = getName(IdCache.GROUP, stat.groupId);
|
||||
return stat;
|
||||
}
|
||||
|
||||
/**
|
||||
* A version of renameTo that throws a descriptive exception when it fails.
|
||||
*
|
||||
* @param src The source path
|
||||
* @param dst The destination path
|
||||
*
|
||||
* @throws NativeIOException On failure.
|
||||
*/
|
||||
public static void renameTo(File src, File dst)
|
||||
throws IOException {
|
||||
if (!nativeLoaded) {
|
||||
if (!src.renameTo(dst)) {
|
||||
throw new IOException("renameTo(src=" + src + ", dst=" +
|
||||
dst + ") failed.");
|
||||
}
|
||||
} else {
|
||||
renameTo0(src.getAbsolutePath(), dst.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Native counterpart of renameTo(File, File); throws a descriptive exception when the rename fails.
|
||||
*
|
||||
* @param src The source path
|
||||
* @param dst The destination path
|
||||
*
|
||||
* @throws NativeIOException On failure.
|
||||
*/
|
||||
private static native void renameTo0(String src, String dst)
|
||||
throws NativeIOException;
|
||||
}
|
||||
|
|
|
@ -24,11 +24,12 @@
|
|||
#include <grp.h>
|
||||
#include <jni.h>
|
||||
#include <pwd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "config.h"
|
||||
|
@ -502,6 +503,26 @@ ssize_t get_pw_buflen() {
|
|||
#endif
|
||||
return (ret > 512) ? ret : 512;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_org_apache_hadoop_io_nativeio_NativeIO_renameTo0(JNIEnv *env,
|
||||
jclass clazz, jstring jsrc, jstring jdst)
|
||||
{
|
||||
const char *src = NULL, *dst = NULL;
|
||||
|
||||
src = (*env)->GetStringUTFChars(env, jsrc, NULL);
|
||||
if (!src) goto done; // exception was thrown
|
||||
dst = (*env)->GetStringUTFChars(env, jdst, NULL);
|
||||
if (!dst) goto done; // exception was thrown
|
||||
if (rename(src, dst)) {
|
||||
throw_ioe(env, errno);
|
||||
}
|
||||
|
||||
done:
|
||||
if (src) (*env)->ReleaseStringUTFChars(env, jsrc, src);
|
||||
if (dst) (*env)->ReleaseStringUTFChars(env, jdst, dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* vim: sw=2: ts=2: et:
|
||||
*/
|
||||
|
|
|
@ -63,6 +63,9 @@ static errno_mapping_t ERRNO_MAPPINGS[] = {
|
|||
MAPPING(EPIPE),
|
||||
MAPPING(EDOM),
|
||||
MAPPING(ERANGE),
|
||||
MAPPING(ELOOP),
|
||||
MAPPING(ENAMETOOLONG),
|
||||
MAPPING(ENOTEMPTY),
|
||||
{-1, NULL}
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,183 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Native Libraries Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Native Libraries Guide
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
This guide describes the native hadoop library and includes a small
|
||||
discussion about native shared libraries.
|
||||
|
||||
Note: Depending on your environment, the term "native libraries" could
|
||||
refer to all *.so's you need to compile; and, the term "native
|
||||
compression" could refer to all *.so's you need to compile that are
|
||||
specifically related to compression. Currently, however, this document
|
||||
only addresses the native hadoop library (<<<libhadoop.so>>>).
|
||||
|
||||
* Native Hadoop Library
|
||||
|
||||
Hadoop has native implementations of certain components for performance
|
||||
reasons and for non-availability of Java implementations. These
|
||||
components are available in a single, dynamically-linked native library
|
||||
called the native hadoop library. On the *nix platforms the library is
|
||||
named <<<libhadoop.so>>>.
|
||||
|
||||
* Usage
|
||||
|
||||
It is fairly easy to use the native hadoop library:
|
||||
|
||||
[[1]] Review the components.
|
||||
|
||||
[[2]] Review the supported platforms.
|
||||
|
||||
[[3]] Either download a hadoop release, which will include a pre-built
|
||||
version of the native hadoop library, or build your own version of
|
||||
the native hadoop library. Whether you download or build, the name
|
||||
for the library is the same: libhadoop.so
|
||||
|
||||
[[4]] Install the compression codec development packages (>zlib-1.2,
|
||||
>gzip-1.2):
|
||||
+ If you download the library, install one or more development
|
||||
packages - whichever compression codecs you want to use with
|
||||
your deployment.
|
||||
+ If you build the library, it is mandatory to install both
|
||||
development packages.
|
||||
|
||||
[[5]] Check the runtime log files.
|
||||
|
||||
* Components
|
||||
|
||||
The native hadoop library includes two components, the zlib and gzip
|
||||
compression codecs:
|
||||
|
||||
* zlib
|
||||
|
||||
* gzip
|
||||
|
||||
The native hadoop library is imperative for gzip to work.
|
||||
|
||||
* Supported Platforms
|
||||
|
||||
The native hadoop library is supported on *nix platforms only. The
|
||||
library does not work with Cygwin or the Mac OS X platform.
|
||||
|
||||
The native hadoop library is mainly used on the GNU/Linux platform and
|
||||
has been tested on these distributions:
|
||||
|
||||
* RHEL4/Fedora
|
||||
|
||||
* Ubuntu
|
||||
|
||||
* Gentoo
|
||||
|
||||
On all the above distributions a 32/64 bit native hadoop library will
|
||||
work with a respective 32/64 bit jvm.
|
||||
|
||||
* Download
|
||||
|
||||
The pre-built 32-bit i386-Linux native hadoop library is available as
|
||||
part of the hadoop distribution and is located in the <<<lib/native>>>
|
||||
directory. You can download the hadoop distribution from Hadoop Common
|
||||
Releases.
|
||||
|
||||
Be sure to install the zlib and/or gzip development packages -
|
||||
whichever compression codecs you want to use with your deployment.
|
||||
|
||||
* Build
|
||||
|
||||
The native hadoop library is written in ANSI C and is built using the
|
||||
GNU autotools-chain (autoconf, autoheader, automake, autoscan,
|
||||
libtool). This means it should be straight-forward to build the library
|
||||
on any platform with a standards-compliant C compiler and the GNU
|
||||
autotools-chain (see the supported platforms).
|
||||
|
||||
The packages you need to install on the target platform are:
|
||||
|
||||
* C compiler (e.g. GNU C Compiler)
|
||||
|
||||
* GNU Autotools Chain: autoconf, automake, libtool
|
||||
|
||||
* zlib-development package (stable version >= 1.2.0)
|
||||
|
||||
Once you have installed the prerequisite packages, use the standard hadoop
|
||||
build.xml file and pass along the compile.native flag (set to true) to
|
||||
build the native hadoop library:
|
||||
|
||||
----
|
||||
$ ant -Dcompile.native=true <target>
|
||||
----
|
||||
|
||||
You should see the newly-built library in:
|
||||
|
||||
----
|
||||
$ build/native/<platform>/lib
|
||||
----
|
||||
|
||||
where <platform> is a combination of the system-properties:
|
||||
${os.name}-${os.arch}-${sun.arch.data.model} (for example,
|
||||
Linux-i386-32).
|
||||
|
||||
Please note the following:
|
||||
|
||||
* It is mandatory to install both the zlib and gzip development
|
||||
packages on the target platform in order to build the native hadoop
|
||||
library; however, for deployment it is sufficient to install just
|
||||
one package if you wish to use only one codec.
|
||||
|
||||
* It is necessary to have the correct 32/64 libraries for zlib,
|
||||
depending on the 32/64 bit jvm for the target platform, in order to
|
||||
build and deploy the native hadoop library.
|
||||
|
||||
* Runtime
|
||||
|
||||
The bin/hadoop script ensures that the native hadoop library is on the
|
||||
library path via the system property:
|
||||
<<<-Djava.library.path=<path> >>>
|
||||
|
||||
During runtime, check the hadoop log files for your MapReduce tasks.
|
||||
|
||||
* If everything is all right, then:
|
||||
<<<DEBUG util.NativeCodeLoader - Trying to load the custom-built native-hadoop library...>>>
|
||||
<<<INFO util.NativeCodeLoader - Loaded the native-hadoop library>>>
|
||||
|
||||
* If something goes wrong, then:
|
||||
<<<INFO util.NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable>>>
|
||||
|
||||
* Native Shared Libraries
|
||||
|
||||
You can load any native shared library using DistributedCache for
|
||||
distributing and symlinking the library files.
|
||||
|
||||
This example shows you how to distribute a shared library, mylib.so,
|
||||
and load it from a MapReduce task.
|
||||
|
||||
[[1]] First copy the library to the HDFS:
|
||||
<<<bin/hadoop fs -copyFromLocal mylib.so.1 /libraries/mylib.so.1>>>
|
||||
|
||||
[[2]] The job launching program should contain the following:
|
||||
<<<DistributedCache.createSymlink(conf);>>>
|
||||
<<<DistributedCache.addCacheFile("hdfs://host:port/libraries/mylib.so.1#mylib.so", conf);>>>
|
||||
|
||||
[[3]] The MapReduce task can contain:
|
||||
<<<System.loadLibrary("mylib.so");>>>
|
||||
|
||||
Note: If you downloaded or built the native hadoop library, you don’t
|
||||
need to use DistributedCache to make the library available to your
|
||||
MapReduce tasks.
|
|
@ -0,0 +1,164 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Service Level Authorization Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Service Level Authorization Guide
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Purpose
|
||||
|
||||
This document describes how to configure and manage Service Level
|
||||
Authorization for Hadoop.
|
||||
|
||||
* Prerequisites
|
||||
|
||||
Make sure Hadoop is installed, configured and setup correctly. For more
|
||||
information see:
|
||||
* Single Node Setup for first-time users.
|
||||
* Cluster Setup for large, distributed clusters.
|
||||
|
||||
* Overview
|
||||
|
||||
Service Level Authorization is the initial authorization mechanism to
|
||||
ensure clients connecting to a particular Hadoop service have the
|
||||
necessary, pre-configured, permissions and are authorized to access the
|
||||
given service. For example, a MapReduce cluster can use this mechanism
|
||||
to allow a configured list of users/groups to submit jobs.
|
||||
|
||||
The <<<${HADOOP_CONF_DIR}/hadoop-policy.xml>>> configuration file is used to
|
||||
define the access control lists for various Hadoop services.
|
||||
|
||||
Service Level Authorization is performed well before other access
|
||||
control checks such as file-permission checks, access control on job
|
||||
queues etc.
|
||||
|
||||
* Configuration
|
||||
|
||||
This section describes how to configure service-level authorization via
|
||||
the configuration file <<<${HADOOP_CONF_DIR}/hadoop-policy.xml>>>.
|
||||
|
||||
** Enable Service Level Authorization
|
||||
|
||||
By default, service-level authorization is disabled for Hadoop. To
|
||||
enable it set the configuration property hadoop.security.authorization
|
||||
to true in <<<${HADOOP_CONF_DIR}/core-site.xml>>>.
|
||||
|
||||
** Hadoop Services and Configuration Properties
|
||||
|
||||
This section lists the various Hadoop services and their configuration
|
||||
knobs:
|
||||
|
||||
*-------------------------------------+--------------------------------------+
|
||||
|| Property || Service
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.client.protocol.acl | ACL for ClientProtocol, which is used by user code via the DistributedFileSystem.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.client.datanode.protocol.acl | ACL for ClientDatanodeProtocol, the client-to-datanode protocol for block recovery.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.datanode.protocol.acl | ACL for DatanodeProtocol, which is used by datanodes to communicate with the namenode.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.inter.datanode.protocol.acl | ACL for InterDatanodeProtocol, the inter-datanode protocol for updating generation timestamp.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.namenode.protocol.acl | ACL for NamenodeProtocol, the protocol used by the secondary namenode to communicate with the namenode.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.inter.tracker.protocol.acl | ACL for InterTrackerProtocol, used by the tasktrackers to communicate with the jobtracker.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.job.submission.protocol.acl | ACL for JobSubmissionProtocol, used by job clients to communicate with the jobtracker for job submission, querying job status etc.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.task.umbilical.protocol.acl | ACL for TaskUmbilicalProtocol, used by the map and reduce tasks to communicate with the parent tasktracker.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.refresh.policy.protocol.acl | ACL for RefreshAuthorizationPolicyProtocol, used by the dfsadmin and mradmin commands to refresh the security policy in-effect.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
security.ha.service.protocol.acl | ACL for HAService protocol used by HAAdmin to manage the active and stand-by states of namenode.
|
||||
*-------------------------------------+--------------------------------------+
|
||||
|
||||
** Access Control Lists
|
||||
|
||||
<<<${HADOOP_CONF_DIR}/hadoop-policy.xml>>> defines an access control list for
|
||||
each Hadoop service. Every access control list has a simple format:
|
||||
|
||||
The lists of users and groups are both comma-separated lists of names.
|
||||
The two lists are separated by a space.
|
||||
|
||||
Example: <<<user1,user2 group1,group2>>>.
|
||||
|
||||
Add a blank at the beginning of the line if only a list of groups is to
|
||||
be provided, equivalently a comma-separated list of users followed by
|
||||
a space or nothing implies only a set of given users.
|
||||
|
||||
A special value of <<<*>>> implies that all users are allowed to access the
|
||||
service.
|
||||
|
||||
** Refreshing Service Level Authorization Configuration
|
||||
|
||||
The service-level authorization configuration for the NameNode and
|
||||
JobTracker can be changed without restarting either of the Hadoop
|
||||
master daemons. The cluster administrator can change
|
||||
<<<${HADOOP_CONF_DIR}/hadoop-policy.xml>>> on the master nodes and instruct
|
||||
the NameNode and JobTracker to reload their respective configurations
|
||||
via the <<<-refreshServiceAcl>>> switch to <<<dfsadmin>>> and <<<mradmin>>> commands
|
||||
respectively.
|
||||
|
||||
Refresh the service-level authorization configuration for the NameNode:
|
||||
|
||||
----
|
||||
$ bin/hadoop dfsadmin -refreshServiceAcl
|
||||
----
|
||||
|
||||
Refresh the service-level authorization configuration for the
|
||||
JobTracker:
|
||||
|
||||
----
|
||||
$ bin/hadoop mradmin -refreshServiceAcl
|
||||
----
|
||||
|
||||
Of course, one can use the <<<security.refresh.policy.protocol.acl>>>
|
||||
property in <<<${HADOOP_CONF_DIR}/hadoop-policy.xml>>> to restrict access to
|
||||
the ability to refresh the service-level authorization configuration to
|
||||
certain users/groups.
|
||||
|
||||
** Examples
|
||||
|
||||
Allow only users <<<alice>>>, <<<bob>>> and users in the <<<mapreduce>>> group to submit
|
||||
jobs to the MapReduce cluster:
|
||||
|
||||
----
|
||||
<property>
|
||||
<name>security.job.submission.protocol.acl</name>
|
||||
<value>alice,bob mapreduce</value>
|
||||
</property>
|
||||
----
|
||||
|
||||
Allow only DataNodes running as the users who belong to the group
|
||||
datanodes to communicate with the NameNode:
|
||||
|
||||
----
|
||||
<property>
|
||||
<name>security.datanode.protocol.acl</name>
|
||||
<value>datanodes</value>
|
||||
</property>
|
||||
----
|
||||
|
||||
Allow any user to talk to the HDFS cluster as a DFSClient:
|
||||
|
||||
----
|
||||
<property>
|
||||
<name>security.client.protocol.acl</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
----
|
|
@ -0,0 +1,239 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Single Node Setup
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Single Node Setup
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Purpose
|
||||
|
||||
This document describes how to set up and configure a single-node
|
||||
Hadoop installation so that you can quickly perform simple operations
|
||||
using Hadoop MapReduce and the Hadoop Distributed File System (HDFS).
|
||||
|
||||
* Prerequisites
|
||||
|
||||
** Supported Platforms
|
||||
|
||||
* GNU/Linux is supported as a development and production platform.
|
||||
Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes.
|
||||
|
||||
* Win32 is supported as a development platform. Distributed operation
|
||||
has not been well tested on Win32, so it is not supported as a
|
||||
production platform.
|
||||
|
||||
** Required Software
|
||||
|
||||
Required software for Linux and Windows includes:
|
||||
|
||||
[[1]] Java^TM 1.6.x, preferably from Sun, must be installed.
|
||||
|
||||
[[2]] ssh must be installed and sshd must be running to use the Hadoop
|
||||
scripts that manage remote Hadoop daemons.
|
||||
|
||||
Additional requirements for Windows include:
|
||||
|
||||
[[1]] Cygwin - Required for shell support in addition to the required
|
||||
software above.
|
||||
|
||||
** Installing Software
|
||||
|
||||
If your cluster doesn't have the requisite software you will need to
|
||||
install it.
|
||||
|
||||
For example on Ubuntu Linux:
|
||||
|
||||
----
|
||||
$ sudo apt-get install ssh
|
||||
$ sudo apt-get install rsync
|
||||
----
|
||||
|
||||
On Windows, if you did not install the required software when you
|
||||
installed cygwin, start the cygwin installer and select the packages:
|
||||
|
||||
* openssh - the Net category
|
||||
|
||||
* Download
|
||||
|
||||
To get a Hadoop distribution, download a recent stable release from one
|
||||
of the Apache Download Mirrors.
|
||||
|
||||
* Prepare to Start the Hadoop Cluster
|
||||
|
||||
Unpack the downloaded Hadoop distribution. In the distribution, edit
|
||||
the file <<<conf/hadoop-env.sh>>> to define at least <<<JAVA_HOME>>> to be the root
|
||||
of your Java installation.
|
||||
|
||||
Try the following command:
|
||||
|
||||
----
|
||||
$ bin/hadoop
|
||||
----
|
||||
|
||||
This will display the usage documentation for the hadoop script.
|
||||
|
||||
Now you are ready to start your Hadoop cluster in one of the three
|
||||
supported modes:
|
||||
|
||||
* Local (Standalone) Mode
|
||||
|
||||
* Pseudo-Distributed Mode
|
||||
|
||||
* Fully-Distributed Mode
|
||||
|
||||
* Standalone Operation
|
||||
|
||||
By default, Hadoop is configured to run in a non-distributed mode, as a
|
||||
single Java process. This is useful for debugging.
|
||||
|
||||
The following example copies the unpacked conf directory to use as
|
||||
input and then finds and displays every match of the given regular
|
||||
expression. Output is written to the given output directory.
|
||||
|
||||
----
|
||||
$ mkdir input
|
||||
$ cp conf/*.xml input
|
||||
$ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
|
||||
$ cat output/*
|
||||
---
|
||||
|
||||
* Pseudo-Distributed Operation
|
||||
|
||||
Hadoop can also be run on a single-node in a pseudo-distributed mode
|
||||
where each Hadoop daemon runs in a separate Java process.
|
||||
|
||||
** Configuration
|
||||
|
||||
Use the following:
|
||||
|
||||
conf/core-site.xml:
|
||||
|
||||
----
|
||||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://localhost:9000</value>
|
||||
</property>
|
||||
</configuration>
|
||||
----
|
||||
|
||||
conf/hdfs-site.xml:
|
||||
|
||||
----
|
||||
<configuration>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>1</value>
|
||||
</property>
|
||||
</configuration>
|
||||
----
|
||||
|
||||
conf/mapred-site.xml:
|
||||
|
||||
----
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapred.job.tracker</name>
|
||||
<value>localhost:9001</value>
|
||||
</property>
|
||||
</configuration>
|
||||
----
|
||||
|
||||
** Setup passphraseless ssh
|
||||
|
||||
Now check that you can ssh to the localhost without a passphrase:
|
||||
|
||||
----
|
||||
$ ssh localhost
|
||||
----
|
||||
|
||||
If you cannot ssh to localhost without a passphrase, execute the
|
||||
following commands:
|
||||
|
||||
----
|
||||
$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
|
||||
$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
|
||||
----
|
||||
|
||||
** Execution
|
||||
|
||||
Format a new distributed-filesystem:
|
||||
|
||||
----
|
||||
$ bin/hadoop namenode -format
|
||||
----
|
||||
|
||||
Start the hadoop daemons:
|
||||
|
||||
----
|
||||
$ bin/start-all.sh
|
||||
----
|
||||
|
||||
The hadoop daemon log output is written to the <<<${HADOOP_LOG_DIR}>>>
|
||||
directory (defaults to <<<${HADOOP_PREFIX}/logs>>>).
|
||||
|
||||
Browse the web interface for the NameNode and the JobTracker; by
|
||||
default they are available at:
|
||||
|
||||
* NameNode - <<<http://localhost:50070/>>>
|
||||
|
||||
* JobTracker - <<<http://localhost:50030/>>>
|
||||
|
||||
Copy the input files into the distributed filesystem:
|
||||
|
||||
----
|
||||
$ bin/hadoop fs -put conf input
|
||||
----
|
||||
|
||||
Run some of the examples provided:
|
||||
|
||||
----
|
||||
$ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
|
||||
----
|
||||
|
||||
Examine the output files:
|
||||
|
||||
Copy the output files from the distributed filesystem to the local
|
||||
filesystem and examine them:
|
||||
|
||||
----
|
||||
$ bin/hadoop fs -get output output
|
||||
$ cat output/*
|
||||
----
|
||||
|
||||
or
|
||||
|
||||
View the output files on the distributed filesystem:
|
||||
|
||||
----
|
||||
$ bin/hadoop fs -cat output/*
|
||||
----
|
||||
|
||||
When you're done, stop the daemons with:
|
||||
|
||||
----
|
||||
$ bin/stop-all.sh
|
||||
----
|
||||
|
||||
* Fully-Distributed Operation
|
||||
|
||||
For information on setting up fully-distributed, non-trivial clusters
|
||||
see {{{./ClusterSetup.html}Cluster Setup}}.
|
||||
|
||||
Java and JNI are trademarks or registered trademarks of Sun
|
||||
Microsystems, Inc. in the United States and other countries.
|
|
@ -0,0 +1,100 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Superusers Acting On Behalf Of Other Users
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Superusers Acting On Behalf Of Other Users
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Introduction
|
||||
|
||||
This document describes how a superuser can submit jobs or access hdfs
|
||||
on behalf of another user in a secured way.
|
||||
|
||||
* Use Case
|
||||
|
||||
The code example described in the next section is applicable for the
|
||||
following use case.
|
||||
|
||||
A superuser with username 'super' wants to submit a job and access hdfs
|
||||
on behalf of a user joe. The superuser has kerberos credentials but
|
||||
user joe doesn't have any. The tasks are required to run as user joe
|
||||
and any file accesses on namenode are required to be done as user joe.
|
||||
It is required that user joe can connect to the namenode or job tracker
|
||||
on a connection authenticated with super's kerberos credentials. In
|
||||
other words super is impersonating the user joe.
|
||||
|
||||
* Code example
|
||||
|
||||
In this example super's kerberos credentials are used for login and a
|
||||
proxy user ugi object is created for joe. The operations are performed
|
||||
within the doAs method of this proxy user ugi object.
|
||||
|
||||
----
|
||||
...
|
||||
//Create ugi for joe. The login user is 'super'.
|
||||
UserGroupInformation ugi =
|
||||
UserGroupInformation.createProxyUser("joe", UserGroupInformation.getLoginUser());
|
||||
ugi.doAs(new PrivilegedExceptionAction<Void>() {
|
||||
public Void run() throws Exception {
|
||||
//Submit a job
|
||||
JobClient jc = new JobClient(conf);
|
||||
jc.submitJob(conf);
|
||||
//OR access hdfs
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
fs.mkdirs(someFilePath);
|
||||
}
|
||||
});
|
||||
----
|
||||
|
||||
* Configurations
|
||||
|
||||
The superuser must be configured on namenode and jobtracker to be
|
||||
allowed to impersonate another user. The following configurations are
|
||||
required.
|
||||
|
||||
----
|
||||
<property>
|
||||
<name>hadoop.proxyuser.super.groups</name>
|
||||
<value>group1,group2</value>
|
||||
<description>Allow the superuser super to impersonate any members of the group group1 and group2</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.super.hosts</name>
|
||||
<value>host1,host2</value>
|
||||
<description>The superuser can connect only from host1 and host2 to impersonate a user</description>
|
||||
</property>
|
||||
----
|
||||
|
||||
If these configurations are not present, impersonation will not be
|
||||
allowed and connection will fail.
|
||||
|
||||
If more lax security is preferred, the wildcard value * may be used to
|
||||
allow impersonation from any host or of any user.
|
||||
|
||||
* Caveats
|
||||
|
||||
The superuser must have kerberos credentials to be able to impersonate
|
||||
another user. It cannot use delegation tokens for this feature. It
|
||||
would be wrong if superuser adds its own delegation token to the proxy
|
||||
user ugi, as it will allow the proxy user to connect to the service
|
||||
with the privileges of the superuser.
|
||||
|
||||
However, if the superuser does want to give a delegation token to joe,
|
||||
it must first impersonate joe and get a delegation token for joe, in
|
||||
the same way as the code example above, and add it to the ugi of joe.
|
||||
In this way the delegation token will have the owner as joe.
|
|
@ -546,4 +546,44 @@ public class TestFileUtil {
|
|||
long expected = 2 * (3 + System.getProperty("line.separator").length());
|
||||
Assert.assertEquals(expected, du);
|
||||
}
|
||||
|
||||
/**
 * Extracts the given tar archive into {@code untarDir} and verifies the
 * extracted tree.
 *
 * Any existing {@code untarDir} is deleted first so that results from a
 * previous extraction cannot satisfy the checks. The archive is expected to
 * contain a top-level "name" directory holding: an empty "version" file,
 * "image/fsimage" (157 bytes), "current/fsimage" (4331 bytes),
 * "current/edits" (1033 bytes) and "current/fstime" (8 bytes).
 *
 * @param tarFile  archive to extract with {@code FileUtil.unTar}
 * @param untarDir destination directory; removed beforehand if it exists
 * @throws IOException if the existing directory cannot be deleted, or if it
 *         is propagated from the calls made here
 */
private void doUntarAndVerify(File tarFile, File untarDir)
|
||||
throws IOException {
|
||||
// Start from a clean destination; fail loudly if it cannot be removed.
if (untarDir.exists() && !FileUtil.fullyDelete(untarDir)) {
|
||||
throw new IOException("Could not delete directory '" + untarDir + "'");
|
||||
}
|
||||
FileUtil.unTar(tarFile, untarDir);
|
||||
|
||||
// Everything in the archive lives under <untarDir>/name.
String parentDir = untarDir.getCanonicalPath() + Path.SEPARATOR + "name";
|
||||
// name/version: must exist and be empty.
File testFile = new File(parentDir + Path.SEPARATOR + "version");
|
||||
Assert.assertTrue(testFile.exists());
|
||||
Assert.assertTrue(testFile.length() == 0);
|
||||
// name/image/fsimage: must exist with the exact expected size.
String imageDir = parentDir + Path.SEPARATOR + "image";
|
||||
testFile = new File(imageDir + Path.SEPARATOR + "fsimage");
|
||||
Assert.assertTrue(testFile.exists());
|
||||
Assert.assertTrue(testFile.length() == 157);
|
||||
// name/current/{fsimage,edits,fstime}: each checked for existence and size.
String currentDir = parentDir + Path.SEPARATOR + "current";
|
||||
testFile = new File(currentDir + Path.SEPARATOR + "fsimage");
|
||||
Assert.assertTrue(testFile.exists());
|
||||
Assert.assertTrue(testFile.length() == 4331);
|
||||
testFile = new File(currentDir + Path.SEPARATOR + "edits");
|
||||
Assert.assertTrue(testFile.exists());
|
||||
Assert.assertTrue(testFile.length() == 1033);
|
||||
testFile = new File(currentDir + Path.SEPARATOR + "fstime");
|
||||
Assert.assertTrue(testFile.exists());
|
||||
Assert.assertTrue(testFile.length() == 8);
|
||||
}
|
||||
|
||||
/**
 * Runs the untar-and-verify check against both a gzipped tarball
 * (test-untar.tgz) and a plain tarball (test-untar.tar).
 *
 * The archives are looked up under the "test.cache.data" system property
 * (default "build/test/cache"); the extraction target is "untarDir" under
 * the "test.build.data" system property (default "build/test/data").
 */
@Test
|
||||
public void testUntar() throws IOException {
|
||||
String tarGzFileName = System.getProperty("test.cache.data",
|
||||
"build/test/cache") + "/test-untar.tgz";
|
||||
String tarFileName = System.getProperty("test.cache.data",
|
||||
"build/test/cache") + "/test-untar.tar";
|
||||
String dataDir = System.getProperty("test.build.data", "build/test/data");
|
||||
File untarDir = new File(dataDir, "untarDir");
|
||||
|
||||
// Both runs share the same destination; doUntarAndVerify clears it first.
doUntarAndVerify(new File(tarGzFileName), untarDir);
|
||||
doUntarAndVerify(new File(tarFileName), untarDir);
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -25,11 +25,14 @@ import java.io.IOException;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assume.*;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -293,4 +296,40 @@ public class TestNativeIO {
|
|||
assertFalse(NativeIO.getGroupName(0).isEmpty());
|
||||
}
|
||||
|
||||
/**
 * Exercises {@code NativeIO.renameTo} in four scenarios inside a scratch
 * directory "renameTest" under "test.build.data" (default "build/test/data"):
 * renaming a nonexistent source (expects ENOENT), renaming a file onto
 * itself, renaming a file to a new destination, and renaming to a path whose
 * parent is a regular file (expects ENOTDIR).
 */
@Test
|
||||
public void testRenameTo() throws Exception {
|
||||
final File TEST_DIR = new File(new File(
|
||||
System.getProperty("test.build.data","build/test/data")), "renameTest");
|
||||
// NOTE(review): File.mkdirs() returns false when the directory already
// exists, so a directory left over from an earlier run makes assumeTrue
// skip this test instead of running it.
assumeTrue(TEST_DIR.mkdirs());
|
||||
File nonExistentFile = new File(TEST_DIR, "nonexistent");
|
||||
File targetFile = new File(TEST_DIR, "target");
|
||||
// Test attempting to rename a nonexistent file.
|
||||
try {
|
||||
NativeIO.renameTo(nonExistentFile, targetFile);
|
||||
Assert.fail();
|
||||
} catch (NativeIOException e) {
|
||||
// NOTE(review): JUnit's assertEquals signature is (expected, actual); the
// arguments here are in the opposite order, which only affects the wording
// of the failure message.
Assert.assertEquals(e.getErrno(), Errno.ENOENT);
|
||||
}
|
||||
|
||||
// Test renaming a file to itself. It should succeed and do nothing.
|
||||
File sourceFile = new File(TEST_DIR, "source");
|
||||
Assert.assertTrue(sourceFile.createNewFile());
|
||||
NativeIO.renameTo(sourceFile, sourceFile);
|
||||
|
||||
// Test renaming a source to a destination.
|
||||
NativeIO.renameTo(sourceFile, targetFile);
|
||||
|
||||
// Test renaming a source to a path which uses a file as a directory.
|
||||
sourceFile = new File(TEST_DIR, "source");
|
||||
// createNewFile() is asserted to succeed again here, i.e. the previous
// rename is expected to have removed "source".
Assert.assertTrue(sourceFile.createNewFile());
|
||||
// targetFile is the file produced by the rename above, so "subdir" beneath
// it is a path that treats a file as a directory.
File badTarget = new File(targetFile, "subdir");
|
||||
try {
|
||||
NativeIO.renameTo(sourceFile, badTarget);
|
||||
Assert.fail();
|
||||
} catch (NativeIOException e) {
|
||||
Assert.assertEquals(e.getErrno(), Errno.ENOTDIR);
|
||||
}
|
||||
|
||||
// NOTE(review): cleanup is not in a finally block, so it does not run when
// an assertion above fails.
FileUtils.deleteQuietly(TEST_DIR);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
|||
import org.apache.hadoop.metrics2.lib.MutableRate;
|
||||
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||
|
||||
/**
|
||||
* Test the MetricsSystemImpl class
|
||||
|
@ -80,7 +81,7 @@ public class TestMetricsSystemImpl {
|
|||
}
|
||||
}
|
||||
|
||||
@Test public void testInitFirst() throws Exception {
|
||||
@Test public void testInitFirstVerifyStopInvokedImmediately() throws Exception {
|
||||
new ConfigBuilder().add("*.period", 8)
|
||||
//.add("test.sink.plugin.urls", getPluginUrlsAsString())
|
||||
.add("test.sink.test.class", TestSink.class.getName())
|
||||
|
@ -106,14 +107,61 @@ public class TestMetricsSystemImpl {
|
|||
ms.stop();
|
||||
ms.shutdown();
|
||||
|
||||
verify(sink1, times(2)).putMetrics(r1.capture());
|
||||
//When we call stop, at most two sources will be consumed by each sink thread.
|
||||
verify(sink1, atMost(2)).putMetrics(r1.capture());
|
||||
List<MetricsRecord> mr1 = r1.getAllValues();
|
||||
verify(sink2, atMost(2)).putMetrics(r2.capture());
|
||||
List<MetricsRecord> mr2 = r2.getAllValues();
|
||||
if (mr1.size() != 0 && mr2.size() != 0) {
|
||||
checkMetricsRecords(mr1);
|
||||
assertEquals("output", mr1, mr2);
|
||||
} else if (mr1.size() != 0) {
|
||||
checkMetricsRecords(mr1);
|
||||
} else if (mr2.size() != 0) {
|
||||
checkMetricsRecords(mr2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test public void testInitFirstVerifyCallBacks() throws Exception {
|
||||
DefaultMetricsSystem.shutdown();
|
||||
new ConfigBuilder().add("*.period", 8)
|
||||
//.add("test.sink.plugin.urls", getPluginUrlsAsString())
|
||||
.add("test.sink.test.class", TestSink.class.getName())
|
||||
.add("test.*.source.filter.exclude", "s0")
|
||||
.add("test.source.s1.metric.filter.exclude", "X*")
|
||||
.add("test.sink.sink1.metric.filter.exclude", "Y*")
|
||||
.add("test.sink.sink2.metric.filter.exclude", "Y*")
|
||||
.save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test"));
|
||||
MetricsSystemImpl ms = new MetricsSystemImpl("Test");
|
||||
ms.start();
|
||||
ms.register("s0", "s0 desc", new TestSource("s0rec"));
|
||||
TestSource s1 = ms.register("s1", "s1 desc", new TestSource("s1rec"));
|
||||
s1.c1.incr();
|
||||
s1.xxx.incr();
|
||||
s1.g1.set(2);
|
||||
s1.yyy.incr(2);
|
||||
s1.s1.add(0);
|
||||
MetricsSink sink1 = mock(MetricsSink.class);
|
||||
MetricsSink sink2 = mock(MetricsSink.class);
|
||||
ms.registerSink("sink1", "sink1 desc", sink1);
|
||||
ms.registerSink("sink2", "sink2 desc", sink2);
|
||||
ms.publishMetricsNow(); // publish the metrics
|
||||
|
||||
try {
|
||||
verify(sink1, timeout(200).times(2)).putMetrics(r1.capture());
|
||||
verify(sink2, timeout(200).times(2)).putMetrics(r2.capture());
|
||||
} finally {
|
||||
ms.stop();
|
||||
ms.shutdown();
|
||||
}
|
||||
//When we call stop, at most two sources will be consumed by each sink thread.
|
||||
List<MetricsRecord> mr1 = r1.getAllValues();
|
||||
verify(sink2, times(2)).putMetrics(r2.capture());
|
||||
List<MetricsRecord> mr2 = r2.getAllValues();
|
||||
checkMetricsRecords(mr1);
|
||||
assertEquals("output", mr1, mr2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test public void testMultiThreadedPublish() throws Exception {
|
||||
new ConfigBuilder().add("*.period", 80)
|
||||
.add("test.sink.Collector.queue.capacity", "20")
|
||||
|
|
|
@ -314,6 +314,10 @@ Release 2.0.3-alpha - Unreleased
|
|||
HDFS-4369. GetBlockKeysResponseProto does not handle null response.
|
||||
(suresh)
|
||||
|
||||
HDFS-4451. hdfs balancer command returns exit code 1 on success instead
|
||||
of 0. (Joshua Blatt via suresh)
|
||||
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
HDFS-2656. Add libwebhdfs, a pure C client based on WebHDFS.
|
||||
|
@ -499,6 +503,8 @@ Release 2.0.3-alpha - Unreleased
|
|||
|
||||
HDFS-4259. Improve pipeline DN replacement failure message (harsh)
|
||||
|
||||
HDFS-3598. WebHDFS support for file concat. (Plamen Jeliazkov via shv)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-3429. DataNode reads checksums even if client does not need them (todd)
|
||||
|
@ -733,6 +739,9 @@ Release 2.0.3-alpha - Unreleased
|
|||
HDFS-4444. Add space between total transaction time and number of
|
||||
transactions in FSEditLog#printStatistics. (Stephen Chu via suresh)
|
||||
|
||||
HDFS-4428. FsDatasetImpl should disclose what the error is when a rename
|
||||
fails. (Colin Patrick McCabe via atm)
|
||||
|
||||
BREAKDOWN OF HDFS-3077 SUBTASKS
|
||||
|
||||
HDFS-3077. Quorum-based protocol for reading and writing edit logs.
|
||||
|
|
|
@ -1,237 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
<document>
|
||||
<header>
|
||||
<title>Synthetic Load Generator Guide </title>
|
||||
</header>
|
||||
<body>
|
||||
<section>
|
||||
<title>Overview</title>
|
||||
<p>
|
||||
The synthetic load generator (SLG) is a tool for testing NameNode behavior
|
||||
under different client loads. The user can generate different mixes
|
||||
of read, write, and list requests by specifying the probabilities of
|
||||
read and write. The user controls the intensity of the load by adjusting
|
||||
parameters for the number of worker threads and the delay between
|
||||
operations. While load generators are running, the user can profile and
|
||||
monitor the running of the NameNode. When a load generator exits, it
|
||||
prints some NameNode statistics like the average execution time of each
|
||||
kind of operation and the NameNode throughput.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Synopsis </title>
|
||||
<p>
|
||||
The synopsis of the command is:
|
||||
</p>
|
||||
<source>java LoadGenerator [options]</source>
|
||||
<p> Options include:</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<code>-readProbability <read probability></code><br/>
|
||||
The probability of the read operation; default is 0.3333.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-writeProbability <write probability></code><br/>
|
||||
The probability of the write operations; default is 0.3333.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-root <test space root></code><br/>
|
||||
The root of the test space; default is /testLoadSpace.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-maxDelayBetweenOps <maxDelayBetweenOpsInMillis></code><br/>
|
||||
The maximum delay between two consecutive operations in a thread; default is 0 indicating no delay.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-numOfThreads <numOfThreads></code><br/>
|
||||
The number of threads to spawn; default is 200.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-elapsedTime <elapsedTimeInSecs></code><br/>
|
||||
The number of seconds that the program
|
||||
will run; A value of zero indicates that the program runs
|
||||
forever. The default value is 0.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-startTime <startTimeInMillis></code><br/>
|
||||
The time that all worker threads
|
||||
start to run. By default it is 10 seconds after the main
|
||||
program starts running. This creates a barrier if more than
|
||||
one load generator is running.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-seed <seed></code><br/>
|
||||
The random generator seed for repeating
|
||||
requests to NameNode when running with a single thread;
|
||||
default is the current time.
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
After command line argument parsing, the load generator traverses
|
||||
the test space and builds a table of all directories and another table
|
||||
of all files in the test space. It then waits until the start time to
|
||||
spawn the number of worker threads as specified by the user.
|
||||
|
||||
Each thread sends a stream of requests to NameNode. At each iteration,
|
||||
it first decides if it is going to read a file, create a file, or
|
||||
list a directory following the read and write probabilities specified
|
||||
by the user. The listing probability is equal to
|
||||
<em>1-read probability-write probability</em>. When reading,
|
||||
it randomly picks a file in the test space and reads the entire file.
|
||||
When writing, it randomly picks a directory in the test space and
|
||||
creates a file there.
|
||||
</p>
|
||||
<p>
|
||||
To avoid two threads with the same load
|
||||
generator or from two different load generators creating the same
|
||||
file, the file name consists of the current machine's host name
|
||||
and the thread id. The length of the file follows Gaussian
|
||||
distribution with an average size of 2 blocks and the standard
|
||||
deviation of 1. The new file is filled with byte 'a'. To avoid the test
|
||||
space growing indefinitely, the file is deleted immediately
|
||||
after the file creation completes. While listing, it randomly picks
|
||||
a directory in the test space and lists its content.
|
||||
</p>
|
||||
<p>
|
||||
After an operation completes, the thread pauses for a random
|
||||
amount of time in the range of [0, maxDelayBetweenOps] if the
|
||||
specified maximum delay is not zero. All threads are stopped when
|
||||
the specified elapsed time is passed. Before exiting, the program
|
||||
prints the average execution time for each kind of NameNode operation,
|
||||
and the number of requests served by the NameNode per second.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Test Space Population </title>
|
||||
<p>
|
||||
The user needs to populate a test space before running a
|
||||
load generator. The structure generator generates a random
|
||||
test space structure and the data generator creates the files
|
||||
and directories of the test space in Hadoop distributed file system.
|
||||
</p>
|
||||
|
||||
<section>
|
||||
<title> Structure Generator </title>
|
||||
<p>
|
||||
This tool generates a random namespace structure with the
|
||||
following constraints:
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li>The number of subdirectories that a directory can have is
|
||||
a random number in [minWidth, maxWidth].</li>
|
||||
<li>The maximum depth of each subdirectory is a random number
|
||||
[2*maxDepth/3, maxDepth].</li>
|
||||
<li>Files are randomly placed in leaf directories. The size of
|
||||
each file follows Gaussian distribution with an average size
|
||||
of 1 block and a standard deviation of 1.</li>
|
||||
</ol>
|
||||
<p>
|
||||
The generated namespace structure is described by two files in
|
||||
the output directory. Each line of the first file contains the
|
||||
full name of a leaf directory. Each line of the second file
|
||||
contains the full name of a file and its size, separated by a blank.
|
||||
</p>
|
||||
<p>
|
||||
The synopsis of the command is:
|
||||
</p>
|
||||
<source>java StructureGenerator [options]</source>
|
||||
|
||||
<p>Options include:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>-maxDepth <maxDepth></code><br/>
|
||||
Maximum depth of the directory tree; default is 5.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-minWidth <minWidth></code><br/>
|
||||
Minimum number of subdirectories per directory; default is 1.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-maxWidth <maxWidth></code><br/>
|
||||
Maximum number of subdirectories per directory; default is 5.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-numOfFiles <#OfFiles></code><br/>
|
||||
The total number of files in the test space; default is 10.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-avgFileSize <avgFileSizeInBlocks></code><br/>
|
||||
Average file size in blocks; default is 1.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-outDir <outDir></code><br/>
|
||||
Output directory; default is the current directory.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
<code>-seed <seed></code><br/>
|
||||
Random number generator seed; default is the current time.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Data Generator </title>
|
||||
<p>
|
||||
This tool reads the directory structure and file structure from
|
||||
the input directory and creates the namespace in Hadoop distributed
|
||||
file system. All files are filled with byte 'a'.
|
||||
</p>
|
||||
<p>
|
||||
The synopsis of the command is:
|
||||
</p>
|
||||
<source>java DataGenerator [options]</source>
|
||||
<p>Options include:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>-inDir <inDir></code><br/>
|
||||
Input directory name where directory/file
|
||||
structures are stored; default is the current directory.
|
||||
</li>
|
||||
<li>
|
||||
<code>-root <test space root></code><br/>
|
||||
The name of the root directory which the
|
||||
new namespace is going to be placed under;
|
||||
default is "/testLoadSpace".
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
|
@ -1,395 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
|
||||
<document>
|
||||
<header>
|
||||
<title>Fault Injection Framework and Development Guide</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
<section>
|
||||
<title>Introduction</title>
|
||||
<p>This guide provides an overview of the Hadoop Fault Injection (FI) framework for those
|
||||
who will be developing their own faults (aspects).
|
||||
</p>
|
||||
<p>The idea of fault injection is fairly simple: it is an
|
||||
infusion of errors and exceptions into an application's logic to
|
||||
achieve a higher coverage and fault tolerance of the system.
|
||||
Different implementations of this idea are available today.
|
||||
Hadoop's FI framework is built on top of Aspect Oriented Paradigm
|
||||
(AOP) implemented by AspectJ toolkit.
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>Assumptions</title>
|
||||
<p>The current implementation of the FI framework assumes that the faults it
|
||||
will be emulating are of non-deterministic nature. That is, the moment
|
||||
of a fault's happening isn't known in advance and is coin-flip based.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Architecture of the Fault Injection Framework</title>
|
||||
<figure src="images/FI-framework.gif" alt="Components layout" />
|
||||
|
||||
<section>
|
||||
<title>Configuration Management</title>
|
||||
<p>This piece of the FI framework allows you to set expectations for faults to happen.
|
||||
The settings can be applied either statically (in advance) or in runtime.
|
||||
The desired level of faults in the framework can be configured two ways:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
editing
|
||||
<code>src/aop/fi-site.xml</code>
|
||||
configuration file. This file is similar to Hadoop's other config
|
||||
files
|
||||
</li>
|
||||
<li>
|
||||
setting system properties of JVM through VM startup parameters or in
|
||||
<code>build.properties</code>
|
||||
file
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Probability Model</title>
|
||||
<p>This is fundamentally a coin flipper. The methods of this class are
|
||||
getting a random number between 0.0
|
||||
and 1.0 and then checking whether the new number falls within the
|
||||
range of 0.0 and a configured level for the fault in question. If that
|
||||
condition is true then the fault will occur.
|
||||
</p>
|
||||
<p>Thus, to guarantee the happening of a fault one needs to set an
|
||||
appropriate level to 1.0.
|
||||
To completely prevent a fault from happening its probability level
|
||||
has to be set to 0.0.
|
||||
</p>
|
||||
<p><strong>Note</strong>: The default probability level is set to 0
|
||||
(zero) unless the level is changed explicitly through the
|
||||
configuration file or in the runtime. The name of the default
|
||||
level's configuration parameter is
|
||||
<code>fi.*</code>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Fault Injection Mechanism: AOP and AspectJ</title>
|
||||
<p>The foundation of Hadoop's FI framework includes a
|
||||
cross-cutting concept implemented by AspectJ. The following basic
|
||||
terms are important to remember:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<strong>A cross-cutting concept</strong>
|
||||
(aspect) is behavior, and often data, that is used across the scope
|
||||
of a piece of software
|
||||
</li>
|
||||
<li>In AOP, the
|
||||
<strong>aspects</strong>
|
||||
provide a mechanism by which a cross-cutting concern can be
|
||||
specified in a modular way
|
||||
</li>
|
||||
<li>
|
||||
<strong>Advice</strong>
|
||||
is the
|
||||
code that is executed when an aspect is invoked
|
||||
</li>
|
||||
<li>
|
||||
<strong>Join point</strong>
|
||||
(or pointcut) is a specific
|
||||
point within the application that may or may not invoke some advice
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Existing Join Points</title>
|
||||
<p>
|
||||
The following readily available join points are provided by AspectJ:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Join when a method is called
|
||||
</li>
|
||||
<li>Join during a method's execution
|
||||
</li>
|
||||
<li>Join when a constructor is invoked
|
||||
</li>
|
||||
<li>Join during a constructor's execution
|
||||
</li>
|
||||
<li>Join during aspect advice execution
|
||||
</li>
|
||||
<li>Join before an object is initialized
|
||||
</li>
|
||||
<li>Join during object initialization
|
||||
</li>
|
||||
<li>Join during static initializer execution
|
||||
</li>
|
||||
<li>Join when a class's field is referenced
|
||||
</li>
|
||||
<li>Join when a class's field is assigned
|
||||
</li>
|
||||
<li>Join when a handler is executed
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
<section>
|
||||
<title>Aspect Example</title>
|
||||
<source>
|
||||
package org.apache.hadoop.hdfs.server.datanode;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fi.ProbabilityModel;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.util.DiskChecker.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.DataOutputStream;
|
||||
|
||||
/**
|
||||
* This aspect takes care about faults injected into datanode.BlockReceiver
|
||||
* class
|
||||
*/
|
||||
public aspect BlockReceiverAspects {
|
||||
public static final Log LOG = LogFactory.getLog(BlockReceiverAspects.class);
|
||||
|
||||
public static final String BLOCK_RECEIVER_FAULT="hdfs.datanode.BlockReceiver";
|
||||
pointcut callReceivePacket() : call (* OutputStream.write(..))
|
||||
&& withincode (* BlockReceiver.receivePacket(..))
|
||||
// to further limit the application of this aspect a very narrow 'target' can be used as follows
|
||||
// && target(DataOutputStream)
|
||||
&& !within(BlockReceiverAspects +);
|
||||
|
||||
before () throws IOException : callReceivePacket () {
|
||||
if (ProbabilityModel.injectCriteria(BLOCK_RECEIVER_FAULT)) {
|
||||
LOG.info("Before the injection point");
|
||||
Thread.dumpStack();
|
||||
throw new DiskOutOfSpaceException ("FI: injected fault point at " +
|
||||
thisJoinPoint.getStaticPart( ).getSourceLocation());
|
||||
}
|
||||
}
|
||||
}
|
||||
</source>
|
||||
|
||||
<p>The aspect has two main parts: </p>
|
||||
<ul>
|
||||
<li>The join point
|
||||
<code>pointcut callReceivePacket()</code>
|
||||
which serves as an identification mark of a specific point (in control
|
||||
and/or data flow) in the life of an application. </li>
|
||||
|
||||
<li> A call to the advice -
|
||||
<code>before () throws IOException : callReceivePacket()</code>
|
||||
- will be injected (see
|
||||
<a href="#Putting+it+all+together">Putting It All Together</a>)
|
||||
before that specific spot of the application's code.</li>
|
||||
</ul>
|
||||
|
||||
|
||||
<p>The pointcut identifies an invocation of class'
|
||||
<code>java.io.OutputStream write()</code>
|
||||
method
|
||||
with any number of parameters and any return type. This invocation should
|
||||
take place within the body of method
|
||||
<code>receivePacket()</code>
|
||||
from class <code>BlockReceiver</code>.
|
||||
The method can have any parameters and any return type.
|
||||
Possible invocations of
|
||||
<code>write()</code>
|
||||
method happening anywhere within the aspect
|
||||
<code>BlockReceiverAspects</code>
|
||||
or its heirs will be ignored.
|
||||
</p>
|
||||
<p><strong>Note 1</strong>: This short example doesn't illustrate
|
||||
the fact that you can have more than a single injection point per
|
||||
class. In such a case the names of the faults have to be different
|
||||
if a developer wants to trigger them separately.
|
||||
</p>
|
||||
<p><strong>Note 2</strong>: After the injection step (see
|
||||
<a href="#Putting+it+all+together">Putting It All Together</a>)
|
||||
you can verify that the faults were properly injected by
|
||||
searching for <code>ajc</code> keywords in a disassembled class file.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Fault Naming Convention and Namespaces</title>
|
||||
<p>For the sake of a unified naming
|
||||
convention the following two types of names are recommended for a
|
||||
new aspects development:</p>
|
||||
<ul>
|
||||
<li>Activity specific notation
|
||||
(when we don't care about a particular location of a fault's
|
||||
happening). In this case the name of the fault is rather abstract:
|
||||
<code>fi.hdfs.DiskError</code>
|
||||
</li>
|
||||
<li>Location specific notation.
|
||||
Here, the fault's name is mnemonic as in:
|
||||
<code>fi.hdfs.datanode.BlockReceiver[optional location details]</code>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Development Tools</title>
|
||||
<ul>
|
||||
<li>The Eclipse
|
||||
<a href="http://www.eclipse.org/ajdt/">AspectJ Development Toolkit</a>
|
||||
may help you when developing aspects
|
||||
</li>
|
||||
<li>IntelliJ IDEA provides AspectJ weaver and Spring-AOP plugins
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Putting It All Together</title>
|
||||
<p>Faults (aspects) have to be injected (or woven) together before
|
||||
they can be used. Follow these instructions:</p>
|
||||
|
||||
<ul>
|
||||
<li>To weave aspects in place use:
|
||||
<source>
|
||||
% ant injectfaults
|
||||
</source>
|
||||
</li>
|
||||
|
||||
<li>If you
|
||||
misidentified the join point of your aspect you will see a
|
||||
warning (similar to the one shown here) when 'injectfaults' target is
|
||||
completed:
|
||||
<source>
|
||||
[iajc] warning at
|
||||
src/test/aop/org/apache/hadoop/hdfs/server/datanode/ \
|
||||
BlockReceiverAspects.aj:44::0
|
||||
advice defined in org.apache.hadoop.hdfs.server.datanode.BlockReceiverAspects
|
||||
has not been applied [Xlint:adviceDidNotMatch]
|
||||
</source>
|
||||
</li>
|
||||
|
||||
<li>It isn't an error, so the build will report the successful result. <br />
|
||||
To prepare the dev.jar file with all your faults woven in place (HDFS-475 pending) use:
|
||||
<source>
|
||||
% ant jar-fault-inject
|
||||
</source>
|
||||
</li>
|
||||
|
||||
<li>To create test jars use:
|
||||
<source>
|
||||
% ant jar-test-fault-inject
|
||||
</source>
|
||||
</li>
|
||||
|
||||
<li>To run HDFS tests with faults injected use:
|
||||
<source>
|
||||
% ant run-test-hdfs-fault-inject
|
||||
</source>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<section>
|
||||
<title>How to Use the Fault Injection Framework</title>
|
||||
<p>Faults can be triggered as follows:
|
||||
</p>
|
||||
<ul>
|
||||
<li>During runtime:
|
||||
<source>
|
||||
% ant run-test-hdfs -Dfi.hdfs.datanode.BlockReceiver=0.12
|
||||
</source>
|
||||
To set a certain level, for example 25%, of all injected faults use:
|
||||
<br/>
|
||||
<source>
|
||||
% ant run-test-hdfs-fault-inject -Dfi.*=0.25
|
||||
</source>
|
||||
</li>
|
||||
<li>From a program:
|
||||
|
||||
<source>
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.Before;
|
||||
|
||||
public class DemoFiTest {
|
||||
public static final String BLOCK_RECEIVER_FAULT="hdfs.datanode.BlockReceiver";
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() {
|
||||
//Setting up the test's environment as required
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFI() {
|
||||
// It triggers the fault, assuming that there's one called 'hdfs.datanode.BlockReceiver'
|
||||
System.setProperty("fi." + BLOCK_RECEIVER_FAULT, "0.12");
|
||||
//
|
||||
// The main logic of your tests goes here
|
||||
//
|
||||
// Now set the level back to 0 (zero) to prevent this fault from happening again
|
||||
System.setProperty("fi." + BLOCK_RECEIVER_FAULT, "0.0");
|
||||
// or delete its trigger completely
|
||||
System.getProperties().remove("fi." + BLOCK_RECEIVER_FAULT);
|
||||
}
|
||||
|
||||
@Override
|
||||
@After
|
||||
public void tearDown() {
|
||||
//Cleaning up the test environment
|
||||
}
|
||||
}
|
||||
</source>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
As you can see above these two methods do the same thing. They are
|
||||
setting the probability level of <code>hdfs.datanode.BlockReceiver</code>
|
||||
at 12%. The difference, however, is that the program provides more
|
||||
flexibility and allows you to turn a fault off when a test no longer needs it.
|
||||
</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Additional Information and Contacts</title>
|
||||
<p>These two sources of information are particularly
|
||||
interesting and worth reading:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="http://www.eclipse.org/aspectj/doc/next/devguide/">
|
||||
http://www.eclipse.org/aspectj/doc/next/devguide/
|
||||
</a>
|
||||
</li>
|
||||
<li>AspectJ Cookbook (ISBN-13: 978-0-596-00654-9)
|
||||
</li>
|
||||
</ul>
|
||||
<p>If you have additional comments or questions for the author check
|
||||
<a href="http://issues.apache.org/jira/browse/HDFS-435">HDFS-435</a>.
|
||||
</p>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
|
@ -1,157 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>Offline Edits Viewer Guide</title>
|
||||
<authors>
|
||||
<person name="Erik Steffl" email="steffl@yahoo-inc.com"/>
|
||||
</authors>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
|
||||
<section>
|
||||
|
||||
<title>Overview</title>
|
||||
|
||||
<p>
|
||||
Offline Edits Viewer is a tool to parse the Edits log file. The
|
||||
current processors are mostly useful for conversion between
|
||||
different formats, including XML which is human readable and
|
||||
easier to edit than native binary format.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The tool can parse the edits formats -18 (roughly Hadoop 0.19)
|
||||
and later. The tool operates on files only, it does not need
|
||||
a Hadoop cluster to be running.
|
||||
</p>
|
||||
|
||||
<p>Input formats supported:</p>
|
||||
<ol>
|
||||
<li><strong>binary</strong>: native binary format that Hadoop uses internally</li>
|
||||
<li>
|
||||
<strong>xml</strong>: XML format, as produced by
|
||||
<strong>xml</strong> processor, used if filename has xml
|
||||
(case insensitive) extension
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
The Offline Edits Viewer provides several output processors
|
||||
(unless stated otherwise the output of the processor can be
|
||||
converted back to original edits file):
|
||||
</p>
|
||||
<ol>
|
||||
<li><strong>binary</strong>: native binary format that Hadoop uses internally</li>
|
||||
<li><strong>xml</strong>: XML format</li>
|
||||
<li><strong>stats</strong>: prints out statistics, this cannot be converted back to Edits file</li>
|
||||
</ol>
|
||||
|
||||
</section> <!-- Overview -->
|
||||
|
||||
<section>
|
||||
|
||||
<title>Usage</title>
|
||||
|
||||
<p><code>bash$ bin/hdfs oev -i edits -o edits.xml</code></p>
|
||||
|
||||
<table>
|
||||
<tr><th>Flag</th><th>Description</th></tr>
|
||||
<tr>
|
||||
<td><code>[-i|--inputFile] <input file></code></td>
|
||||
<td>
|
||||
Specify the input edits log file to process. Xml (case
|
||||
insensitive) extension means XML format otherwise binary
|
||||
format is assumed. Required.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[-o|--outputFile] <output file></code></td>
|
||||
<td>
|
||||
Specify the output filename, if the specified output processor
|
||||
generates one. If the specified file already exists, it is
|
||||
silently overwritten. Required.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[-p|--processor] <processor></code></td>
|
||||
<td>
|
||||
Specify the image processor to apply against the image
|
||||
file. Currently valid options are <strong>binary</strong>,
|
||||
<strong>xml</strong> (default) and <strong>stats</strong>.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[-v|--verbose]</code></td>
|
||||
<td>
|
||||
Print the input and output filenames and pipe output of
|
||||
processor to console as well as specified file. On extremely
|
||||
large files, this may increase processing time by an order
|
||||
of magnitude.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[-h|--help]</code></td>
|
||||
<td>
|
||||
Display the tool usage and help information and exit.
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
</section> <!-- Usage -->
|
||||
|
||||
<section>
|
||||
|
||||
<title>Case study: Hadoop cluster recovery</title>
|
||||
|
||||
<p>
|
||||
In case there is some problem with the Hadoop cluster and the edits
|
||||
file is corrupted it is possible to save at least part of the
|
||||
edits file that is correct. This can be done by converting the
|
||||
binary edits to XML, editing it manually and then converting it back
|
||||
to binary. The most common problem is that the edits file is
|
||||
missing the closing record (record that has opCode -1). This
|
||||
should be recognized by the tool and the XML format should be
|
||||
properly closed.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If there is no closing record in the XML file you can add one
|
||||
after the last correct record. Anything after the record with opCode
|
||||
-1 is ignored.
|
||||
</p>
|
||||
|
||||
<p>Example of a closing record (with opCode -1):</p>
|
||||
<source>
|
||||
<RECORD>
|
||||
<OPCODE>-1</OPCODE>
|
||||
<DATA>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
</source>
|
||||
|
||||
</section> <!-- Case study: Hadoop cluster recovery -->
|
||||
|
||||
</body>
|
||||
|
||||
</document>
|
|
@ -1,427 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>Offline Image Viewer Guide</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
|
||||
<section>
|
||||
<title>Overview</title>
|
||||
|
||||
<p>The Offline Image Viewer is a tool to dump the contents of hdfs
|
||||
fsimage files to human-readable formats in order to allow offline analysis
|
||||
and examination of an Hadoop cluster's namespace. The tool is able to
|
||||
process very large image files relatively quickly, converting them to
|
||||
one of several output formats. The tool handles the layout formats that
|
||||
were included with Hadoop versions 16 and up. If the tool is not able to
|
||||
process an image file, it will exit cleanly. The Offline Image Viewer does not require
|
||||
an Hadoop cluster to be running; it is entirely offline in its operation.</p>
|
||||
|
||||
<p>The Offline Image Viewer provides several output processors:</p>
|
||||
<ol>
|
||||
<li><strong>Ls</strong> is the default output processor. It closely mimics the format of
|
||||
the <code>lsr </code> command. It includes the same fields, in the same order, as
|
||||
<code>lsr </code>: directory or file flag, permissions, replication, owner, group,
|
||||
file size, modification date, and full path. Unlike the <code>lsr </code> command,
|
||||
the root path is included. One important difference between the output
|
||||
of the <code>lsr </code> command and this processor is that this output is not sorted
|
||||
by directory name and contents. Rather, the files are listed in the
|
||||
order in which they are stored in the fsimage file. Therefore, it is
|
||||
not possible to directly compare the output of the <code>lsr </code> command and
|
||||
this tool. The Ls processor uses information contained within the Inode blocks to
|
||||
calculate file sizes and ignores the <code>-skipBlocks</code> option.</li>
|
||||
<li><strong>Indented</strong> provides a more complete view of the fsimage's contents,
|
||||
including all of the information included in the image, such as image
|
||||
version, generation stamp and inode- and block-specific listings. This
|
||||
processor uses indentation to organize the output in a hierarchical manner.
|
||||
The <code>lsr </code> format is suitable for easy human comprehension.</li>
|
||||
<li><strong>Delimited</strong> provides one file per line consisting of the path,
|
||||
replication, modification time, access time, block size, number of blocks, file size,
|
||||
namespace quota, diskspace quota, permissions, username and group name. If run against
|
||||
an fsimage that does not contain any of these fields, the field's column will be included,
|
||||
but no data recorded. The default record delimiter is a tab, but this may be changed
|
||||
via the <code>-delimiter</code> command line argument. This processor is designed to
|
||||
create output that is easily analyzed by other tools, such as <a href="http://hadoop.apache.org/pig/">Apache Pig</a>.
|
||||
See the <a href="#analysis">Analyzing Results</a> section
|
||||
for further information on using this processor to analyze the contents of fsimage files.</li>
|
||||
<li><strong>XML</strong> creates an XML document of the fsimage and includes all of the
|
||||
information within the fsimage, similar to the <code>lsr </code> processor. The output
|
||||
of this processor is amenable to automated processing and analysis with XML tools.
|
||||
Due to the verbosity of the XML syntax, this processor will also generate
|
||||
the largest amount of output.</li>
|
||||
<li><strong>FileDistribution</strong> is the tool for analyzing file
|
||||
sizes in the namespace image. In order to run the tool one should
|
||||
define a range of integers <code>[0, maxSize]</code> by specifying
|
||||
<code>maxSize</code> and a <code>step</code>.
|
||||
The range of integers is divided into segments of size
|
||||
<code>step</code>:
|
||||
<code>[0, s</code><sub>1</sub><code>, ..., s</code><sub>n-1</sub><code>, maxSize]</code>,
|
||||
and the processor calculates how many files in the system fall into
|
||||
each segment <code>[s</code><sub>i-1</sub><code>, s</code><sub>i</sub><code>)</code>.
|
||||
Note that files larger than <code>maxSize</code> always fall into
|
||||
the very last segment.
|
||||
The output file is formatted as a tab separated two column table:
|
||||
Size and NumFiles. Where Size represents the start of the segment,
|
||||
and NumFiles is the number of files from the image whose size falls
|
||||
in this segment.</li>
|
||||
</ol>
|
||||
|
||||
</section> <!-- overview -->
|
||||
|
||||
<section>
|
||||
<title>Usage</title>
|
||||
|
||||
<section>
|
||||
<title>Basic</title>
|
||||
<p>The simplest usage of the Offline Image Viewer is to provide just an input and output
|
||||
file, via the <code>-i</code> and <code>-o</code> command-line switches:</p>
|
||||
|
||||
<p><code>bash$ bin/hdfs oiv -i fsimage -o fsimage.txt</code><br/></p>
|
||||
|
||||
<p>This will create a file named fsimage.txt in the current directory using
|
||||
the Ls output processor. For very large image files, this process may take
|
||||
several minutes.</p>
|
||||
|
||||
<p>One can specify which output processor to use via the command-line switch <code>-p</code>.
|
||||
For instance:</p>
|
||||
<p><code>bash$ bin/hdfs oiv -i fsimage -o fsimage.xml -p XML</code><br/></p>
|
||||
|
||||
<p>or</p>
|
||||
|
||||
<p><code>bash$ bin/hdfs oiv -i fsimage -o fsimage.txt -p Indented</code><br/></p>
|
||||
|
||||
<p>This will run the tool using either the XML or Indented output processor,
|
||||
respectively.</p>
|
||||
|
||||
<p>One command-line option worth considering is <code>-skipBlocks</code>, which
|
||||
prevents the tool from explicitly enumerating all of the blocks that make up
|
||||
a file in the namespace. This is useful for file systems that have very large
|
||||
files. Enabling this option can significantly decrease the size of the resulting
|
||||
output, as individual blocks are not included. Note, however, that the Ls processor
|
||||
needs to enumerate the blocks and so overrides this option.</p>
|
||||
|
||||
</section> <!-- Basic -->
|
||||
<section id="Example">
|
||||
<title>Example</title>
|
||||
|
||||
<p>Consider the following contrived namespace:</p>
|
||||
<source>
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:17 /anotherDir
|
||||
|
||||
-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 21:15 /anotherDir/biggerfile
|
||||
|
||||
-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 21:17 /anotherDir/smallFile
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem
|
||||
|
||||
drwx-wx-wx - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one/two
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:16 /user
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:19 /user/theuser
|
||||
</source>
|
||||
|
||||
<p>Applying the Offline Image Processor against this file with default options would result in the following output:</p>
|
||||
<source>
|
||||
machine:hadoop-0.21.0-dev theuser$ bin/hdfs oiv -i fsimagedemo -o fsimage.txt
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:17 /anotherDir
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /user
|
||||
|
||||
-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 14:15 /anotherDir/biggerfile
|
||||
|
||||
-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 14:17 /anotherDir/smallFile
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem
|
||||
|
||||
drwx-wx-wx - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one/two
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:19 /user/theuser
|
||||
</source>
|
||||
|
||||
<p>Similarly, applying the Indented processor would generate output that begins with:</p>
|
||||
<source>
|
||||
machine:hadoop-0.21.0-dev theuser$ bin/hdfs oiv -i fsimagedemo -p Indented -o fsimage.txt
|
||||
|
||||
FSImage
|
||||
|
||||
ImageVersion = -19
|
||||
|
||||
NamespaceID = 2109123098
|
||||
|
||||
GenerationStamp = 1003
|
||||
|
||||
INodes [NumInodes = 12]
|
||||
|
||||
Inode
|
||||
|
||||
INodePath =
|
||||
|
||||
Replication = 0
|
||||
|
||||
ModificationTime = 2009-03-16 14:16
|
||||
|
||||
AccessTime = 1969-12-31 16:00
|
||||
|
||||
BlockSize = 0
|
||||
|
||||
Blocks [NumBlocks = -1]
|
||||
|
||||
NSQuota = 2147483647
|
||||
|
||||
DSQuota = -1
|
||||
|
||||
Permissions
|
||||
|
||||
Username = theuser
|
||||
|
||||
GroupName = supergroup
|
||||
|
||||
PermString = rwxr-xr-x
|
||||
|
||||
...remaining output omitted...
|
||||
</source>
|
||||
|
||||
</section> <!-- example-->
|
||||
|
||||
</section>
|
||||
|
||||
<section id="options">
|
||||
<title>Options</title>
|
||||
|
||||
<section>
|
||||
<title>Option Index</title>
|
||||
<table>
|
||||
<tr><th> Flag </th><th> Description </th></tr>
|
||||
<tr><td><code>[-i|--inputFile] <input file></code></td>
|
||||
<td>Specify the input fsimage file to process. Required.</td></tr>
|
||||
<tr><td><code>[-o|--outputFile] <output file></code></td>
|
||||
<td>Specify the output filename, if the specified output processor
|
||||
generates one. If the specified file already exists, it is silently overwritten. Required.
|
||||
</td></tr>
|
||||
<tr><td><code>[-p|--processor] <processor></code></td>
|
||||
<td>Specify the image processor to apply against the image file. Currently
|
||||
valid options are Ls (default), XML and Indented.
|
||||
</td></tr>
|
||||
<tr><td><code>-skipBlocks</code></td>
|
||||
<td>Do not enumerate individual blocks within files. This may save processing time
|
||||
and output file space on namespaces with very large files. The <code>Ls</code> processor reads
|
||||
the blocks to correctly determine file sizes and ignores this option.</td></tr>
|
||||
<tr><td><code>-printToScreen</code></td>
|
||||
<td>Pipe output of processor to console as well as specified file. On extremely
|
||||
large namespaces, this may increase processing time by an order of magnitude.</td></tr>
|
||||
<tr><td><code>-delimiter <arg></code></td>
|
||||
<td>When used in conjunction with the Delimited processor, replaces the default
|
||||
tab delimiter with the string specified by <code>arg</code>.</td></tr>
|
||||
<tr><td><code>[-h|--help]</code></td>
|
||||
<td>Display the tool usage and help information and exit.</td></tr>
|
||||
</table>
|
||||
</section> <!-- options -->
|
||||
</section>
|
||||
|
||||
<section id="analysis">
|
||||
<title>Analyzing Results</title>
|
||||
<p>The Offline Image Viewer makes it easy to gather large amounts of data about the hdfs namespace.
|
||||
This information can then be used to explore file system usage patterns or find
|
||||
specific files that match arbitrary criteria, along with other types of namespace analysis. The Delimited
|
||||
image processor in particular creates
|
||||
output that is amenable to further processing by tools such as <a href="http://hadoop.apache.org/pig/">Apache Pig</a>. Pig provides a particularly
|
||||
good choice for analyzing these data as it is able to deal with the output generated from a small fsimage
|
||||
but also scales up to consume data from extremely large file systems.</p>
|
||||
<p>The Delimited image processor generates lines of text separated, by default, by tabs and includes
|
||||
all of the fields that are common between constructed files and files that were still under construction
|
||||
when the fsimage was generated. Example scripts are provided demonstrating how to use this output to
|
||||
accomplish three tasks: determine the number of files each user has created on the file system,
|
||||
find files that were created but have not been accessed, and find probable duplicates of large files by comparing
|
||||
the size of each file.</p>
|
||||
<p>Each of the following scripts assumes you have generated an output file using the Delimited processor named
|
||||
<code>foo</code> and will be storing the results of the Pig analysis in a file named <code>results</code>.</p>
|
||||
<section>
|
||||
<title>Total Number of Files for Each User</title>
|
||||
<p>This script processes each path within the namespace, groups them by the file owner and determines the total
|
||||
number of files each user owns.</p>
|
||||
<p><strong>numFilesOfEachUser.pig:</strong></p>
|
||||
<source>
|
||||
-- This script determines the total number of files each user has in
|
||||
-- the namespace. Its output is of the form:
|
||||
-- username, totalNumFiles
|
||||
|
||||
-- Load all of the fields from the file
|
||||
A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
|
||||
replication:int,
|
||||
modTime:chararray,
|
||||
accessTime:chararray,
|
||||
blockSize:long,
|
||||
numBlocks:int,
|
||||
fileSize:long,
|
||||
NamespaceQuota:int,
|
||||
DiskspaceQuota:int,
|
||||
perms:chararray,
|
||||
username:chararray,
|
||||
groupname:chararray);
|
||||
|
||||
|
||||
-- Grab just the path and username
|
||||
B = FOREACH A GENERATE path, username;
|
||||
|
||||
-- Generate the sum of the number of paths for each user
|
||||
C = FOREACH (GROUP B BY username) GENERATE group, COUNT(B.path);
|
||||
|
||||
-- Save results
|
||||
STORE C INTO '$outputFile';
|
||||
</source>
|
||||
<p>This script can be run against pig with the following command:</p>
|
||||
<p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../numFilesOfEachUser.pig</code><br/></p>
|
||||
<p>The output file's content will be similar to that below:</p>
|
||||
<p>
|
||||
<code>bart 1</code><br/>
|
||||
<code>lisa 16</code><br/>
|
||||
<code>homer 28</code><br/>
|
||||
<code>marge 2456</code><br/>
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section><title>Files That Have Never Been Accessed</title>
|
||||
<p>This script finds files that were created but whose access times were never changed, meaning they were never opened or viewed.</p>
|
||||
<p><strong>neverAccessed.pig:</strong></p>
|
||||
<source>
|
||||
-- This script generates a list of files that were created but never
|
||||
-- accessed, based on their AccessTime
|
||||
|
||||
-- Load all of the fields from the file
|
||||
A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
|
||||
replication:int,
|
||||
modTime:chararray,
|
||||
accessTime:chararray,
|
||||
blockSize:long,
|
||||
numBlocks:int,
|
||||
fileSize:long,
|
||||
NamespaceQuota:int,
|
||||
DiskspaceQuota:int,
|
||||
perms:chararray,
|
||||
username:chararray,
|
||||
groupname:chararray);
|
||||
|
||||
-- Grab just the path and last time the file was accessed
|
||||
B = FOREACH A GENERATE path, accessTime;
|
||||
|
||||
-- Drop all the paths that don't have the default assigned last-access time
|
||||
C = FILTER B BY accessTime == '1969-12-31 16:00';
|
||||
|
||||
-- Drop the accessTimes, since they're all the same
|
||||
D = FOREACH C GENERATE path;
|
||||
|
||||
-- Save results
|
||||
STORE D INTO '$outputFile';
|
||||
</source>
|
||||
<p>This script can be run against pig with the following command and its output file's content will be a list of files that were created but never viewed afterwards.</p>
|
||||
<p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../neverAccessed.pig</code><br/></p>
|
||||
</section>
|
||||
<section><title>Probable Duplicated Files Based on File Size</title>
|
||||
<p>This script groups files together based on their size, drops any that are of less than 100mb and returns a list of the file size, number of files found and a tuple of the file paths. This can be used to find likely duplicates within the filesystem namespace.</p>
|
||||
|
||||
<p><strong>probableDuplicates.pig:</strong></p>
|
||||
<source>
|
||||
-- This script finds probable duplicate files greater than 100 MB by
|
||||
-- grouping together files based on their byte size. Files of this size
|
||||
-- with exactly the same number of bytes can be considered probable
|
||||
-- duplicates, but should be checked further, either by comparing the
|
||||
-- contents directly or by another proxy, such as a hash of the contents.
|
||||
-- The script's output is of the type:
|
||||
-- fileSize numProbableDuplicates {(probableDup1), (probableDup2)}
|
||||
|
||||
-- Load all of the fields from the file
|
||||
A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
|
||||
replication:int,
|
||||
modTime:chararray,
|
||||
accessTime:chararray,
|
||||
blockSize:long,
|
||||
numBlocks:int,
|
||||
fileSize:long,
|
||||
NamespaceQuota:int,
|
||||
DiskspaceQuota:int,
|
||||
perms:chararray,
|
||||
username:chararray,
|
||||
groupname:chararray);
|
||||
|
||||
-- Grab the pathname and filesize
|
||||
B = FOREACH A generate path, fileSize;
|
||||
|
||||
-- Drop files smaller than 100 MB
|
||||
C = FILTER B by fileSize > 100L * 1024L * 1024L;
|
||||
|
||||
-- Gather all the files of the same byte size
|
||||
D = GROUP C by fileSize;
|
||||
|
||||
-- Generate path, num of duplicates, list of duplicates
|
||||
E = FOREACH D generate group AS fileSize, COUNT(C) as numDupes, C.path AS files;
|
||||
|
||||
-- Drop all the files where there is only one of them
|
||||
F = FILTER E by numDupes > 1L;
|
||||
|
||||
-- Sort by the size of the files
|
||||
G = ORDER F by fileSize;
|
||||
|
||||
-- Save results
|
||||
STORE G INTO '$outputFile';
|
||||
</source>
|
||||
<p>This script can be run against pig with the following command:</p>
|
||||
<p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../probableDuplicates.pig</code><br/></p>
|
||||
<p> The output file's content will be similar to that below:</p>
|
||||
|
||||
<source>
|
||||
1077288632 2 {(/user/tennant/work1/part-00501),(/user/tennant/work1/part-00993)}
|
||||
1077288664 4 {(/user/tennant/work0/part-00567),(/user/tennant/work0/part-03980),(/user/tennant/work1/part-00725),(/user/eccelston/output/part-03395)}
|
||||
1077288668 3 {(/user/tennant/work0/part-03705),(/user/tennant/work0/part-04242),(/user/tennant/work1/part-03839)}
|
||||
1077288698 2 {(/user/tennant/work0/part-00435),(/user/eccelston/output/part-01382)}
|
||||
1077288702 2 {(/user/tennant/work0/part-03864),(/user/eccelston/output/part-03234)}
|
||||
</source>
|
||||
<p>Each line includes the file size in bytes that was found to be duplicated, the number of duplicates found, and a list of the duplicated paths.
|
||||
Files less than 100MB are ignored, providing a reasonable likelihood that files of these exact sizes may be duplicates.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</body>
|
||||
|
||||
</document>
|
|
@ -1,260 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
|
||||
"http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>
|
||||
Permissions Guide
|
||||
</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
<section> <title>Overview</title>
|
||||
<p>
|
||||
The Hadoop Distributed File System (HDFS) implements a permissions model for files and directories that shares much of the POSIX model.
|
||||
Each file and directory is associated with an <em>owner</em> and a <em>group</em>. The file or directory has separate permissions for the
|
||||
user that is the owner, for other users that are members of the group, and for all other users.
|
||||
|
||||
For files, the <em>r</em> permission is required to read the file, and the <em>w</em> permission is required to write or append to the file.
|
||||
|
||||
For directories, the <em>r</em> permission is required to list the contents of the directory, the <em>w</em> permission is required to create
|
||||
or delete files or directories, and the <em>x</em> permission is required to access a child of the directory.
|
||||
</p>
|
||||
<p>
|
||||
In contrast to the POSIX model, there are no <em>setuid</em> or <em>setgid</em> bits for files as there is no notion of executable files.
|
||||
For directories, there are no <em>setuid</em> or <em>setgid</em> bits as a simplification. The <em>Sticky bit</em> can be set
|
||||
on directories, preventing anyone except the superuser, directory owner or file owner from deleting or moving the files within the directory.
|
||||
Setting the sticky bit for a file has no effect. Collectively, the permissions of a file or directory are its <em>mode</em>. In general, Unix
|
||||
customs for representing and displaying modes will be used, including the use of octal numbers in this description. When a file or directory
|
||||
is created, its owner is the user identity of the client process, and its group is the group of the parent directory (the BSD rule).
|
||||
</p>
|
||||
<p>
|
||||
Each client process that accesses HDFS has a two-part identity composed of the <em>user name</em>, and <em>groups list</em>.
|
||||
Whenever HDFS must do a permissions check for a file or directory <code>foo</code> accessed by a client process,
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
If the user name matches the owner of <code>foo</code>, then the owner permissions are tested;
|
||||
</li>
|
||||
<li>
|
||||
Else if the group of <code>foo</code> matches any member of the groups list, then the group permissions are tested;
|
||||
</li>
|
||||
<li>
|
||||
Otherwise the other permissions of <code>foo</code> are tested.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
If a permissions check fails, the client operation fails.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section><title>User Identity</title>
|
||||
<p>
|
||||
As of Hadoop 0.22, Hadoop supports two different modes of operation to determine the user's identity, specified by the
|
||||
<code>hadoop.security.authentication</code> property:
|
||||
</p>
|
||||
<dl>
|
||||
<dt><code>simple</code></dt>
|
||||
<dd>In this mode of operation, the identity of a client process is determined by the host operating system. On Unix-like systems,
|
||||
the user name is the equivalent of <code>`whoami`</code>.</dd>
|
||||
<dt><code>kerberos</code></dt>
|
||||
<dd>In Kerberized operation, the identity of a client process is determined by its Kerberos credentials. For example, in a
|
||||
Kerberized environment, a user may use the <code>kinit</code> utility to obtain a Kerberos ticket-granting-ticket (TGT) and
|
||||
use <code>klist</code> to determine their current principal. When mapping a Kerberos principal to an HDFS username, all <em>components</em> except for the <em>primary</em> are dropped. For example, a principal <code>todd/foobar@CORP.COMPANY.COM</code> will act as the simple username <code>todd</code> on HDFS.
|
||||
</dd>
|
||||
</dl>
|
||||
<p>
|
||||
Regardless of the mode of operation, the user identity mechanism is extrinsic to HDFS itself.
|
||||
There is no provision within HDFS for creating user identities, establishing groups, or processing user credentials.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section><title>Group Mapping</title>
|
||||
<p>
|
||||
Once a username has been determined as described above, the list of groups is
|
||||
determined by a <em>group mapping service</em>, configured by the
|
||||
<code>hadoop.security.group.mapping</code> property. Refer to the
|
||||
core-default.xml for details of the <code>hadoop.security.group.mapping</code>
|
||||
implementation.
|
||||
</p>
|
||||
<p>
|
||||
An alternate implementation, which connects directly to an LDAP server to resolve the list of groups, is available
|
||||
via <code>org.apache.hadoop.security.LdapGroupsMapping</code>. However, this provider should only be used if the
|
||||
required groups reside exclusively in LDAP, and are not materialized on the Unix servers. More information on
|
||||
configuring the group mapping service is available in the Javadocs.
|
||||
</p>
|
||||
<p>
|
||||
For HDFS, the mapping of users to groups is performed on the NameNode. Thus, the host system configuration of
|
||||
the NameNode determines the group mappings for the users.
|
||||
</p>
|
||||
<p>
|
||||
Note that HDFS stores the user and group of a file or directory as strings; there is no conversion from user and
|
||||
group identity numbers as is conventional in Unix.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section> <title>Understanding the Implementation</title>
|
||||
<p>
|
||||
Each file or directory operation passes the full path name to the name node, and the permissions checks are applied along the
|
||||
path for each operation. The client framework will implicitly associate the user identity with the connection to the name node,
|
||||
reducing the need for changes to the existing client API. It has always been the case that when one operation on a file succeeds,
|
||||
the operation might fail when repeated because the file, or some directory on the path, no longer exists. For instance, when the
|
||||
client first begins reading a file, it makes a first request to the name node to discover the location of the first blocks of the file.
|
||||
A second request made to find additional blocks may fail. On the other hand, deleting a file does not revoke access by a client
|
||||
that already knows the blocks of the file. With the addition of permissions, a client's access to a file may be withdrawn between
|
||||
requests. Again, changing permissions does not revoke the access of a client that already knows the file's blocks.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section> <title>Changes to the File System API</title>
|
||||
<p>
|
||||
All methods that use a path parameter will throw <code>AccessControlException</code> if permission checking fails.
|
||||
</p>
|
||||
<p>New methods:</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short
|
||||
replication, long blockSize, Progressable progress) throws IOException;</code>
|
||||
</li>
|
||||
<li>
|
||||
<code>public boolean mkdirs(Path f, FsPermission permission) throws IOException;</code>
|
||||
</li>
|
||||
<li>
|
||||
<code>public void setPermission(Path p, FsPermission permission) throws IOException;</code>
|
||||
</li>
|
||||
<li>
|
||||
<code>public void setOwner(Path p, String username, String groupname) throws IOException;</code>
|
||||
</li>
|
||||
<li>
|
||||
<code>public FileStatus getFileStatus(Path f) throws IOException;</code> will additionally return the user,
|
||||
group and mode associated with the path.
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
<p>
|
||||
The mode of a new file or directory is restricted by the <code>umask</code> set as a configuration parameter.
|
||||
When the existing <code>create(path, …)</code> method (<em>without</em> the permission parameter)
|
||||
is used, the mode of the new file is <code>666 & ^umask</code>. When the
|
||||
new <code>create(path, </code><em>permission</em><code>, …)</code> method
|
||||
(<em>with</em> the permission parameter <em>P</em>) is used, the mode of the new file is
|
||||
<code>P & ^umask & 666</code>. When a new directory is
|
||||
created with the existing <code>mkdirs(path)</code> method (<em>without</em> the permission parameter),
|
||||
the mode of the new directory is <code>777 & ^umask</code>. When the
|
||||
new <code>mkdirs(path, </code><em>permission</em> <code>)</code> method (<em>with</em> the
|
||||
permission parameter <em>P</em>) is used, the mode of new directory is
|
||||
<code>P & ^umask & 777</code>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
|
||||
<section> <title>Changes to the Application Shell</title>
|
||||
<p>New operations:</p>
|
||||
<ul>
|
||||
<li><code>chmod [-R]</code> <em>mode file …</em>
|
||||
<br />Only the owner of a file or the super-user is permitted to change the mode of a file.
|
||||
</li>
|
||||
|
||||
<li><code>chgrp [-R]</code> <em>group file …</em>
|
||||
<br />The user invoking <code>chgrp</code> must belong to the specified group and be the owner of the file, or be the super-user.
|
||||
</li>
|
||||
|
||||
<li><code>chown [-R]</code> <em>[owner][:[group]] file …</em>
|
||||
<br />The owner of a file may only be altered by a super-user.
|
||||
</li>
|
||||
|
||||
<li><code>ls </code> <em>file …</em>
|
||||
</li>
|
||||
|
||||
<li><code>lsr </code> <em>file …</em>
|
||||
<br />The output is reformatted to display the owner, group and mode.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
|
||||
<section> <title>The Super-User</title>
|
||||
<p>
|
||||
The super-user is the user with the same identity as the name node process itself. Loosely, if you started the name
|
||||
node, then you are the super-user. The super-user can do anything in that permissions checks never fail for the
|
||||
super-user. There is no persistent notion of who <em>was</em> the super-user; when the name node is started
|
||||
the process identity determines who is the super-user <em>for now</em>. The HDFS super-user does not have
|
||||
to be the super-user of the name node host, nor is it necessary that all clusters have the same super-user. Also,
|
||||
an experimenter running HDFS on a personal workstation, conveniently becomes that installation's super-user
|
||||
without any configuration.
|
||||
</p>
|
||||
<p>
|
||||
In addition, the administrator may identify a distinguished group using a configuration parameter. If set, members
|
||||
of this group are also super-users.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section> <title>The Web Server</title>
|
||||
<p>
|
||||
By default, the identity of the web server is a configuration parameter. That is, the name node has no notion of the identity of
|
||||
the <em>real</em> user, but the web server behaves as if it has the identity (user and groups) of a user chosen
|
||||
by the administrator. Unless the chosen identity matches the super-user, parts of the name space may be inaccessible
|
||||
to the web server.</p>
|
||||
</section>
|
||||
|
||||
<section> <title>Configuration Parameters</title>
|
||||
<ul>
|
||||
<li><code>dfs.permissions = true </code>
|
||||
<br />If <code>true</code>, use the permissions system as described here. If <code>false</code>, permission
|
||||
<em>checking</em> is turned off, but all other behavior is unchanged. Switching from one parameter
|
||||
value to the other does not change the mode, owner or group of files or directories.
|
||||
<br />Regardless of whether permissions are on or off, <code>chmod</code>, <code>chgrp</code> and
|
||||
<code>chown</code> <em>always</em> check permissions. These functions are only useful in the
|
||||
permissions context, and so there is no backwards compatibility issue. Furthermore, this allows
|
||||
administrators to reliably set owners and permissions in advance of turning on regular permissions checking.
|
||||
</li>
|
||||
|
||||
<li><code>dfs.web.ugi = webuser,webgroup</code>
|
||||
<br />The user name to be used by the web server. Setting this to the name of the super-user allows any
|
||||
web client to see everything. Changing this to an otherwise unused identity allows web clients to see
|
||||
only those things visible using "other" permissions. Additional groups may be added to the comma-separated list.
|
||||
</li>
|
||||
|
||||
<li><code>dfs.permissions.superusergroup = supergroup</code>
|
||||
<br />The name of the group of super-users.
|
||||
</li>
|
||||
|
||||
<li><code>fs.permissions.umask-mode = 022</code>
|
||||
<br />The <code>umask</code> used when creating files and directories. For configuration files, the decimal
|
||||
value <em>18<sub>10</sub></em> may be used.
|
||||
</li>
|
||||
|
||||
<li><code>dfs.cluster.administrators = ACL-for-admins</code>
|
||||
<br />The administrators for the cluster specified as an ACL. This
|
||||
controls who can access the default servlets, etc. in the
|
||||
HDFS.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
||||
|
|
@ -1,113 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
|
||||
<header> <title>Quotas Guide</title> </header>
|
||||
|
||||
<body>
|
||||
|
||||
<section> <title>Overview</title>
|
||||
|
||||
<p> The Hadoop Distributed File System (HDFS) allows the <strong>administrator</strong> to set quotas for the number of names used and the
|
||||
amount of space used for individual directories. Name quotas and space quotas operate independently, but the administration and
|
||||
implementation of the two types of quotas are closely parallel. </p>
|
||||
</section>
|
||||
|
||||
<section> <title>Name Quotas</title>
|
||||
|
||||
<p> The name quota is a hard limit on the number of file and directory names in the tree rooted at that directory. File and
|
||||
directory creations fail if the quota would be exceeded. Quotas stick with renamed directories; the rename operation fails if
|
||||
the operation would result in a quota violation. The attempt to set a quota will still succeed even if the directory would be in violation of the new
|
||||
quota. A newly created directory has no associated quota. The largest quota is <code>Long.Max_Value</code>. A quota of one
|
||||
forces a directory to remain empty. (Yes, a directory counts against its own quota!) </p>
|
||||
|
||||
<p> Quotas are persistent with the <code>fsimage</code>. When starting, if the <code>fsimage</code> is immediately in
|
||||
violation of a quota (perhaps the <code>fsimage</code> was surreptitiously modified),
|
||||
a warning is printed for each of such violations. Setting or removing a quota creates a journal entry. </p> </section>
|
||||
|
||||
<section> <title>Space Quotas</title>
|
||||
|
||||
<p> The space quota is a hard limit on the number of bytes used by files in the tree rooted at that directory. Block
|
||||
allocations fail if the quota would not allow a full block to be written. Each replica of a block counts against the quota. Quotas
|
||||
stick with renamed directories; the rename operation fails if the operation would result in a quota violation. A newly created directory has no associated quota.
|
||||
The largest quota is <code>Long.Max_Value</code>. A quota of zero still permits files to be created, but no blocks can be added to the files.
|
||||
Directories don't use host file system space and don't count against the space quota. The host file system space used to save
|
||||
the file meta data is not counted against the quota. Quotas are charged at the intended replication factor for the file;
|
||||
changing the replication factor for a file will credit or debit quotas. </p>
|
||||
|
||||
<p> Quotas are persistent with the <code>fsimage</code>. When starting, if the <code>fsimage</code> is immediately in
|
||||
violation of a quota (perhaps the <code>fsimage</code> was surreptitiously modified), a warning is printed for
|
||||
each of such violations. Setting or removing a quota creates a journal entry. </p>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
|
||||
<title>Administrative Commands</title>
|
||||
|
||||
<p> Quotas are managed by a set of commands available only to the administrator. </p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li> <code>dfsadmin -setQuota <N> <directory>...<directory></code> <br /> Set the name quota to be <code>N</code> for
|
||||
each directory. Best effort for each directory, with faults reported if <code>N</code> is not a positive long integer, the
|
||||
directory does not exist or it is a file, or the directory would immediately exceed the new quota. </li>
|
||||
|
||||
<li> <code>dfsadmin -clrQuota <directory>...<directory></code><br /> Remove any name quota for each directory. Best
|
||||
effort for each directory, with faults reported if the directory does not exist or it is a file. It is not a fault if the
|
||||
directory has no quota. </li>
|
||||
|
||||
<li> <code>dfsadmin -setSpaceQuota <N> <directory>...<directory></code> <br /> Set the space quota to be
|
||||
N bytes for each directory. This is a hard limit on total size of all the files under the directory tree.
|
||||
The space quota takes replication also into account, i.e. one GB of data with replication of 3 consumes 3GB of quota. N can also be specified with a binary prefix for convenience, e.g. 50g for 50 gigabytes and
|
||||
2t for 2 terabytes etc. Best effort for each directory, with faults reported if <code>N</code> is
|
||||
neither zero nor a positive integer, the directory does not exist or it is a file, or the directory would immediately exceed
|
||||
the new quota. </li>
|
||||
|
||||
<li> <code>dfsadmin -clrSpaceQuota <directory>...<directory></code><br /> Remove any space quota for each directory. Best
|
||||
effort for each directory, with faults reported if the directory does not exist or it is a file. It is not a fault if the
|
||||
directory has no quota. </li>
|
||||
|
||||
</ul>
|
||||
|
||||
</section>
|
||||
|
||||
<section>
|
||||
|
||||
<title>Reporting Command</title>
|
||||
|
||||
<p> An extension to the <code>count</code> command of the HDFS shell reports quota values and the current count of names and bytes in use. </p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li>
|
||||
|
||||
<code>fs -count -q <directory>...<directory></code><br /> With the <code>-q</code> option, also report the name quota
|
||||
value set for each directory, the available name quota remaining, the space quota value set, and the available space quota
|
||||
remaining. If the directory does not have a quota set, the reported values are <code>none</code> and <code>inf</code>.
|
||||
|
||||
</li>
|
||||
|
||||
</ul> </section>
|
||||
|
||||
</body>
|
||||
|
||||
</document>
|
|
@ -1,681 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
|
||||
"http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title>
|
||||
HDFS Users Guide
|
||||
</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
<section> <title>Purpose</title>
|
||||
<p>
|
||||
This document is a starting point for users working with
|
||||
Hadoop Distributed File System (HDFS) either as a part of a Hadoop cluster
|
||||
or as a stand-alone general purpose distributed file system.
|
||||
While HDFS is designed to "just work" in many environments, a working
|
||||
knowledge of HDFS helps greatly with configuration improvements and
|
||||
diagnostics on a specific cluster.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section> <title> Overview </title>
|
||||
<p>
|
||||
HDFS is the primary distributed storage used by Hadoop applications. A
|
||||
HDFS cluster primarily consists of a NameNode that manages the
|
||||
file system metadata and DataNodes that store the actual data. The
|
||||
<a href="hdfs_design.html">HDFS Architecture Guide</a> describes HDFS in detail. This user guide primarily deals with
|
||||
the interaction of users and administrators with HDFS clusters.
|
||||
The <a href="images/hdfsarchitecture.gif">HDFS architecture diagram</a> depicts
|
||||
basic interactions among NameNode, the DataNodes, and the clients.
|
||||
Clients contact NameNode for file metadata or file modifications and perform
|
||||
actual file I/O directly with the DataNodes.
|
||||
</p>
|
||||
<p>
|
||||
The following are some of the salient features that could be of
|
||||
interest to many users.
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Hadoop, including HDFS, is well suited for distributed storage
|
||||
and distributed processing using commodity hardware. It is fault
|
||||
tolerant, scalable, and extremely simple to expand. MapReduce,
|
||||
well known for its simplicity and applicability for a large set of
|
||||
distributed applications, is an integral part of Hadoop.
|
||||
</li>
|
||||
<li>
|
||||
HDFS is highly configurable with a default configuration well
|
||||
suited for many installations. Most of the time, configuration
|
||||
needs to be tuned only for very large clusters.
|
||||
</li>
|
||||
<li>
|
||||
Hadoop is written in Java and is supported on all major platforms.
|
||||
</li>
|
||||
<li>
|
||||
Hadoop supports shell-like commands to interact with HDFS directly.
|
||||
</li>
|
||||
<li>
|
||||
The NameNode and DataNodes have built-in web servers that make it
|
||||
easy to check current status of the cluster.
|
||||
</li>
|
||||
<li>
|
||||
New features and improvements are regularly implemented in HDFS.
|
||||
The following is a subset of useful features in HDFS:
|
||||
<ul>
|
||||
<li>
|
||||
File permissions and authentication.
|
||||
</li>
|
||||
<li>
|
||||
<em>Rack awareness</em>: to take a node's physical location into
|
||||
account while scheduling tasks and allocating storage.
|
||||
</li>
|
||||
<li>
|
||||
Safemode: an administrative mode for maintenance.
|
||||
</li>
|
||||
<li>
|
||||
<code>fsck</code>: a utility to diagnose health of the file system, to
|
||||
find missing files or blocks.
|
||||
</li>
|
||||
<li>
|
||||
<code>fetchdt</code>: a utility to fetch DelegationToken and store it
|
||||
in a file on the local system.
|
||||
</li>
|
||||
<li>
|
||||
Rebalancer: tool to balance the cluster when the data is
|
||||
unevenly distributed among DataNodes.
|
||||
</li>
|
||||
<li>
|
||||
Upgrade and rollback: after a software upgrade,
|
||||
it is possible to
|
||||
rollback to HDFS' state before the upgrade in case of unexpected
|
||||
problems.
|
||||
</li>
|
||||
<li>
|
||||
Secondary NameNode: performs periodic checkpoints of the
|
||||
namespace and helps keep the size of file containing log of HDFS
|
||||
modifications within certain limits at the NameNode.
|
||||
</li>
|
||||
|
||||
<li>
|
||||
Checkpoint node: performs periodic checkpoints of the namespace and
|
||||
helps minimize the size of the log stored at the NameNode
|
||||
containing changes to the HDFS.
|
||||
Replaces the role previously filled by the Secondary NameNode,
|
||||
though is not yet battle hardened.
|
||||
The NameNode allows multiple Checkpoint nodes simultaneously,
|
||||
as long as there are no Backup nodes registered with the system.
|
||||
</li>
|
||||
<li>
|
||||
Backup node: An extension to the Checkpoint node.
|
||||
In addition to checkpointing it also receives a stream of edits
|
||||
from the NameNode and maintains its own in-memory copy of the namespace,
|
||||
which is always in sync with the active NameNode namespace state.
|
||||
Only one Backup node may be registered with the NameNode at once.
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</section> <section> <title> Prerequisites </title>
|
||||
<p>
|
||||
The following documents describe how to install and set up a Hadoop cluster:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">Single Node Setup</a>
|
||||
for first-time users.
|
||||
</li>
|
||||
<li>
|
||||
<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a>
|
||||
for large, distributed clusters.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
The rest of this document assumes the user is able to set up and run a
|
||||
HDFS with at least one DataNode. For the purpose of this document,
|
||||
both the NameNode and DataNode could be running on the same physical
|
||||
machine.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> Web Interface </title>
|
||||
<p>
|
||||
NameNode and DataNode each run an internal web server in order to
|
||||
display basic information about the current status of the cluster.
|
||||
With the default configuration, the NameNode front page is at
|
||||
<code>http://namenode-name:50070/</code>.
|
||||
It lists the DataNodes in the cluster and basic statistics of the
|
||||
cluster. The web interface can also be used to browse the file
|
||||
system (using "Browse the file system" link on the NameNode front
|
||||
page).
|
||||
</p>
|
||||
|
||||
</section> <section> <title>Shell Commands</title>
|
||||
<p>
|
||||
Hadoop includes various shell-like commands that directly
|
||||
interact with HDFS and other file systems that Hadoop supports.
|
||||
The command
|
||||
<code>bin/hdfs dfs -help</code>
|
||||
lists the commands supported by Hadoop
|
||||
shell. Furthermore, the command
|
||||
<code>bin/hdfs dfs -help command-name</code>
|
||||
displays more detailed help for a command. These commands support
|
||||
most of the normal file system operations like copying files,
|
||||
changing file permissions, etc. It also supports a few HDFS
|
||||
specific operations like changing replication of files.
|
||||
For more information see <a href="http://hadoop.apache.org/common/docs/current/file_system_shell.html">File System Shell Guide</a>.
|
||||
</p>
|
||||
|
||||
<section> <title> DFSAdmin Command </title>
|
||||
<p>
|
||||
The <code>bin/hadoop dfsadmin</code>
|
||||
command supports a few HDFS administration related operations.
|
||||
The <code>bin/hadoop dfsadmin -help</code> command
|
||||
lists all the commands currently supported. For example:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>-report</code>
|
||||
: reports basic statistics of HDFS. Some of this information is
|
||||
also available on the NameNode front page.
|
||||
</li>
|
||||
<li>
|
||||
<code>-safemode</code>
|
||||
: though usually not required, an administrator can manually enter
|
||||
or leave Safemode.
|
||||
</li>
|
||||
<li>
|
||||
<code>-finalizeUpgrade</code>
|
||||
: removes previous backup of the cluster made during last upgrade.
|
||||
</li>
|
||||
<li>
|
||||
<code>-refreshNodes</code>
|
||||
: Updates the namenode with the set of datanodes allowed to
|
||||
connect to the namenode. Namenodes re-read datanode hostnames
|
||||
in the file defined by dfs.hosts, dfs.hosts.exclude. Hosts defined
|
||||
in dfs.hosts are the datanodes that are part of the cluster.
|
||||
If there are entries in dfs.hosts, only the hosts in it are
|
||||
allowed to register with the namenode. Entries in dfs.hosts.exclude
|
||||
are datanodes that need to be decommissioned. Datanodes complete
|
||||
decommissioning when all the replicas from them are replicated
|
||||
to other datanodes. Decommissioned nodes are not automatically
|
||||
shutdown and are not chosen for writing for new replicas.
|
||||
</li>
|
||||
<li>
|
||||
<code>-printTopology</code>
|
||||
: Print the topology of the cluster. Display a tree of racks and
|
||||
datanodes attached to the racks as viewed by the NameNode.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#dfsadmin">dfsadmin</a>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
</section>
|
||||
<section> <title>Secondary NameNode</title>
|
||||
<p>
|
||||
The NameNode stores modifications to the file system as a log
|
||||
appended to a native file system file, <code>edits</code>.
|
||||
When a NameNode starts up, it reads HDFS state from an image
|
||||
file, <code>fsimage</code>, and then applies edits from the
|
||||
edits log file. It then writes new HDFS state to the <code>fsimage</code>
|
||||
and starts normal
|
||||
operation with an empty edits file. Since NameNode merges
|
||||
<code>fsimage</code> and <code>edits</code> files only during start up,
|
||||
the edits log file could get very large over time on a busy cluster.
|
||||
Another side effect of a larger edits file is that next
|
||||
restart of NameNode takes longer.
|
||||
</p>
|
||||
<p>
|
||||
The secondary NameNode merges the fsimage and the edits log files periodically
|
||||
and keeps edits log size within a limit. It is usually run on a
|
||||
different machine than the primary NameNode since its memory requirements
|
||||
are on the same order as the primary NameNode.
|
||||
</p>
|
||||
<p>
|
||||
The start of the checkpoint process on the secondary NameNode is
|
||||
controlled by two configuration parameters.
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>dfs.namenode.checkpoint.period</code>, set to 1 hour by default, specifies
|
||||
the maximum delay between two consecutive checkpoints, and
|
||||
</li>
|
||||
<li>
|
||||
<code>dfs.namenode.checkpoint.txns</code>, set to 40000 by default, defines the
|
||||
number of uncheckpointed transactions on the NameNode which will force
|
||||
an urgent checkpoint, even if the checkpoint period has not been reached.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
The secondary NameNode stores the latest checkpoint in a
|
||||
directory which is structured the same way as the primary NameNode's
|
||||
directory, so that the checkpointed image is always ready to be
|
||||
read by the primary NameNode if necessary.
|
||||
</p>
|
||||
<p>
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#secondarynamenode">secondarynamenode</a>.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
|
||||
<section> <title> Checkpoint Node </title>
|
||||
<p>NameNode persists its namespace using two files: <code>fsimage</code>,
|
||||
which is the latest checkpoint of the namespace and <code>edits</code>,
|
||||
a journal (log) of changes to the namespace since the checkpoint.
|
||||
When a NameNode starts up, it merges the <code>fsimage</code> and
|
||||
<code>edits</code> journal to provide an up-to-date view of the
|
||||
file system metadata.
|
||||
The NameNode then overwrites <code>fsimage</code> with the new HDFS state
|
||||
and begins a new <code>edits</code> journal.
|
||||
</p>
|
||||
<p>
|
||||
The Checkpoint node periodically creates checkpoints of the namespace.
|
||||
It downloads <code>fsimage</code> and <code>edits</code> from the active
|
||||
NameNode, merges them locally, and uploads the new image back to the
|
||||
active NameNode.
|
||||
The Checkpoint node usually runs on a different machine than the NameNode
|
||||
since its memory requirements are on the same order as the NameNode.
|
||||
The Checkpoint node is started by
|
||||
<code>bin/hdfs namenode -checkpoint</code> on the node
|
||||
specified in the configuration file.
|
||||
</p>
|
||||
<p>The location of the Checkpoint (or Backup) node and its accompanying
|
||||
web interface are configured via the <code>dfs.namenode.backup.address</code>
|
||||
and <code>dfs.namenode.backup.http-address</code> configuration variables.
|
||||
</p>
|
||||
<p>
|
||||
The start of the checkpoint process on the Checkpoint node is
|
||||
controlled by two configuration parameters.
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<code>dfs.namenode.checkpoint.period</code>, set to 1 hour by default, specifies
|
||||
the maximum delay between two consecutive checkpoints
|
||||
</li>
|
||||
<li>
|
||||
<code>dfs.namenode.checkpoint.txns</code>, set to 40000 by default, defines the
|
||||
number of uncheckpointed transactions on the NameNode which will force
|
||||
an urgent checkpoint, even if the checkpoint period has not been reached.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
The Checkpoint node stores the latest checkpoint in a
|
||||
directory that is structured the same as the NameNode's
|
||||
directory. This allows the checkpointed image to be always available for
|
||||
reading by the NameNode if necessary.
|
||||
See <a href="hdfs_user_guide.html#Import+checkpoint">Import checkpoint</a>.
|
||||
</p>
|
||||
<p>Multiple checkpoint nodes may be specified in the cluster configuration file.</p>
|
||||
<p>
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#namenode">namenode</a>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section> <title> Backup Node </title>
|
||||
<p>
|
||||
The Backup node provides the same checkpointing functionality as the
|
||||
Checkpoint node, as well as maintaining an in-memory, up-to-date copy of the
|
||||
file system namespace that is always synchronized with the active NameNode state.
|
||||
Along with accepting a journal stream of file system edits from
|
||||
the NameNode and persisting this to disk, the Backup node also applies
|
||||
those edits into its own copy of the namespace in memory, thus creating
|
||||
a backup of the namespace.
|
||||
</p>
|
||||
<p>
|
||||
The Backup node does not need to download
|
||||
<code>fsimage</code> and <code>edits</code> files from the active NameNode
|
||||
in order to create a checkpoint, as would be required with a
|
||||
Checkpoint node or Secondary NameNode, since it already has an up-to-date
|
||||
state of the namespace in memory.
|
||||
The Backup node checkpoint process is more efficient as it only needs to
|
||||
save the namespace into the local <code>fsimage</code> file and reset
|
||||
<code>edits</code>.
|
||||
</p>
|
||||
<p>
|
||||
As the Backup node maintains a copy of the
|
||||
namespace in memory, its RAM requirements are the same as the NameNode.
|
||||
</p>
|
||||
<p>
|
||||
The NameNode supports one Backup node at a time. No Checkpoint nodes may be
|
||||
registered if a Backup node is in use. Using multiple Backup nodes
|
||||
concurrently will be supported in the future.
|
||||
</p>
|
||||
<p>
|
||||
The Backup node is configured in the same manner as the Checkpoint node.
|
||||
It is started with <code>bin/hdfs namenode -backup</code>.
|
||||
</p>
|
||||
<p>The location of the Backup (or Checkpoint) node and its accompanying
|
||||
web interface are configured via the <code>dfs.namenode.backup.address</code>
|
||||
and <code>dfs.namenode.backup.http-address</code> configuration variables.
|
||||
</p>
|
||||
<p>
|
||||
Use of a Backup node provides the option of running the NameNode with no
|
||||
persistent storage, delegating all responsibility for persisting the state
|
||||
of the namespace to the Backup node.
|
||||
To do this, start the NameNode with the
|
||||
<code>-importCheckpoint</code> option, along with specifying no persistent
|
||||
storage directories of type edits <code>dfs.namenode.edits.dir</code>
|
||||
for the NameNode configuration.
|
||||
</p>
|
||||
<p>
|
||||
For a complete discussion of the motivation behind the creation of the
|
||||
Backup node and Checkpoint node, see
|
||||
<a href="https://issues.apache.org/jira/browse/HADOOP-4539">HADOOP-4539</a>.
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#namenode">namenode</a>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section> <title> Import Checkpoint </title>
|
||||
<p>
|
||||
The latest checkpoint can be imported to the NameNode if
|
||||
all other copies of the image and the edits files are lost.
|
||||
In order to do that one should:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Create an empty directory specified in the
|
||||
<code>dfs.namenode.name.dir</code> configuration variable;
|
||||
</li>
|
||||
<li>
|
||||
Specify the location of the checkpoint directory in the
|
||||
configuration variable <code>dfs.namenode.checkpoint.dir</code>;
|
||||
</li>
|
||||
<li>
|
||||
and start the NameNode with <code>-importCheckpoint</code> option.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
The NameNode will upload the checkpoint from the
|
||||
<code>dfs.namenode.checkpoint.dir</code> directory and then save it to the NameNode
|
||||
directory(s) set in <code>dfs.namenode.name.dir</code>.
|
||||
The NameNode will fail if a legal image is contained in
|
||||
<code>dfs.namenode.name.dir</code>.
|
||||
The NameNode verifies that the image in <code>dfs.namenode.checkpoint.dir</code> is
|
||||
consistent, but does not modify it in any way.
|
||||
</p>
|
||||
<p>
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#namenode">namenode</a>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section> <title> Rebalancer </title>
|
||||
<p>
|
||||
HDFS data might not always be placed uniformly across the
|
||||
DataNode. One common reason is addition of new DataNodes to an
|
||||
existing cluster. While placing new blocks (data for a file is
|
||||
stored as a series of blocks), NameNode considers various
|
||||
parameters before choosing the DataNodes to receive these blocks.
|
||||
Some of the considerations are:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Policy to keep one of the replicas of a block on the same node
|
||||
as the node that is writing the block.
|
||||
</li>
|
||||
<li>
|
||||
Need to spread different replicas of a block across the racks so
|
||||
that cluster can survive loss of whole rack.
|
||||
</li>
|
||||
<li>
|
||||
One of the replicas is usually placed on the same rack as the
|
||||
node writing to the file so that cross-rack network I/O is
|
||||
reduced.
|
||||
</li>
|
||||
<li>
|
||||
Spread HDFS data uniformly across the DataNodes in the cluster.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
Due to multiple competing considerations, data might not be
|
||||
uniformly placed across the DataNodes.
|
||||
HDFS provides a tool for administrators that analyzes block
|
||||
placement and rebalances data across the DataNodes. A brief
|
||||
administrator's guide for rebalancer as a
|
||||
<a href="http://issues.apache.org/jira/secure/attachment/12368261/RebalanceDesign6.pdf">PDF</a>
|
||||
is attached to
|
||||
<a href="http://issues.apache.org/jira/browse/HADOOP-1652">HADOOP-1652</a>.
|
||||
</p>
|
||||
<p>
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#balancer">balancer</a>.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> Rack Awareness </title>
|
||||
<p>
|
||||
Typically large Hadoop clusters are arranged in racks and
|
||||
network traffic between different nodes within the same rack is
|
||||
much more desirable than network traffic across the racks. In
|
||||
addition NameNode tries to place replicas of block on
|
||||
multiple racks for improved fault tolerance. Hadoop lets the
|
||||
cluster administrators decide which rack a node belongs to
|
||||
through configuration variable <code>net.topology.script.file.name</code>. When this
|
||||
script is configured, each node runs the script to determine its
|
||||
rack id. A default installation assumes all the nodes belong to
|
||||
the same rack. This feature and configuration is further described
|
||||
in <a href="http://issues.apache.org/jira/secure/attachment/12345251/Rack_aware_HDFS_proposal.pdf">PDF</a>
|
||||
attached to
|
||||
<a href="http://issues.apache.org/jira/browse/HADOOP-692">HADOOP-692</a>.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> Safemode </title>
|
||||
<p>
|
||||
During start up the NameNode loads the file system state from the
|
||||
fsimage and the edits log file. It then waits for DataNodes
|
||||
to report their blocks so that it does not prematurely start
|
||||
replicating the blocks though enough replicas already exist in the
|
||||
cluster. During this time NameNode stays in Safemode.
|
||||
Safemode
|
||||
for the NameNode is essentially a read-only mode for the HDFS cluster,
|
||||
where it does not allow any modifications to file system or blocks.
|
||||
Normally the NameNode leaves Safemode automatically after the DataNodes
|
||||
have reported that most file system blocks are available.
|
||||
If required, HDFS could be placed in Safemode explicitly
|
||||
using <code>'bin/hadoop dfsadmin -safemode'</code> command. NameNode front
|
||||
page shows whether Safemode is on or off. A more detailed
|
||||
description and configuration is maintained as JavaDoc for
|
||||
<a href="http://hadoop.apache.org/core/docs/current/api/org/apache/hadoop/dfs/NameNode.html#setSafeMode(org.apache.hadoop.dfs.HdfsConstants.SafeModeAction)"><code>setSafeMode()</code></a>.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> fsck </title>
|
||||
<p>
|
||||
HDFS supports the <code>fsck</code> command to check for various
|
||||
inconsistencies.
|
||||
It is designed for reporting problems with various
|
||||
files, for example, missing blocks for a file or under-replicated
|
||||
blocks. Unlike a traditional <code>fsck</code> utility for native file systems,
|
||||
this command does not correct the errors it detects. Normally NameNode
|
||||
automatically corrects most of the recoverable failures. By default
|
||||
<code>fsck</code> ignores open files but provides an option to select all files during reporting.
|
||||
The HDFS <code>fsck</code> command is not a
|
||||
Hadoop shell command. It can be run as '<code>bin/hadoop fsck</code>'.
|
||||
For command usage, see
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#fsck">fsck</a>.
|
||||
<code>fsck</code> can be run on the whole file system or on a subset of files.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> fetchdt </title>
|
||||
<p>
|
||||
HDFS supports the <code>fetchdt</code> command to fetch Delegation Token
|
||||
and store it in a file on the local system. This token can be later used to
|
||||
access secure server (NameNode for example) from a non secure client.
|
||||
Utility uses either RPC or HTTPS (over Kerberos) to get the token, and thus
|
||||
requires kerberos tickets to be present before the run (run kinit to get
|
||||
the tickets).
|
||||
The HDFS <code>fetchdt</code> command is not a
|
||||
Hadoop shell command. It can be run as '<code>bin/hadoop fetchdt DTfile </code>'.
|
||||
After you got the token you can run an HDFS command without having Kerberos
|
||||
tickets, by pointing HADOOP_TOKEN_FILE_LOCATION environmental variable to
|
||||
the delegation token file.
|
||||
For command usage, see <a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#fetchdt"><code>fetchdt</code> command</a>.
|
||||
</p>
|
||||
|
||||
</section>
|
||||
<section> <title>Recovery Mode</title>
|
||||
<p>Typically, you will configure multiple metadata storage locations.
|
||||
Then, if one storage location is corrupt, you can read the
|
||||
metadata from one of the other storage locations.</p>
|
||||
|
||||
<p>However, what can you do if the only storage locations available are
|
||||
corrupt? In this case, there is a special NameNode startup mode called
|
||||
Recovery mode that may allow you to recover most of your data.</p>
|
||||
|
||||
<p>You can start the NameNode in recovery mode like so:
|
||||
<code>namenode -recover</code></p>
|
||||
|
||||
<p>When in recovery mode, the NameNode will interactively prompt you at
|
||||
the command line about possible courses of action you can take to
|
||||
recover your data.</p>
|
||||
|
||||
<p>If you don't want to be prompted, you can give the
|
||||
<code>-force</code> option. This option will force
|
||||
recovery mode to always select the first choice. Normally, this
|
||||
will be the most reasonable choice.</p>
|
||||
|
||||
<p>Because Recovery mode can cause you to lose data, you should always
|
||||
back up your edit log and fsimage before using it.</p>
|
||||
</section>
|
||||
<section> <title> Upgrade and Rollback </title>
|
||||
<p>
|
||||
When Hadoop is upgraded on an existing cluster, as with any
|
||||
software upgrade, it is possible there are new bugs or
|
||||
incompatible changes that affect existing applications and were
|
||||
not discovered earlier. In any non-trivial HDFS installation, it
|
||||
is not an option to lose any data, let alone to restart HDFS from
|
||||
scratch. HDFS allows administrators to go back to earlier version
|
||||
of Hadoop and rollback the cluster to the state it was in
|
||||
before
|
||||
the upgrade. HDFS upgrade is described in more detail in
|
||||
<a href="http://wiki.apache.org/hadoop/Hadoop_Upgrade">Hadoop Upgrade</a> Wiki page.
|
||||
HDFS can have one such backup at a time. Before upgrading,
|
||||
administrators need to remove existing backup using <code>bin/hadoop
|
||||
dfsadmin -finalizeUpgrade</code> command. The following
|
||||
briefly describes the typical upgrade procedure:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Before upgrading Hadoop software,
|
||||
<em>finalize</em> if there is an existing backup.
|
||||
<code>dfsadmin -upgradeProgress status</code>
|
||||
can tell if the cluster needs to be <em>finalized</em>.
|
||||
</li>
|
||||
<li>Stop the cluster and distribute new version of Hadoop.</li>
|
||||
<li>
|
||||
Run the new version with <code>-upgrade</code> option
|
||||
(<code>bin/start-dfs.sh -upgrade</code>).
|
||||
</li>
|
||||
<li>
|
||||
Most of the time, cluster works just fine. Once the new HDFS is
|
||||
considered working well (maybe after a few days of operation),
|
||||
finalize the upgrade. Note that until the cluster is finalized,
|
||||
deleting the files that existed before the upgrade does not free
|
||||
up real disk space on the DataNodes.
|
||||
</li>
|
||||
<li>
|
||||
If there is a need to move back to the old version,
|
||||
<ul>
|
||||
<li> stop the cluster and distribute earlier version of Hadoop. </li>
|
||||
<li> start the cluster with rollback option.
|
||||
(<code>bin/start-dfs.sh -rollback</code>).
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</section> <section> <title> File Permissions and Security </title>
|
||||
<p>
|
||||
The file permissions are designed to be similar to file permissions on
|
||||
other familiar platforms like Linux. Currently, security is limited
|
||||
to simple file permissions. The user that starts NameNode is
|
||||
treated as the superuser for HDFS. Future versions of HDFS will
|
||||
support network authentication protocols like Kerberos for user
|
||||
authentication and encryption of data transfers. The details are discussed in the
|
||||
<a href="hdfs_permissions_guide.html">Permissions Guide</a>.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> Scalability </title>
|
||||
<p>
|
||||
Hadoop currently runs on clusters with thousands of nodes. The
|
||||
<a href="http://wiki.apache.org/hadoop/PoweredBy">PoweredBy</a> Wiki page
|
||||
lists some of the organizations that deploy Hadoop on large
|
||||
clusters. HDFS has one NameNode for each cluster. Currently
|
||||
the total memory available on NameNode is the primary scalability
|
||||
limitation. On very large clusters, increasing average size of
|
||||
files stored in HDFS helps with increasing cluster size without
|
||||
increasing memory requirements on NameNode.
|
||||
|
||||
The default configuration may not suit very large clusters. The
|
||||
<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a> Wiki page lists
|
||||
suggested configuration improvements for large Hadoop clusters.
|
||||
</p>
|
||||
|
||||
</section> <section> <title> Related Documentation </title>
|
||||
<p>
|
||||
This user guide is a good starting point for
|
||||
working with HDFS. While the user guide continues to improve,
|
||||
there is a large wealth of documentation about Hadoop and HDFS.
|
||||
The following list is a starting point for further exploration:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="http://hadoop.apache.org/">Hadoop Site</a>: The home page for the Apache Hadoop site.
|
||||
</li>
|
||||
<li>
|
||||
<a href="http://wiki.apache.org/hadoop/FrontPage">Hadoop Wiki</a>:
|
||||
The home page (FrontPage) for the Hadoop Wiki. Unlike the released documentation,
|
||||
which is part of Hadoop source tree, Hadoop Wiki is
|
||||
regularly edited by Hadoop Community.
|
||||
</li>
|
||||
<li> <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>:
|
||||
The FAQ Wiki page.
|
||||
</li>
|
||||
<li>
|
||||
Hadoop <a href="http://hadoop.apache.org/core/docs/current/api/">
|
||||
JavaDoc API</a>.
|
||||
</li>
|
||||
<li>
|
||||
Hadoop User Mailing List :
|
||||
<a href="mailto:core-user@hadoop.apache.org">core-user[at]hadoop.apache.org</a>.
|
||||
</li>
|
||||
<li>
|
||||
Explore <code>src/hdfs/hdfs-default.xml</code>.
|
||||
It includes brief
|
||||
description of most of the configuration variables available.
|
||||
</li>
|
||||
<li>
|
||||
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html">Hadoop Commands Guide</a>: Hadoop commands usage.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
</body>
|
||||
</document>
|
||||
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
|
||||
<document>
|
||||
|
||||
<header>
|
||||
<title> HFTP Guide</title>
|
||||
</header>
|
||||
|
||||
<body>
|
||||
<section>
|
||||
<title> Introduction </title>
|
||||
<p> HFTP is a Hadoop filesystem implementation that lets you read data from a remote Hadoop HDFS cluster.
|
||||
The reads are done via HTTP, and data is sourced from DataNodes.
|
||||
HFTP is a read-only filesystem, and will throw exceptions if you try to use it to write data or modify
|
||||
the filesystem state.</p>
|
||||
|
||||
<p>HFTP is primarily useful if you have multiple HDFS clusters with different versions and you need to move data from one to another. HFTP is wire-compatible even between different versions of HDFS. For example, you can do things like:
|
||||
<code>hadoop distcp -i hftp://sourceFS:50070/src hdfs://destFS:50070/dest</code>. Note that HFTP is read-only so the destination must be an HDFS filesystem. (Also, in this example, the <code>distcp</code> should be run using the configuration of the new filesystem.)</p>
|
||||
|
||||
<p>An extension, HSFTP, uses HTTPS by default. This means that data will be encrypted in transit.</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>Implementation</title>
|
||||
<p>The code for HFTP lives in the Java class <code>org.apache.hadoop.hdfs.HftpFileSystem</code>. Likewise,
|
||||
HSFTP is implemented in <code>org.apache.hadoop.hdfs.HsftpFileSystem</code>.
|
||||
</p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title> Configuration Options </title>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>dfs.hftp.https.port</td>
|
||||
<td>the HTTPS port on the remote cluster. If not set, HFTP will fall back on
|
||||
<code>dfs.https.port</code>.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>hdfs.service.host_<strong>ip:port</strong></td>
|
||||
<td>Specifies the service name (for the security subsystem) associated with the HFTP filesystem
|
||||
running at <strong>ip:port.</strong></td>
|
||||
</tr>
|
||||
</table>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
|
@ -1,110 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
|
||||
"http://forrest.apache.org/dtd/document-v20.dtd">
|
||||
|
||||
<document>
|
||||
<header>
|
||||
<title>C API libhdfs</title>
|
||||
<meta name="http-equiv">Content-Type</meta>
|
||||
<meta name="content">text/html;</meta>
|
||||
<meta name="charset">utf-8</meta>
|
||||
</header>
|
||||
<body>
|
||||
<section>
|
||||
<title>Overview</title>
|
||||
|
||||
<p>
|
||||
libhdfs is a JNI based C API for Hadoop's Distributed File System (HDFS).
|
||||
It provides C APIs to a subset of the HDFS APIs to manipulate HDFS files and
|
||||
the filesystem. libhdfs is part of the Hadoop distribution and comes
|
||||
pre-compiled in ${HADOOP_PREFIX}/libhdfs/libhdfs.so .
|
||||
</p>
|
||||
|
||||
</section>
|
||||
<section>
|
||||
<title>The APIs</title>
|
||||
|
||||
<p>
|
||||
The libhdfs APIs are a subset of: <a href="api/org/apache/hadoop/fs/FileSystem.html" >hadoop fs APIs</a>.
|
||||
</p>
|
||||
<p>
|
||||
The header file for libhdfs describes each API in detail and is available in ${HADOOP_PREFIX}/src/c++/libhdfs/hdfs.h
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>A Sample Program</title>
|
||||
|
||||
<source>
|
||||
#include "hdfs.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
hdfsFS fs = hdfsConnect("default", 0);
|
||||
const char* writePath = "/tmp/testfile.txt";
|
||||
hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
|
||||
if(!writeFile) {
|
||||
fprintf(stderr, "Failed to open %s for writing!\n", writePath);
|
||||
exit(-1);
|
||||
}
|
||||
char* buffer = "Hello, World!";
|
||||
tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
|
||||
if (hdfsFlush(fs, writeFile)) {
|
||||
fprintf(stderr, "Failed to 'flush' %s\n", writePath);
|
||||
exit(-1);
|
||||
}
|
||||
hdfsCloseFile(fs, writeFile);
|
||||
}
|
||||
</source>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<title>How To Link With The Library</title>
|
||||
<p>
|
||||
See the Makefile for hdfs_test.c in the libhdfs source directory (${HADOOP_PREFIX}/src/c++/libhdfs/Makefile) or something like:<br />
|
||||
gcc above_sample.c -I${HADOOP_PREFIX}/src/c++/libhdfs -L${HADOOP_PREFIX}/libhdfs -lhdfs -o above_sample
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>Common Problems</title>
|
||||
<p>
|
||||
The most common problem is the CLASSPATH is not set properly when calling a program that uses libhdfs.
|
||||
Make sure you set it to all the Hadoop jars needed to run Hadoop itself. Currently, there is no way to
|
||||
programmatically generate the classpath, but a good bet is to include all the jar files in ${HADOOP_PREFIX}
|
||||
and ${HADOOP_PREFIX}/lib as well as the right configuration directory containing hdfs-site.xml
|
||||
</p>
|
||||
</section>
|
||||
<section>
|
||||
<title>Thread Safe</title>
|
||||
<p>libhdfs is thread safe.</p>
|
||||
<ul>
|
||||
<li>Concurrency and Hadoop FS "handles"
|
||||
<br />The Hadoop FS implementation includes a FS handle cache which caches based on the URI of the
|
||||
namenode along with the user connecting. So, all calls to hdfsConnect will return the same handle but
|
||||
calls to hdfsConnectAsUser with different users will return different handles. But, since HDFS client
|
||||
handles are completely thread safe, this has no bearing on concurrency.
|
||||
</li>
|
||||
<li>Concurrency and libhdfs/JNI
|
||||
<br />The libhdfs calls to JNI should always be creating thread local storage, so (in theory), libhdfs
|
||||
should be as thread safe as the underlying calls to the Hadoop FS.
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
</body>
|
||||
</document>
|
|
@ -311,10 +311,9 @@ public class DistributedFileSystem extends FileSystem {
|
|||
}
|
||||
|
||||
/**
|
||||
* THIS IS DFS only operations, it is not part of FileSystem
|
||||
* move blocks from srcs to trg
|
||||
* Move blocks from srcs to trg
|
||||
* and delete srcs afterwards
|
||||
* all blocks should be the same size
|
||||
* RESTRICTION: all blocks should be the same size
|
||||
* @param trg existing file to append to
|
||||
* @param psrcs list of files (same block size, same replication)
|
||||
* @throws IOException
|
||||
|
|
|
@ -1333,8 +1333,9 @@ public class Balancer {
|
|||
|
||||
// Exit status
|
||||
enum ReturnStatus {
|
||||
SUCCESS(1),
|
||||
IN_PROGRESS(0),
|
||||
// These int values will map directly to the balancer process's exit code.
|
||||
SUCCESS(0),
|
||||
IN_PROGRESS(1),
|
||||
ALREADY_RUNNING(-1),
|
||||
NO_MOVE_BLOCK(-2),
|
||||
NO_MOVE_PROGRESS(-3),
|
||||
|
@ -1507,7 +1508,12 @@ public class Balancer {
|
|||
}
|
||||
|
||||
static class Cli extends Configured implements Tool {
|
||||
/** Parse arguments and then run Balancer */
|
||||
/**
|
||||
* Parse arguments and then run Balancer.
|
||||
*
|
||||
* @param args command specific arguments.
|
||||
* @return exit code. 0 indicates success, non-zero indicates failure.
|
||||
*/
|
||||
@Override
|
||||
public int run(String[] args) {
|
||||
final long startTime = Time.now();
|
||||
|
|
|
@ -75,6 +75,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy;
|
|||
import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
|
||||
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
|
||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
||||
import org.apache.hadoop.metrics2.util.MBeans;
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||
|
@ -398,13 +399,17 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
final File dstfile = new File(destdir, b.getBlockName());
|
||||
final File srcmeta = FsDatasetUtil.getMetaFile(srcfile, b.getGenerationStamp());
|
||||
final File dstmeta = FsDatasetUtil.getMetaFile(dstfile, b.getGenerationStamp());
|
||||
if (!srcmeta.renameTo(dstmeta)) {
|
||||
try {
|
||||
NativeIO.renameTo(srcmeta, dstmeta);
|
||||
} catch (IOException e) {
|
||||
throw new IOException("Failed to move meta file for " + b
|
||||
+ " from " + srcmeta + " to " + dstmeta);
|
||||
+ " from " + srcmeta + " to " + dstmeta, e);
|
||||
}
|
||||
if (!srcfile.renameTo(dstfile)) {
|
||||
try {
|
||||
NativeIO.renameTo(srcfile, dstfile);
|
||||
} catch (IOException e) {
|
||||
throw new IOException("Failed to move block file for " + b
|
||||
+ " from " + srcfile + " to " + dstfile.getAbsolutePath());
|
||||
+ " from " + srcfile + " to " + dstfile.getAbsolutePath(), e);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("addBlock: Moved " + srcmeta + " to " + dstmeta
|
||||
|
@ -531,10 +536,12 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Renaming " + oldmeta + " to " + newmeta);
|
||||
}
|
||||
if (!oldmeta.renameTo(newmeta)) {
|
||||
try {
|
||||
NativeIO.renameTo(oldmeta, newmeta);
|
||||
} catch (IOException e) {
|
||||
throw new IOException("Block " + replicaInfo + " reopen failed. " +
|
||||
" Unable to move meta file " + oldmeta +
|
||||
" to rbw dir " + newmeta);
|
||||
" to rbw dir " + newmeta, e);
|
||||
}
|
||||
|
||||
// rename block file to rbw directory
|
||||
|
@ -542,14 +549,18 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
LOG.debug("Renaming " + blkfile + " to " + newBlkFile
|
||||
+ ", file length=" + blkfile.length());
|
||||
}
|
||||
if (!blkfile.renameTo(newBlkFile)) {
|
||||
if (!newmeta.renameTo(oldmeta)) { // restore the meta file
|
||||
try {
|
||||
NativeIO.renameTo(blkfile, newBlkFile);
|
||||
} catch (IOException e) {
|
||||
try {
|
||||
NativeIO.renameTo(newmeta, oldmeta);
|
||||
} catch (IOException ex) {
|
||||
LOG.warn("Cannot move meta file " + newmeta +
|
||||
"back to the finalized directory " + oldmeta);
|
||||
"back to the finalized directory " + oldmeta, ex);
|
||||
}
|
||||
throw new IOException("Block " + replicaInfo + " reopen failed. " +
|
||||
" Unable to move block file " + blkfile +
|
||||
" to rbw dir " + newBlkFile);
|
||||
" to rbw dir " + newBlkFile, e);
|
||||
}
|
||||
|
||||
// Replace finalized replica by a RBW replica in replicas map
|
||||
|
@ -656,11 +667,13 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Renaming " + oldmeta + " to " + newmeta);
|
||||
}
|
||||
if (!oldmeta.renameTo(newmeta)) {
|
||||
try {
|
||||
NativeIO.renameTo(oldmeta, newmeta);
|
||||
} catch (IOException e) {
|
||||
replicaInfo.setGenerationStamp(oldGS); // restore old GS
|
||||
throw new IOException("Block " + replicaInfo + " reopen failed. " +
|
||||
" Unable to move meta file " + oldmeta +
|
||||
" to " + newmeta);
|
||||
" to " + newmeta, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -70,6 +70,7 @@ import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
|
|||
import org.apache.hadoop.hdfs.web.resources.AccessTimeParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.BlockSizeParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.BufferSizeParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.CreateParentParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.DelegationParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.DeleteOpParam;
|
||||
|
@ -483,10 +484,12 @@ public class NamenodeWebHdfsMethods {
|
|||
final DoAsParam doAsUser,
|
||||
@QueryParam(PostOpParam.NAME) @DefaultValue(PostOpParam.DEFAULT)
|
||||
final PostOpParam op,
|
||||
@QueryParam(ConcatSourcesParam.NAME) @DefaultValue(ConcatSourcesParam.DEFAULT)
|
||||
final ConcatSourcesParam concatSrcs,
|
||||
@QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT)
|
||||
final BufferSizeParam bufferSize
|
||||
) throws IOException, InterruptedException {
|
||||
return post(ugi, delegation, username, doAsUser, ROOT, op, bufferSize);
|
||||
return post(ugi, delegation, username, doAsUser, ROOT, op, concatSrcs, bufferSize);
|
||||
}
|
||||
|
||||
/** Handle HTTP POST request. */
|
||||
|
@ -505,11 +508,13 @@ public class NamenodeWebHdfsMethods {
|
|||
@PathParam(UriFsPathParam.NAME) final UriFsPathParam path,
|
||||
@QueryParam(PostOpParam.NAME) @DefaultValue(PostOpParam.DEFAULT)
|
||||
final PostOpParam op,
|
||||
@QueryParam(ConcatSourcesParam.NAME) @DefaultValue(ConcatSourcesParam.DEFAULT)
|
||||
final ConcatSourcesParam concatSrcs,
|
||||
@QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT)
|
||||
final BufferSizeParam bufferSize
|
||||
) throws IOException, InterruptedException {
|
||||
|
||||
init(ugi, delegation, username, doAsUser, path, op, bufferSize);
|
||||
init(ugi, delegation, username, doAsUser, path, op, concatSrcs, bufferSize);
|
||||
|
||||
return ugi.doAs(new PrivilegedExceptionAction<Response>() {
|
||||
@Override
|
||||
|
@ -517,7 +522,7 @@ public class NamenodeWebHdfsMethods {
|
|||
REMOTE_ADDRESS.set(request.getRemoteAddr());
|
||||
try {
|
||||
return post(ugi, delegation, username, doAsUser,
|
||||
path.getAbsolutePath(), op, bufferSize);
|
||||
path.getAbsolutePath(), op, concatSrcs, bufferSize);
|
||||
} finally {
|
||||
REMOTE_ADDRESS.set(null);
|
||||
}
|
||||
|
@ -532,6 +537,7 @@ public class NamenodeWebHdfsMethods {
|
|||
final DoAsParam doAsUser,
|
||||
final String fullpath,
|
||||
final PostOpParam op,
|
||||
final ConcatSourcesParam concatSrcs,
|
||||
final BufferSizeParam bufferSize
|
||||
) throws IOException, URISyntaxException {
|
||||
final NameNode namenode = (NameNode)context.getAttribute("name.node");
|
||||
|
@ -543,6 +549,11 @@ public class NamenodeWebHdfsMethods {
|
|||
fullpath, op.getValue(), -1L, -1L, bufferSize);
|
||||
return Response.temporaryRedirect(uri).type(MediaType.APPLICATION_OCTET_STREAM).build();
|
||||
}
|
||||
case CONCAT:
|
||||
{
|
||||
namenode.getRpcServer().concat(fullpath, concatSrcs.getAbsolutePaths());
|
||||
return Response.ok().build();
|
||||
}
|
||||
default:
|
||||
throw new UnsupportedOperationException(op + " is not supported");
|
||||
}
|
||||
|
|
|
@ -29,7 +29,9 @@ import java.net.MalformedURLException;
|
|||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
|
@ -65,6 +67,7 @@ import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
|
|||
import org.apache.hadoop.hdfs.web.resources.AccessTimeParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.BlockSizeParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.BufferSizeParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.CreateParentParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.DeleteOpParam;
|
||||
import org.apache.hadoop.hdfs.web.resources.DestinationParam;
|
||||
|
@ -103,6 +106,7 @@ import org.apache.hadoop.security.token.TokenIdentifier;
|
|||
import org.apache.hadoop.security.token.TokenRenewer;
|
||||
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.mortbay.util.ajax.JSON;
|
||||
|
||||
import com.google.common.base.Charsets;
|
||||
|
@ -716,6 +720,22 @@ public class WebHdfsFileSystem extends FileSystem
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void concat(final Path trg, final Path [] psrcs) throws IOException {
|
||||
statistics.incrementWriteOps(1);
|
||||
final HttpOpParam.Op op = PostOpParam.Op.CONCAT;
|
||||
|
||||
List<String> strPaths = new ArrayList<String>(psrcs.length);
|
||||
for(Path psrc : psrcs) {
|
||||
strPaths.add(psrc.toUri().getPath());
|
||||
}
|
||||
|
||||
String srcs = StringUtils.join(",", strPaths);
|
||||
|
||||
ConcatSourcesParam param = new ConcatSourcesParam(srcs);
|
||||
run(op, trg, param);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(final Path f, final FsPermission permission,
|
||||
final boolean overwrite, final int bufferSize, final short replication,
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
/** The concat source paths parameter. */
|
||||
public class ConcatSourcesParam extends StringParam {
|
||||
/** Parameter name. */
|
||||
public static final String NAME = "srcs";
|
||||
|
||||
public static final String DEFAULT = NULL;
|
||||
|
||||
private static final Domain DOMAIN = new Domain(NAME, null);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param str a string representation of the parameter value.
|
||||
*/
|
||||
public ConcatSourcesParam(String str) {
|
||||
super(DOMAIN, str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
/** @return the source paths obtained by splitting the parameter value on commas. */
|
||||
public final String[] getAbsolutePaths() {
|
||||
final String[] paths = getValue().split(",");
|
||||
return paths;
|
||||
}
|
||||
}
|
|
@ -23,13 +23,17 @@ import java.net.HttpURLConnection;
|
|||
public class PostOpParam extends HttpOpParam<PostOpParam.Op> {
|
||||
/** Post operations. */
|
||||
public static enum Op implements HttpOpParam.Op {
|
||||
APPEND(HttpURLConnection.HTTP_OK),
|
||||
APPEND(true, HttpURLConnection.HTTP_OK),
|
||||
|
||||
NULL(HttpURLConnection.HTTP_NOT_IMPLEMENTED);
|
||||
CONCAT(false, HttpURLConnection.HTTP_OK),
|
||||
|
||||
NULL(false, HttpURLConnection.HTTP_NOT_IMPLEMENTED);
|
||||
|
||||
final boolean doOutputAndRedirect;
|
||||
final int expectedHttpResponseCode;
|
||||
|
||||
Op(final int expectedHttpResponseCode) {
|
||||
Op(final boolean doOutputAndRedirect, final int expectedHttpResponseCode) {
|
||||
this.doOutputAndRedirect = doOutputAndRedirect;
|
||||
this.expectedHttpResponseCode = expectedHttpResponseCode;
|
||||
}
|
||||
|
||||
|
@ -40,12 +44,12 @@ public class PostOpParam extends HttpOpParam<PostOpParam.Op> {
|
|||
|
||||
@Override
|
||||
public boolean getDoOutput() {
|
||||
return true;
|
||||
return doOutputAndRedirect;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRedirect() {
|
||||
return true;
|
||||
return doOutputAndRedirect;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,312 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Fault Injection Framework and Development Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Fault Injection Framework and Development Guide
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Introduction
|
||||
|
||||
This guide provides an overview of the Hadoop Fault Injection (FI)
|
||||
framework for those who will be developing their own faults (aspects).
|
||||
|
||||
The idea of fault injection is fairly simple: it is an infusion of
|
||||
errors and exceptions into an application's logic to achieve a higher
|
||||
coverage and fault tolerance of the system. Different implementations
|
||||
of this idea are available today. Hadoop's FI framework is built on top
|
||||
of Aspect Oriented Paradigm (AOP) implemented by AspectJ toolkit.
|
||||
|
||||
* Assumptions
|
||||
|
||||
The current implementation of the FI framework assumes that the faults
|
||||
it will be emulating are of non-deterministic nature. That is, the
|
||||
moment of a fault's happening isn't known in advance and is coin-flip
|
||||
based.
|
||||
|
||||
* Architecture of the Fault Injection Framework
|
||||
|
||||
Components layout
|
||||
|
||||
** Configuration Management
|
||||
|
||||
This piece of the FI framework allows you to set expectations for
|
||||
faults to happen. The settings can be applied either statically (in
|
||||
advance) or in runtime. The desired level of faults in the framework
|
||||
can be configured two ways:
|
||||
|
||||
* editing src/aop/fi-site.xml configuration file. This file is
|
||||
similar to other Hadoop's config files
|
||||
|
||||
* setting system properties of JVM through VM startup parameters or
|
||||
in build.properties file
|
||||
|
||||
** Probability Model
|
||||
|
||||
This is fundamentally a coin flipper. The methods of this class are
|
||||
getting a random number between 0.0 and 1.0 and then checking if a new
|
||||
number falls in the range between 0.0 and a configured level for the
|
||||
fault in question. If that condition is true then the fault will occur.
|
||||
|
||||
Thus, to guarantee the happening of a fault one needs to set an
|
||||
appropriate level to 1.0. To completely prevent a fault from happening
|
||||
its probability level has to be set to 0.0.
|
||||
|
||||
Note: The default probability level is set to 0 (zero) unless the level
|
||||
is changed explicitly through the configuration file or in the runtime.
|
||||
The name of the default level's configuration parameter is fi.*
|
||||
|
||||
** Fault Injection Mechanism: AOP and AspectJ
|
||||
|
||||
The foundation of Hadoop's FI framework includes a cross-cutting
|
||||
concept implemented by AspectJ. The following basic terms are important
|
||||
to remember:
|
||||
|
||||
* A cross-cutting concept (aspect) is behavior, and often data, that
|
||||
is used across the scope of a piece of software
|
||||
|
||||
* In AOP, the aspects provide a mechanism by which a cross-cutting
|
||||
concern can be specified in a modular way
|
||||
|
||||
* Advice is the code that is executed when an aspect is invoked
|
||||
|
||||
* Join point (or pointcut) is a specific point within the application
|
||||
that may or may not invoke some advice
|
||||
|
||||
** Existing Join Points
|
||||
|
||||
The following readily available join points are provided by AspectJ:
|
||||
|
||||
* Join when a method is called
|
||||
|
||||
* Join during a method's execution
|
||||
|
||||
* Join when a constructor is invoked
|
||||
|
||||
* Join during a constructor's execution
|
||||
|
||||
* Join during aspect advice execution
|
||||
|
||||
* Join before an object is initialized
|
||||
|
||||
* Join during object initialization
|
||||
|
||||
* Join during static initializer execution
|
||||
|
||||
* Join when a class's field is referenced
|
||||
|
||||
* Join when a class's field is assigned
|
||||
|
||||
* Join when a handler is executed
|
||||
|
||||
* Aspect Example
|
||||
|
||||
----
|
||||
package org.apache.hadoop.hdfs.server.datanode;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fi.ProbabilityModel;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.util.DiskChecker.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.DataOutputStream;
|
||||
|
||||
/**
|
||||
* This aspect takes care about faults injected into datanode.BlockReceiver
|
||||
* class
|
||||
*/
|
||||
public aspect BlockReceiverAspects {
|
||||
public static final Log LOG = LogFactory.getLog(BlockReceiverAspects.class);
|
||||
|
||||
public static final String BLOCK_RECEIVER_FAULT="hdfs.datanode.BlockReceiver";
|
||||
pointcut callReceivePacket() : call (* OutputStream.write(..))
|
||||
&& withincode (* BlockReceiver.receivePacket(..))
|
||||
// to further limit the application of this aspect a very narrow 'target' can be used as follows
|
||||
// && target(DataOutputStream)
|
||||
&& !within(BlockReceiverAspects +);
|
||||
|
||||
before () throws IOException : callReceivePacket () {
|
||||
if (ProbabilityModel.injectCriteria(BLOCK_RECEIVER_FAULT)) {
|
||||
LOG.info("Before the injection point");
|
||||
Thread.dumpStack();
|
||||
throw new DiskOutOfSpaceException ("FI: injected fault point at " +
|
||||
thisJoinPoint.getStaticPart( ).getSourceLocation());
|
||||
}
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
The aspect has two main parts:
|
||||
|
||||
* The join point pointcut callReceivePacket() which serves as an
|
||||
identification mark of a specific point (in control and/or data
|
||||
flow) in the life of an application.
|
||||
|
||||
* A call to the advice - before () throws IOException :
|
||||
callReceivePacket() - will be injected (see Putting It All
|
||||
Together) before that specific spot of the application's code.
|
||||
|
||||
The pointcut identifies an invocation of the java.io.OutputStream class's
|
||||
write() method with any number of parameters and any return type. This
|
||||
invocation should take place within the body of method receivePacket() from
|
||||
class BlockReceiver. The method can have any parameters and any return
|
||||
type. Possible invocations of write() method happening anywhere within
|
||||
the aspect BlockReceiverAspects or its heirs will be ignored.
|
||||
|
||||
Note 1: This short example doesn't illustrate the fact that you can
|
||||
have more than a single injection point per class. In such a case the
|
||||
names of the faults have to be different if a developer wants to
|
||||
trigger them separately.
|
||||
|
||||
Note 2: After the injection step (see Putting It All Together) you can
|
||||
verify that the faults were properly injected by searching for ajc
|
||||
keywords in a disassembled class file.
|
||||
|
||||
* Fault Naming Convention and Namespaces
|
||||
|
||||
For the sake of a unified naming convention the following two types of
|
||||
names are recommended for new aspect development:
|
||||
|
||||
* Activity specific notation (when we don't care about a particular
|
||||
location of a fault's happening). In this case the name of the
|
||||
fault is rather abstract: fi.hdfs.DiskError
|
||||
|
||||
* Location specific notation. Here, the fault's name is mnemonic as
|
||||
in: fi.hdfs.datanode.BlockReceiver[optional location details]
|
||||
|
||||
* Development Tools
|
||||
|
||||
* The Eclipse AspectJ Development Toolkit may help you when
|
||||
developing aspects
|
||||
|
||||
* IntelliJ IDEA provides AspectJ weaver and Spring-AOP plugins
|
||||
|
||||
* Putting It All Together
|
||||
|
||||
Faults (aspects) have to be injected (or woven) together before they can
|
||||
be used. Follow these instructions:
|
||||
* To weave aspects in place use:
|
||||
|
||||
----
|
||||
% ant injectfaults
|
||||
----
|
||||
|
||||
* If you misidentified the join point of your aspect you will see a
|
||||
warning (similar to the one shown here) when 'injectfaults' target
|
||||
is completed:
|
||||
|
||||
----
|
||||
[iajc] warning at
|
||||
src/test/aop/org/apache/hadoop/hdfs/server/datanode/ \
|
||||
BlockReceiverAspects.aj:44::0
|
||||
advice defined in org.apache.hadoop.hdfs.server.datanode.BlockReceiverAspects
|
||||
has not been applied [Xlint:adviceDidNotMatch]
|
||||
----
|
||||
|
||||
* It isn't an error, so the build will report the successful result.
|
||||
To prepare dev.jar file with all your faults woven in place
|
||||
(HDFS-475 pending) use:
|
||||
|
||||
----
|
||||
% ant jar-fault-inject
|
||||
----
|
||||
|
||||
* To create test jars use:
|
||||
|
||||
----
|
||||
% ant jar-test-fault-inject
|
||||
----
|
||||
|
||||
* To run HDFS tests with faults injected use:
|
||||
|
||||
----
|
||||
% ant run-test-hdfs-fault-inject
|
||||
----
|
||||
|
||||
** How to Use the Fault Injection Framework
|
||||
|
||||
Faults can be triggered as follows:
|
||||
|
||||
* During runtime:
|
||||
|
||||
----
|
||||
% ant run-test-hdfs -Dfi.hdfs.datanode.BlockReceiver=0.12
|
||||
----
|
||||
|
||||
To set a certain level, for example 25%, of all injected faults
|
||||
use:
|
||||
|
||||
----
|
||||
% ant run-test-hdfs-fault-inject -Dfi.*=0.25
|
||||
----
|
||||
|
||||
* From a program:
|
||||
|
||||
----
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.Before;
|
||||
|
||||
public class DemoFiTest {
|
||||
public static final String BLOCK_RECEIVER_FAULT="hdfs.datanode.BlockReceiver";
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() {
|
||||
//Setting up the test's environment as required
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFI() {
|
||||
// It triggers the fault, assuming that there's one called 'hdfs.datanode.BlockReceiver'
|
||||
System.setProperty("fi." + BLOCK_RECEIVER_FAULT, "0.12");
|
||||
//
|
||||
// The main logic of your tests goes here
|
||||
//
|
||||
// Now set the level back to 0 (zero) to prevent this fault from happening again
|
||||
System.setProperty("fi." + BLOCK_RECEIVER_FAULT, "0.0");
|
||||
// or delete its trigger completely
|
||||
System.getProperties().remove("fi." + BLOCK_RECEIVER_FAULT);
|
||||
}
|
||||
|
||||
@Override
|
||||
@After
|
||||
public void tearDown() {
|
||||
//Cleaning up the test environment
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
As you can see above these two methods do the same thing. They are
|
||||
setting the probability level of <<<hdfs.datanode.BlockReceiver>>> at 12%.
|
||||
The difference, however, is that the program provides more flexibility
|
||||
and allows you to turn a fault off when a test no longer needs it.
|
||||
|
||||
* Additional Information and Contacts
|
||||
|
||||
These two sources of information are particularly interesting and worth
|
||||
reading:
|
||||
|
||||
* {{http://www.eclipse.org/aspectj/doc/next/devguide/}}
|
||||
|
||||
* AspectJ Cookbook (ISBN-13: 978-0-596-00654-9)
|
||||
|
||||
If you have additional comments or questions for the author check
|
||||
{{{https://issues.apache.org/jira/browse/HDFS-435}HDFS-435}}.
|
|
@ -0,0 +1,106 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
|
||||
---
|
||||
Offline Edits Viewer Guide
|
||||
---
|
||||
Erik Steffl
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Offline Edits Viewer Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
Offline Edits Viewer is a tool to parse the Edits log file. The current
|
||||
processors are mostly useful for conversion between different formats,
|
||||
including XML which is human readable and easier to edit than native
|
||||
binary format.
|
||||
|
||||
The tool can parse the edits formats -18 (roughly Hadoop 0.19) and
|
||||
later. The tool operates on files only; it does not need a Hadoop cluster
|
||||
to be running.
|
||||
|
||||
Input formats supported:
|
||||
|
||||
[[1]] <<binary>>: native binary format that Hadoop uses internally
|
||||
|
||||
[[2]] <<xml>>: XML format, as produced by xml processor, used if filename
|
||||
has <<<.xml>>> (case insensitive) extension
|
||||
|
||||
The Offline Edits Viewer provides several output processors (unless
|
||||
stated otherwise the output of the processor can be converted back to
|
||||
original edits file):
|
||||
|
||||
[[1]] <<binary>>: native binary format that Hadoop uses internally
|
||||
|
||||
[[2]] <<xml>>: XML format
|
||||
|
||||
[[3]] <<stats>>: prints out statistics, this cannot be converted back to
|
||||
Edits file
|
||||
|
||||
* Usage
|
||||
|
||||
----
|
||||
bash$ bin/hdfs oev -i edits -o edits.xml
|
||||
----
|
||||
|
||||
*-----------------------:-----------------------------------+
|
||||
| Flag | Description |
|
||||
*-----------------------:-----------------------------------+
|
||||
|[<<<-i>>> ; <<<--inputFile>>>] <input file> | Specify the input edits log file to
|
||||
| | process. Xml (case insensitive) extension means XML format otherwise
|
||||
| | binary format is assumed. Required.
|
||||
*-----------------------:-----------------------------------+
|
||||
|[<<-o>> ; <<--outputFile>>] <output file> | Specify the output filename, if the
|
||||
| | specified output processor generates one. If the specified file already
|
||||
| | exists, it is silently overwritten. Required.
|
||||
*-----------------------:-----------------------------------+
|
||||
|[<<-p>> ; <<--processor>>] <processor> | Specify the image processor to apply
|
||||
| | against the image file. Currently valid options are
|
||||
| | <<<binary>>>, <<<xml>>> (default) and <<<stats>>>.
|
||||
*-----------------------:-----------------------------------+
|
||||
|<<[-v ; --verbose] >> | Print the input and output filenames and pipe output of
|
||||
| | processor to console as well as specified file. On extremely large
|
||||
| | files, this may increase processing time by an order of magnitude.
|
||||
*-----------------------:-----------------------------------+
|
||||
|<<[-h ; --help] >> | Display the tool usage and help information and exit.
|
||||
*-----------------------:-----------------------------------+
|
||||
|
||||
* Case study: Hadoop cluster recovery
|
||||
|
||||
In case there is some problem with the Hadoop cluster and the edits file is
|
||||
corrupted it is possible to save at least part of the edits file that
|
||||
is correct. This can be done by converting the binary edits to XML,
|
||||
editing it manually and then converting it back to binary. The most common
|
||||
problem is that the edits file is missing the closing record (record
|
||||
that has opCode -1). This should be recognized by the tool and the XML
|
||||
format should be properly closed.
|
||||
|
||||
If there is no closing record in the XML file you can add one after
|
||||
last correct record. Anything after the record with opCode -1 is
|
||||
ignored.
|
||||
|
||||
Example of a closing record (with opCode -1):
|
||||
|
||||
+----
|
||||
<RECORD>
|
||||
<OPCODE>-1</OPCODE>
|
||||
<DATA>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
+----
|
|
@ -0,0 +1,418 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Offline Image Viewer Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Offline Image Viewer Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
The Offline Image Viewer is a tool to dump the contents of hdfs fsimage
|
||||
files to human-readable formats in order to allow offline analysis and
|
||||
examination of a Hadoop cluster's namespace. The tool is able to
|
||||
process very large image files relatively quickly, converting them to
|
||||
one of several output formats. The tool handles the layout formats that
|
||||
were included with Hadoop versions 16 and up. If the tool is not able
|
||||
to process an image file, it will exit cleanly. The Offline Image
|
||||
Viewer does not require a Hadoop cluster to be running; it is entirely
|
||||
offline in its operation.
|
||||
|
||||
The Offline Image Viewer provides several output processors:
|
||||
|
||||
[[1]] Ls is the default output processor. It closely mimics the format of
|
||||
the lsr command. It includes the same fields, in the same order, as
|
||||
lsr : directory or file flag, permissions, replication, owner,
|
||||
group, file size, modification date, and full path. Unlike the lsr
|
||||
command, the root path is included. One important difference
|
||||
between the output of the lsr command and this processor is that this
|
||||
output is not sorted by directory name and contents. Rather, the
|
||||
files are listed in the order in which they are stored in the
|
||||
fsimage file. Therefore, it is not possible to directly compare the
|
||||
output of the lsr command with this tool. The Ls processor uses
|
||||
information contained within the Inode blocks to calculate file
|
||||
sizes and ignores the -skipBlocks option.
|
||||
|
||||
[[2]] Indented provides a more complete view of the fsimage's contents,
|
||||
including all of the information included in the image, such as
|
||||
image version, generation stamp and inode- and block-specific
|
||||
listings. This processor uses indentation to organize the output
|
||||
in a hierarchical manner. The lsr format is suitable for easy human
|
||||
comprehension.
|
||||
|
||||
[[3]] Delimited provides one file per line consisting of the path,
|
||||
replication, modification time, access time, block size, number of
|
||||
blocks, file size, namespace quota, diskspace quota, permissions,
|
||||
username and group name. If run against an fsimage that does not
|
||||
contain any of these fields, the field's column will be included,
|
||||
but no data recorded. The default record delimiter is a tab, but
|
||||
this may be changed via the -delimiter command line argument. This
|
||||
processor is designed to create output that is easily analyzed by
|
||||
other tools, such as Apache Pig. See the Analyzing Results
|
||||
section for further information on using this processor to analyze
|
||||
the contents of fsimage files.
|
||||
|
||||
[[4]] XML creates an XML document of the fsimage and includes all of the
|
||||
information within the fsimage, similar to the lsr processor. The
|
||||
output of this processor is amenable to automated processing and
|
||||
analysis with XML tools. Due to the verbosity of the XML syntax,
|
||||
this processor will also generate the largest amount of output.
|
||||
|
||||
[[5]] FileDistribution is the tool for analyzing file sizes in the
|
||||
namespace image. In order to run the tool one should define a range
|
||||
of integers [0, maxSize] by specifying maxSize and a step. The
|
||||
range of integers is divided into segments of size step: [0, s[1],
|
||||
..., s[n-1], maxSize], and the processor calculates how many files
|
||||
in the system fall into each segment [s[i-1], s[i]). Note that
|
||||
files larger than maxSize always fall into the very last segment.
|
||||
The output file is formatted as a tab separated two column table:
|
||||
Size and NumFiles. Where Size represents the start of the segment,
|
||||
and NumFiles is the number of files from the image whose size falls
|
||||
in this segment.
|
||||
|
||||
* Usage
|
||||
|
||||
** Basic
|
||||
|
||||
The simplest usage of the Offline Image Viewer is to provide just an
|
||||
input and output file, via the -i and -o command-line switches:
|
||||
|
||||
----
|
||||
bash$ bin/hdfs oiv -i fsimage -o fsimage.txt
|
||||
----
|
||||
|
||||
This will create a file named fsimage.txt in the current directory
|
||||
using the Ls output processor. For very large image files, this process
|
||||
may take several minutes.
|
||||
|
||||
One can specify which output processor to use via the command-line switch -p.
|
||||
For instance:
|
||||
|
||||
----
|
||||
bash$ bin/hdfs oiv -i fsimage -o fsimage.xml -p XML
|
||||
----
|
||||
|
||||
or
|
||||
|
||||
----
|
||||
bash$ bin/hdfs oiv -i fsimage -o fsimage.txt -p Indented
|
||||
----
|
||||
|
||||
This will run the tool using either the XML or Indented output
|
||||
processor, respectively.
|
||||
|
||||
One command-line option worth considering is -skipBlocks, which
|
||||
prevents the tool from explicitly enumerating all of the blocks that
|
||||
make up a file in the namespace. This is useful for file systems that
|
||||
have very large files. Enabling this option can significantly decrease
|
||||
the size of the resulting output, as individual blocks are not
|
||||
included. Note, however, that the Ls processor needs to enumerate the
|
||||
blocks and so overrides this option.
|
||||
|
||||
** Example
|
||||
|
||||
Consider the following contrived namespace:
|
||||
|
||||
----
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:17 /anotherDir
|
||||
-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 21:15 /anotherDir/biggerfile
|
||||
-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 21:17 /anotherDir/smallFile
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem
|
||||
drwx-wx-wx - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one/two
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:16 /user
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 21:19 /user/theuser
|
||||
----
|
||||
|
||||
Applying the Offline Image Viewer against this file with default
|
||||
options would result in the following output:
|
||||
|
||||
----
|
||||
machine:hadoop-0.21.0-dev theuser$ bin/hdfs oiv -i fsimagedemo -o fsimage.txt
|
||||
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:17 /anotherDir
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /user
|
||||
-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 14:15 /anotherDir/biggerfile
|
||||
-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 14:17 /anotherDir/smallFile
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem
|
||||
drwx-wx-wx - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one/two
|
||||
drwxr-xr-x - theuser supergroup 0 2009-03-16 14:19 /user/theuser
|
||||
----
|
||||
|
||||
Similarly, applying the Indented processor would generate output that
|
||||
begins with:
|
||||
|
||||
----
|
||||
machine:hadoop-0.21.0-dev theuser$ bin/hdfs oiv -i fsimagedemo -p Indented -o fsimage.txt
|
||||
|
||||
FSImage
|
||||
ImageVersion = -19
|
||||
NamespaceID = 2109123098
|
||||
GenerationStamp = 1003
|
||||
INodes [NumInodes = 12]
|
||||
Inode
|
||||
INodePath =
|
||||
Replication = 0
|
||||
ModificationTime = 2009-03-16 14:16
|
||||
AccessTime = 1969-12-31 16:00
|
||||
BlockSize = 0
|
||||
Blocks [NumBlocks = -1]
|
||||
NSQuota = 2147483647
|
||||
DSQuota = -1
|
||||
Permissions
|
||||
Username = theuser
|
||||
GroupName = supergroup
|
||||
PermString = rwxr-xr-x
|
||||
...remaining output omitted...
|
||||
----
|
||||
|
||||
* Options
|
||||
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<Flag>> | <<Description>> |
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-i>>>\|<<<--inputFile>>> <input file> | Specify the input fsimage file to
|
||||
| | process. Required.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-o>>>\|<<<--outputFile>>> <output file> | Specify the output filename, if the
|
||||
| | specified output processor generates one. If the specified file already
|
||||
| | exists, it is silently overwritten. Required.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-p>>>\|<<<--processor>>> <processor> | Specify the image processor to apply
|
||||
| | against the image file. Currently valid options are Ls (default), XML
|
||||
| | and Indented.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-skipBlocks>>> | Do not enumerate individual blocks within files. This may
|
||||
| | save processing time and output file space on namespaces with very
|
||||
| | large files. The Ls processor reads the blocks to correctly determine
|
||||
| | file sizes and ignores this option.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-printToScreen>>> | Pipe output of processor to console as well as specified
|
||||
| | file. On extremely large namespaces, this may increase processing time
|
||||
| | by an order of magnitude.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-delimiter>>> <arg>| When used in conjunction with the Delimited processor,
|
||||
| | replaces the default tab delimiter with the string specified by arg.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<-h>>>\|<<<--help>>>| Display the tool usage and help information and exit.
|
||||
*-----------------------:-----------------------------------+
|
||||
|
||||
* Analyzing Results
|
||||
|
||||
The Offline Image Viewer makes it easy to gather large amounts of data
|
||||
about the hdfs namespace. This information can then be used to explore
|
||||
file system usage patterns or find specific files that match arbitrary
|
||||
criteria, along with other types of namespace analysis. The Delimited
|
||||
image processor in particular creates output that is amenable to
|
||||
further processing by tools such as Apache Pig. Pig provides a
|
||||
particularly good choice for analyzing these data as it is able to deal
|
||||
with the output generated from a small fsimage but also scales up to
|
||||
consume data from extremely large file systems.
|
||||
|
||||
The Delimited image processor generates lines of text separated, by
|
||||
default, by tabs and includes all of the fields that are common between
|
||||
constructed files and files that were still under construction when the
|
||||
fsimage was generated. Example scripts are provided demonstrating how
|
||||
to use this output to accomplish three tasks: determine the number of
|
||||
files each user has created on the file system, find files that were created
|
||||
but have not been accessed, and find probable duplicates of large files by
|
||||
comparing the size of each file.
|
||||
|
||||
Each of the following scripts assumes you have generated an output file
|
||||
using the Delimited processor named foo and will be storing the results
|
||||
of the Pig analysis in a file named results.
|
||||
|
||||
** Total Number of Files for Each User
|
||||
|
||||
This script processes each path within the namespace, groups them by
|
||||
the file owner and determines the total number of files each user owns.
|
||||
|
||||
----
|
||||
numFilesOfEachUser.pig:
|
||||
-- This script determines the total number of files each user has in
|
||||
-- the namespace. Its output is of the form:
|
||||
-- username, totalNumFiles
|
||||
|
||||
-- Load all of the fields from the file
|
||||
A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
|
||||
replication:int,
|
||||
modTime:chararray,
|
||||
accessTime:chararray,
|
||||
blockSize:long,
|
||||
numBlocks:int,
|
||||
fileSize:long,
|
||||
NamespaceQuota:int,
|
||||
DiskspaceQuota:int,
|
||||
perms:chararray,
|
||||
username:chararray,
|
||||
groupname:chararray);
|
||||
|
||||
|
||||
-- Grab just the path and username
|
||||
B = FOREACH A GENERATE path, username;
|
||||
|
||||
-- Generate the sum of the number of paths for each user
|
||||
C = FOREACH (GROUP B BY username) GENERATE group, COUNT(B.path);
|
||||
|
||||
-- Save results
|
||||
STORE C INTO '$outputFile';
|
||||
----
|
||||
|
||||
This script can be run against pig with the following command:
|
||||
|
||||
----
|
||||
bin/pig -x local -param inputFile=../foo -param outputFile=../results ../numFilesOfEachUser.pig
|
||||
----
|
||||
|
||||
The output file's content will be similar to that below:
|
||||
|
||||
----
|
||||
bart 1
|
||||
lisa 16
|
||||
homer 28
|
||||
marge 2456
|
||||
----
|
||||
|
||||
** Files That Have Never Been Accessed
|
||||
|
||||
This script finds files that were created but whose access times were
|
||||
never changed, meaning they were never opened or viewed.
|
||||
|
||||
----
|
||||
neverAccessed.pig:
|
||||
-- This script generates a list of files that were created but never
|
||||
-- accessed, based on their AccessTime
|
||||
|
||||
-- Load all of the fields from the file
|
||||
A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
|
||||
replication:int,
|
||||
modTime:chararray,
|
||||
accessTime:chararray,
|
||||
blockSize:long,
|
||||
numBlocks:int,
|
||||
fileSize:long,
|
||||
NamespaceQuota:int,
|
||||
DiskspaceQuota:int,
|
||||
perms:chararray,
|
||||
username:chararray,
|
||||
groupname:chararray);
|
||||
|
||||
-- Grab just the path and last time the file was accessed
|
||||
B = FOREACH A GENERATE path, accessTime;
|
||||
|
||||
-- Drop all the paths that don't have the default assigned last-access time
|
||||
C = FILTER B BY accessTime == '1969-12-31 16:00';
|
||||
|
||||
-- Drop the accessTimes, since they're all the same
|
||||
D = FOREACH C GENERATE path;
|
||||
|
||||
-- Save results
|
||||
STORE D INTO '$outputFile';
|
||||
----
|
||||
|
||||
This script can be run against pig with the following command and its
|
||||
output file's content will be a list of files that were created but
|
||||
never viewed afterwards.
|
||||
|
||||
----
|
||||
bin/pig -x local -param inputFile=../foo -param outputFile=../results ../neverAccessed.pig
|
||||
----
|
||||
|
||||
** Probable Duplicated Files Based on File Size
|
||||
|
||||
This script groups files together based on their size, drops any that
|
||||
are less than 100 MB and returns a list of the file size, number of
|
||||
files found and a tuple of the file paths. This can be used to find
|
||||
likely duplicates within the filesystem namespace.
|
||||
|
||||
----
|
||||
probableDuplicates.pig:
|
||||
-- This script finds probable duplicate files greater than 100 MB by
|
||||
-- grouping together files based on their byte size. Files of this size
|
||||
-- with exactly the same number of bytes can be considered probable
|
||||
-- duplicates, but should be checked further, either by comparing the
|
||||
-- contents directly or by another proxy, such as a hash of the contents.
|
||||
-- The script's output is of the type:
|
||||
-- fileSize numProbableDuplicates {(probableDup1), (probableDup2)}
|
||||
|
||||
-- Load all of the fields from the file
|
||||
A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
|
||||
replication:int,
|
||||
modTime:chararray,
|
||||
accessTime:chararray,
|
||||
blockSize:long,
|
||||
numBlocks:int,
|
||||
fileSize:long,
|
||||
NamespaceQuota:int,
|
||||
DiskspaceQuota:int,
|
||||
perms:chararray,
|
||||
username:chararray,
|
||||
groupname:chararray);
|
||||
|
||||
-- Grab the pathname and filesize
|
||||
B = FOREACH A generate path, fileSize;
|
||||
|
||||
-- Drop files smaller than 100 MB
|
||||
C = FILTER B by fileSize > 100L * 1024L * 1024L;
|
||||
|
||||
-- Gather all the files of the same byte size
|
||||
D = GROUP C by fileSize;
|
||||
|
||||
-- Generate file size, num of duplicates, list of duplicates
|
||||
E = FOREACH D generate group AS fileSize, COUNT(C) as numDupes, C.path AS files;
|
||||
|
||||
-- Drop all the files where there is only one of them
|
||||
F = FILTER E by numDupes > 1L;
|
||||
|
||||
-- Sort by the size of the files
|
||||
G = ORDER F by fileSize;
|
||||
|
||||
-- Save results
|
||||
STORE G INTO '$outputFile';
|
||||
----
|
||||
|
||||
This script can be run against pig with the following command:
|
||||
|
||||
----
|
||||
bin/pig -x local -param inputFile=../foo -param outputFile=../results ../probableDuplicates.pig
|
||||
----
|
||||
|
||||
The output file's content will be similar to that below:
|
||||
|
||||
----
|
||||
1077288632 2 {(/user/tennant/work1/part-00501),(/user/tennant/work1/part-00993)}
|
||||
1077288664 4 {(/user/tennant/work0/part-00567),(/user/tennant/work0/part-03980),(/user/tennant/work1/part-00725),(/user/eccelston/output/part-03395)}
|
||||
1077288668 3 {(/user/tennant/work0/part-03705),(/user/tennant/work0/part-04242),(/user/tennant/work1/part-03839)}
|
||||
1077288698 2 {(/user/tennant/work0/part-00435),(/user/eccelston/output/part-01382)}
|
||||
1077288702 2 {(/user/tennant/work0/part-03864),(/user/eccelston/output/part-03234)}
|
||||
----
|
||||
|
||||
Each line includes the file size in bytes that was found to be
|
||||
duplicated, the number of duplicates found, and a list of the
|
||||
duplicated paths. Files less than 100MB are ignored, providing a
|
||||
reasonable likelihood that files of these exact sizes may be
|
||||
duplicates.
|
|
@ -0,0 +1,257 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
HDFS Permissions Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
HDFS Permissions Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
The Hadoop Distributed File System (HDFS) implements a permissions
|
||||
model for files and directories that shares much of the POSIX model.
|
||||
Each file and directory is associated with an owner and a group. The
|
||||
file or directory has separate permissions for the user that is the
|
||||
owner, for other users that are members of the group, and for all other
|
||||
users. For files, the r permission is required to read the file, and
|
||||
the w permission is required to write or append to the file. For
|
||||
directories, the r permission is required to list the contents of the
|
||||
directory, the w permission is required to create or delete files or
|
||||
directories, and the x permission is required to access a child of the
|
||||
directory.
|
||||
|
||||
In contrast to the POSIX model, there are no setuid or setgid bits for
|
||||
files as there is no notion of executable files. For directories, there
|
||||
are no setuid or setgid bits as a simplification. The Sticky
|
||||
bit can be set on directories, preventing anyone except the superuser,
|
||||
directory owner or file owner from deleting or moving the files within
|
||||
the directory. Setting the sticky bit for a file has no effect.
|
||||
Collectively, the permissions of a file or directory are its mode. In
|
||||
general, Unix customs for representing and displaying modes will be
|
||||
used, including the use of octal numbers in this description. When a
|
||||
file or directory is created, its owner is the user identity of the
|
||||
client process, and its group is the group of the parent directory (the
|
||||
BSD rule).
|
||||
|
||||
Each client process that accesses HDFS has a two-part identity composed
|
||||
of the user name, and groups list. Whenever HDFS must do a permissions
|
||||
check for a file or directory foo accessed by a client process,
|
||||
|
||||
* If the user name matches the owner of foo, then the owner
|
||||
permissions are tested;
|
||||
* Else if the group of foo matches any member of the groups list,
|
||||
then the group permissions are tested;
|
||||
* Otherwise the other permissions of foo are tested.
|
||||
|
||||
If a permissions check fails, the client operation fails.
|
||||
|
||||
* User Identity
|
||||
|
||||
As of Hadoop 0.22, Hadoop supports two different modes of operation to
|
||||
determine the user's identity, specified by the
|
||||
hadoop.security.authentication property:
|
||||
|
||||
* <<simple>>
|
||||
|
||||
In this mode of operation, the identity of a client process is
|
||||
determined by the host operating system. On Unix-like systems,
|
||||
the user name is the equivalent of `whoami`.
|
||||
|
||||
* <<kerberos>>
|
||||
|
||||
In Kerberized operation, the identity of a client process is
|
||||
determined by its Kerberos credentials. For example, in a
|
||||
Kerberized environment, a user may use the kinit utility to
|
||||
obtain a Kerberos ticket-granting-ticket (TGT) and use klist to
|
||||
determine their current principal. When mapping a Kerberos
|
||||
principal to an HDFS username, all components except for the
|
||||
primary are dropped. For example, a principal
|
||||
todd/foobar@CORP.COMPANY.COM will act as the simple username
|
||||
todd on HDFS.
|
||||
|
||||
Regardless of the mode of operation, the user identity mechanism is
|
||||
extrinsic to HDFS itself. There is no provision within HDFS for
|
||||
creating user identities, establishing groups, or processing user
|
||||
credentials.
|
||||
|
||||
* Group Mapping
|
||||
|
||||
Once a username has been determined as described above, the list of
|
||||
groups is determined by a group mapping service, configured by the
|
||||
hadoop.security.group.mapping property. The default implementation,
|
||||
org.apache.hadoop.security.ShellBasedUnixGroupsMapping, will shell out
|
||||
to the Unix bash -c groups command to resolve a list of groups for a
|
||||
user.
|
||||
|
||||
An alternate implementation, which connects directly to an LDAP server
|
||||
to resolve the list of groups, is available via
|
||||
org.apache.hadoop.security.LdapGroupsMapping. However, this provider
|
||||
should only be used if the required groups reside exclusively in LDAP,
|
||||
and are not materialized on the Unix servers. More information on
|
||||
configuring the group mapping service is available in the Javadocs.
|
||||
|
||||
For HDFS, the mapping of users to groups is performed on the NameNode.
|
||||
Thus, the host system configuration of the NameNode determines the
|
||||
group mappings for the users.
|
||||
|
||||
Note that HDFS stores the user and group of a file or directory as
|
||||
strings; there is no conversion from user and group identity numbers as
|
||||
is conventional in Unix.
|
||||
|
||||
* Understanding the Implementation
|
||||
|
||||
Each file or directory operation passes the full path name to the name
|
||||
node, and the permissions checks are applied along the path for each
|
||||
operation. The client framework will implicitly associate the user
|
||||
identity with the connection to the name node, reducing the need for
|
||||
changes to the existing client API. It has always been the case that
|
||||
when one operation on a file succeeds, the operation might fail when
|
||||
repeated because the file, or some directory on the path, no longer
|
||||
exists. For instance, when the client first begins reading a file, it
|
||||
makes a first request to the name node to discover the location of the
|
||||
first blocks of the file. A second request made to find additional
|
||||
blocks may fail. On the other hand, deleting a file does not revoke
|
||||
access by a client that already knows the blocks of the file. With the
|
||||
addition of permissions, a client's access to a file may be withdrawn
|
||||
between requests. Again, changing permissions does not revoke the
|
||||
access of a client that already knows the file's blocks.
|
||||
|
||||
* Changes to the File System API
|
||||
|
||||
All methods that use a path parameter will throw <<<AccessControlException>>>
|
||||
if permission checking fails.
|
||||
|
||||
New methods:
|
||||
|
||||
* <<<public FSDataOutputStream create(Path f, FsPermission permission,
|
||||
boolean overwrite, int bufferSize, short replication, long
|
||||
blockSize, Progressable progress) throws IOException;>>>
|
||||
|
||||
* <<<public boolean mkdirs(Path f, FsPermission permission) throws
|
||||
IOException;>>>
|
||||
|
||||
* <<<public void setPermission(Path p, FsPermission permission) throws
|
||||
IOException;>>>
|
||||
|
||||
* <<<public void setOwner(Path p, String username, String groupname)
|
||||
throws IOException;>>>
|
||||
|
||||
* <<<public FileStatus getFileStatus(Path f) throws IOException;>>>
|
||||
|
||||
will additionally return the user, group and mode associated with the
|
||||
path.
|
||||
|
||||
The mode of a new file or directory is restricted by the umask set as a
|
||||
configuration parameter. When the existing <<<create(path, …)>>> method
|
||||
(without the permission parameter) is used, the mode of the new file is
|
||||
<<<0666 & ^umask>>>. When the new <<<create(path, permission, …)>>> method
|
||||
(with the permission parameter P) is used, the mode of the new file is
|
||||
<<<P & ^umask & 0666>>>. When a new directory is created with the existing
|
||||
<<<mkdirs(path)>>>
|
||||
method (without the permission parameter), the mode of the new
|
||||
directory is <<<0777 & ^umask>>>. When the new <<<mkdirs(path, permission)>>>
|
||||
method (with the permission parameter P) is used, the mode of the new
|
||||
directory is <<<P & ^umask & 0777>>>.
|
||||
|
||||
* Changes to the Application Shell
|
||||
|
||||
New operations:
|
||||
|
||||
* <<<chmod [-R] mode file …>>>
|
||||
|
||||
Only the owner of a file or the super-user is permitted to change
|
||||
the mode of a file.
|
||||
|
||||
* <<<chgrp [-R] group file …>>>
|
||||
|
||||
The user invoking chgrp must belong to the specified group and be
|
||||
the owner of the file, or be the super-user.
|
||||
|
||||
* <<<chown [-R] [owner][:[group]] file …>>>
|
||||
|
||||
The owner of a file may only be altered by a super-user.
|
||||
|
||||
* <<<ls file …>>>
|
||||
|
||||
* <<<lsr file …>>>
|
||||
|
||||
The output is reformatted to display the owner, group and mode.
|
||||
|
||||
* The Super-User
|
||||
|
||||
The super-user is the user with the same identity as the name node process
|
||||
itself. Loosely, if you started the name node, then you are the
|
||||
super-user. The super-user can do anything in that permissions checks
|
||||
never fail for the super-user. There is no persistent notion of who was
|
||||
the super-user; when the name node is started the process identity
|
||||
determines who is the super-user for now. The HDFS super-user does not
|
||||
have to be the super-user of the name node host, nor is it necessary
|
||||
that all clusters have the same super-user. Also, an experimenter
|
||||
running HDFS on a personal workstation, conveniently becomes that
|
||||
installation's super-user without any configuration.
|
||||
|
||||
In addition, the administrator may identify a distinguished group using
|
||||
a configuration parameter. If set, members of this group are also
|
||||
super-users.
|
||||
|
||||
* The Web Server
|
||||
|
||||
By default, the identity of the web server is a configuration
|
||||
parameter. That is, the name node has no notion of the identity of the
|
||||
real user, but the web server behaves as if it has the identity (user
|
||||
and groups) of a user chosen by the administrator. Unless the chosen
|
||||
identity matches the super-user, parts of the name space may be
|
||||
inaccessible to the web server.
|
||||
|
||||
* Configuration Parameters
|
||||
|
||||
* <<<dfs.permissions = true>>>
|
||||
|
||||
If true, use the permissions system as described here. If false,
|
||||
permission checking is turned off, but all other behavior is
|
||||
unchanged. Switching from one parameter value to the other does not
|
||||
change the mode, owner or group of files or directories.
|
||||
Regardless of whether permissions are on or off, chmod, chgrp and
|
||||
chown always check permissions. These functions are only useful in
|
||||
the permissions context, and so there is no backwards compatibility
|
||||
issue. Furthermore, this allows administrators to reliably set
|
||||
owners and permissions in advance of turning on regular permissions
|
||||
checking.
|
||||
|
||||
* <<<dfs.web.ugi = webuser,webgroup>>>
|
||||
|
||||
The user name to be used by the web server. Setting this to the
|
||||
name of the super-user allows any web client to see everything.
|
||||
Changing this to an otherwise unused identity allows web clients to
|
||||
see only those things visible using "other" permissions. Additional
|
||||
groups may be added to the comma-separated list.
|
||||
|
||||
* <<<dfs.permissions.superusergroup = supergroup>>>
|
||||
|
||||
The name of the group of super-users.
|
||||
|
||||
* <<<fs.permissions.umask-mode = 0022>>>
|
||||
|
||||
The umask used when creating files and directories. For
|
||||
configuration files, the decimal value 18 may be used.
|
||||
|
||||
* <<<dfs.cluster.administrators = ACL-for-admins>>>
|
||||
|
||||
The administrators for the cluster specified as an ACL. This
|
||||
controls who can access the default servlets, etc. in the HDFS.
|
|
@ -0,0 +1,118 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
HDFS Quotas Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
HDFS Quotas Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
The Hadoop Distributed File System (HDFS) allows the administrator to
|
||||
set quotas for the number of names used and the amount of space used
|
||||
for individual directories. Name quotas and space quotas operate
|
||||
independently, but the administration and implementation of the two
|
||||
types of quotas are closely parallel.
|
||||
|
||||
* Name Quotas
|
||||
|
||||
The name quota is a hard limit on the number of file and directory
|
||||
names in the tree rooted at that directory. File and directory
|
||||
creations fail if the quota would be exceeded. Quotas stick with
|
||||
renamed directories; the rename operation fails if the operation would
|
||||
result in a quota violation. The attempt to set a quota will still
|
||||
succeed even if the directory would be in violation of the new quota. A
|
||||
newly created directory has no associated quota. The largest quota is
|
||||
Long.Max_Value. A quota of one forces a directory to remain empty.
|
||||
(Yes, a directory counts against its own quota!)
|
||||
|
||||
Quotas are persistent with the fsimage. When starting, if the fsimage
|
||||
is immediately in violation of a quota (perhaps the fsimage was
|
||||
surreptitiously modified), a warning is printed for each of such
|
||||
violations. Setting or removing a quota creates a journal entry.
|
||||
|
||||
* Space Quotas
|
||||
|
||||
The space quota is a hard limit on the number of bytes used by files in
|
||||
the tree rooted at that directory. Block allocations fail if the quota
|
||||
would not allow a full block to be written. Each replica of a block
|
||||
counts against the quota. Quotas stick with renamed directories; the
|
||||
rename operation fails if the operation would result in a quota
|
||||
violation. A newly created directory has no associated quota. The
|
||||
largest quota is <<<Long.Max_Value>>>. A quota of zero still permits files
|
||||
to be created, but no blocks can be added to the files. Directories don't
|
||||
use host file system space and don't count against the space quota. The
|
||||
host file system space used to save the file meta data is not counted
|
||||
against the quota. Quotas are charged at the intended replication
|
||||
factor for the file; changing the replication factor for a file will
|
||||
credit or debit quotas.
|
||||
|
||||
Quotas are persistent with the fsimage. When starting, if the fsimage
|
||||
is immediately in violation of a quota (perhaps the fsimage was
|
||||
surreptitiously modified), a warning is printed for each of such
|
||||
violations. Setting or removing a quota creates a journal entry.
|
||||
|
||||
* Administrative Commands
|
||||
|
||||
Quotas are managed by a set of commands available only to the
|
||||
administrator.
|
||||
|
||||
* <<<dfsadmin -setQuota <N> <directory>...<directory> >>>
|
||||
|
||||
Set the name quota to be N for each directory. Best effort for each
|
||||
directory, with faults reported if N is not a positive long
|
||||
integer, the directory does not exist or it is a file, or the
|
||||
directory would immediately exceed the new quota.
|
||||
|
||||
* <<<dfsadmin -clrQuota <directory>...<directory> >>>
|
||||
|
||||
Remove any name quota for each directory. Best effort for each
|
||||
directory, with faults reported if the directory does not exist or
|
||||
it is a file. It is not a fault if the directory has no quota.
|
||||
|
||||
* <<<dfsadmin -setSpaceQuota <N> <directory>...<directory> >>>
|
||||
|
||||
Set the space quota to be N bytes for each directory. This is a
|
||||
hard limit on total size of all the files under the directory tree.
|
||||
The space quota takes replication also into account, i.e. one GB of
|
||||
data with replication of 3 consumes 3GB of quota. N can also be
|
||||
specified with a binary prefix for convenience, e.g. 50g for 50
|
||||
gigabytes and 2t for 2 terabytes etc. Best effort for each
|
||||
directory, with faults reported if N is neither zero nor a positive
|
||||
integer, the directory does not exist or it is a file, or the
|
||||
directory would immediately exceed the new quota.
|
||||
|
||||
* <<<dfsadmin -clrSpaceQuota <directory>...<directory> >>>
|
||||
|
||||
Remove any space quota for each directory. Best effort for each
|
||||
directory, with faults reported if the directory does not exist or
|
||||
it is a file. It is not a fault if the directory has no quota.
|
||||
|
||||
* Reporting Command
|
||||
|
||||
An extension to the count command of the HDFS shell reports quota
|
||||
values and the current count of names and bytes in use.
|
||||
|
||||
* <<<fs -count -q <directory>...<directory> >>>
|
||||
|
||||
With the -q option, also report the name quota value set for each
|
||||
directory, the available name quota remaining, the space quota
|
||||
value set, and the available space quota remaining. If the
|
||||
directory does not have a quota set, the reported values are <<<none>>>
|
||||
and <<<inf>>>.
|
|
@ -0,0 +1,499 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
HDFS Users Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
HDFS Users Guide
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Purpose
|
||||
|
||||
This document is a starting point for users working with Hadoop
|
||||
Distributed File System (HDFS) either as a part of a Hadoop cluster or
|
||||
as a stand-alone general purpose distributed file system. While HDFS is
|
||||
designed to "just work" in many environments, a working knowledge of
|
||||
HDFS helps greatly with configuration improvements and diagnostics on a
|
||||
specific cluster.
|
||||
|
||||
* Overview
|
||||
|
||||
HDFS is the primary distributed storage used by Hadoop applications. A
|
||||
HDFS cluster primarily consists of a NameNode that manages the file
|
||||
system metadata and DataNodes that store the actual data. The HDFS
|
||||
Architecture Guide describes HDFS in detail. This user guide primarily
|
||||
deals with the interaction of users and administrators with HDFS
|
||||
clusters. The HDFS architecture diagram depicts basic interactions
|
||||
among NameNode, the DataNodes, and the clients. Clients contact
|
||||
NameNode for file metadata or file modifications and perform actual
|
||||
file I/O directly with the DataNodes.
|
||||
|
||||
The following are some of the salient features that could be of
|
||||
interest to many users.
|
||||
|
||||
* Hadoop, including HDFS, is well suited for distributed storage and
|
||||
distributed processing using commodity hardware. It is fault
|
||||
tolerant, scalable, and extremely simple to expand. MapReduce, well
|
||||
known for its simplicity and applicability for large set of
|
||||
distributed applications, is an integral part of Hadoop.
|
||||
|
||||
* HDFS is highly configurable with a default configuration well
|
||||
suited for many installations. Most of the time, configuration
|
||||
needs to be tuned only for very large clusters.
|
||||
|
||||
* Hadoop is written in Java and is supported on all major platforms.
|
||||
|
||||
* Hadoop supports shell-like commands to interact with HDFS directly.
|
||||
|
||||
* The NameNode and Datanodes have built in web servers that make it
|
||||
easy to check current status of the cluster.
|
||||
|
||||
* New features and improvements are regularly implemented in HDFS.
|
||||
The following is a subset of useful features in HDFS:
|
||||
|
||||
* File permissions and authentication.
|
||||
|
||||
* Rack awareness: to take a node's physical location into
|
||||
account while scheduling tasks and allocating storage.
|
||||
|
||||
* Safemode: an administrative mode for maintenance.
|
||||
|
||||
* <<<fsck>>>: a utility to diagnose health of the file system, to find
|
||||
missing files or blocks.
|
||||
|
||||
* <<<fetchdt>>>: a utility to fetch DelegationToken and store it in a
|
||||
file on the local system.
|
||||
|
||||
* Rebalancer: tool to balance the cluster when the data is
|
||||
unevenly distributed among DataNodes.
|
||||
|
||||
* Upgrade and rollback: after a software upgrade, it is possible
|
||||
to rollback to HDFS' state before the upgrade in case of
|
||||
unexpected problems.
|
||||
|
||||
* Secondary NameNode: performs periodic checkpoints of the
|
||||
namespace and helps keep the size of file containing log of
|
||||
HDFS modifications within certain limits at the NameNode.
|
||||
|
||||
* Checkpoint node: performs periodic checkpoints of the
|
||||
namespace and helps minimize the size of the log stored at the
|
||||
NameNode containing changes to the HDFS. Replaces the role
|
||||
previously filled by the Secondary NameNode, though is not yet
|
||||
battle hardened. The NameNode allows multiple Checkpoint nodes
|
||||
simultaneously, as long as there are no Backup nodes
|
||||
registered with the system.
|
||||
|
||||
* Backup node: An extension to the Checkpoint node. In addition
|
||||
to checkpointing it also receives a stream of edits from the
|
||||
NameNode and maintains its own in-memory copy of the
|
||||
namespace, which is always in sync with the active NameNode
|
||||
namespace state. Only one Backup node may be registered with
|
||||
the NameNode at once.
|
||||
|
||||
* Prerequisites
|
||||
|
||||
The following documents describe how to install and set up a Hadoop
|
||||
cluster:
|
||||
|
||||
* {{Single Node Setup}} for first-time users.
|
||||
|
||||
* {{Cluster Setup}} for large, distributed clusters.
|
||||
|
||||
The rest of this document assumes the user is able to set up and run a
|
||||
HDFS with at least one DataNode. For the purpose of this document, both
|
||||
the NameNode and DataNode could be running on the same physical
|
||||
machine.
|
||||
|
||||
* Web Interface
|
||||
|
||||
NameNode and DataNode each run an internal web server in order to
|
||||
display basic information about the current status of the cluster. With
|
||||
the default configuration, the NameNode front page is at
|
||||
<<<http://namenode-name:50070/>>>. It lists the DataNodes in the cluster and
|
||||
basic statistics of the cluster. The web interface can also be used to
|
||||
browse the file system (using "Browse the file system" link on the
|
||||
NameNode front page).
|
||||
|
||||
* Shell Commands
|
||||
|
||||
Hadoop includes various shell-like commands that directly interact with
|
||||
HDFS and other file systems that Hadoop supports. The command <<<bin/hdfs dfs -help>>>
|
||||
lists the commands supported by Hadoop shell. Furthermore,
|
||||
the command <<<bin/hdfs dfs -help command-name>>> displays more detailed help
|
||||
for a command. These commands support most of the normal file system
|
||||
operations like copying files, changing file permissions, etc. It also
|
||||
supports a few HDFS specific operations like changing replication of
|
||||
files. For more information see {{{File System Shell Guide}}}.
|
||||
|
||||
** DFSAdmin Command
|
||||
|
||||
The <<<bin/hadoop dfsadmin>>> command supports a few HDFS administration
|
||||
related operations. The <<<bin/hadoop dfsadmin -help>>> command lists all the
|
||||
commands currently supported. For example:
|
||||
|
||||
* <<<-report>>>: reports basic statistics of HDFS. Some of this
|
||||
information is also available on the NameNode front page.
|
||||
|
||||
* <<<-safemode>>>: though usually not required, an administrator can
|
||||
manually enter or leave Safemode.
|
||||
|
||||
* <<<-finalizeUpgrade>>>: removes previous backup of the cluster made
|
||||
during last upgrade.
|
||||
|
||||
* <<<-refreshNodes>>>: Updates the namenode with the set of datanodes
|
||||
allowed to connect to the namenode. Namenodes re-read datanode
|
||||
hostnames in the file defined by <<<dfs.hosts>>>, <<<dfs.hosts.exclude>>>.
|
||||
Hosts defined in <<<dfs.hosts>>> are the datanodes that are part of the
|
||||
cluster. If there are entries in <<<dfs.hosts>>>, only the hosts in it
|
||||
are allowed to register with the namenode. Entries in
|
||||
<<<dfs.hosts.exclude>>> are datanodes that need to be decommissioned.
|
||||
Datanodes complete decommissioning when all the replicas from them
|
||||
are replicated to other datanodes. Decommissioned nodes are not
|
||||
automatically shutdown and are not chosen for writing for new
|
||||
replicas.
|
||||
|
||||
* <<<-printTopology>>> : Print the topology of the cluster. Display a tree
|
||||
of racks and datanodes attached to the racks as viewed by the
|
||||
NameNode.
|
||||
|
||||
For command usage, see {{{dfsadmin}}}.
|
||||
|
||||
* Secondary NameNode
|
||||
|
||||
The NameNode stores modifications to the file system as a log appended
|
||||
to a native file system file, edits. When a NameNode starts up, it
|
||||
reads HDFS state from an image file, fsimage, and then applies edits
|
||||
from the edits log file. It then writes new HDFS state to the fsimage
|
||||
and starts normal operation with an empty edits file. Since NameNode
|
||||
merges fsimage and edits files only during start up, the edits log file
|
||||
could get very large over time on a busy cluster. Another side effect
|
||||
of a larger edits file is that next restart of NameNode takes longer.
|
||||
|
||||
The secondary NameNode merges the fsimage and the edits log files
|
||||
periodically and keeps edits log size within a limit. It is usually run
|
||||
on a different machine than the primary NameNode since its memory
|
||||
requirements are on the same order as the primary NameNode.
|
||||
|
||||
The start of the checkpoint process on the secondary NameNode is
|
||||
controlled by two configuration parameters.
|
||||
|
||||
* <<<dfs.namenode.checkpoint.period>>>, set to 1 hour by default, specifies
|
||||
the maximum delay between two consecutive checkpoints, and
|
||||
|
||||
* <<<dfs.namenode.checkpoint.txns>>>, set to 40000 by default, defines the
|
||||
number of uncheckpointed transactions on the NameNode which will
|
||||
force an urgent checkpoint, even if the checkpoint period has not
|
||||
been reached.
|
||||
|
||||
The secondary NameNode stores the latest checkpoint in a directory
|
||||
which is structured the same way as the primary NameNode's directory.
|
||||
This ensures that the checkpointed image is always ready to be read by the
|
||||
primary NameNode if necessary.
|
||||
|
||||
For command usage, see {{{secondarynamenode}}}.
|
||||
|
||||
* Checkpoint Node
|
||||
|
||||
NameNode persists its namespace using two files: fsimage, which is the
|
||||
latest checkpoint of the namespace and edits, a journal (log) of
|
||||
changes to the namespace since the checkpoint. When a NameNode starts
|
||||
up, it merges the fsimage and edits journal to provide an up-to-date
|
||||
view of the file system metadata. The NameNode then overwrites fsimage
|
||||
with the new HDFS state and begins a new edits journal.
|
||||
|
||||
The Checkpoint node periodically creates checkpoints of the namespace.
|
||||
It downloads fsimage and edits from the active NameNode, merges them
|
||||
locally, and uploads the new image back to the active NameNode. The
|
||||
Checkpoint node usually runs on a different machine than the NameNode
|
||||
since its memory requirements are on the same order as the NameNode.
|
||||
The Checkpoint node is started by bin/hdfs namenode -checkpoint on the
|
||||
node specified in the configuration file.
|
||||
|
||||
The location of the Checkpoint (or Backup) node and its accompanying
|
||||
web interface are configured via the <<<dfs.namenode.backup.address>>> and
|
||||
<<<dfs.namenode.backup.http-address>>> configuration variables.
|
||||
|
||||
The start of the checkpoint process on the Checkpoint node is
|
||||
controlled by two configuration parameters.
|
||||
|
||||
* <<<dfs.namenode.checkpoint.period>>>, set to 1 hour by default, specifies
|
||||
the maximum delay between two consecutive checkpoints
|
||||
|
||||
* <<<dfs.namenode.checkpoint.txns>>>, set to 40000 by default, defines the
|
||||
number of uncheckpointed transactions on the NameNode which will
|
||||
force an urgent checkpoint, even if the checkpoint period has not
|
||||
been reached.
|
||||
|
||||
The Checkpoint node stores the latest checkpoint in a directory that is
|
||||
structured the same as the NameNode's directory. This allows the
|
||||
checkpointed image to be always available for reading by the NameNode
|
||||
if necessary. See Import checkpoint.
|
||||
|
||||
Multiple checkpoint nodes may be specified in the cluster configuration
|
||||
file.
|
||||
|
||||
For command usage, see {{{namenode}}}.
|
||||
|
||||
* Backup Node
|
||||
|
||||
The Backup node provides the same checkpointing functionality as the
|
||||
Checkpoint node, as well as maintaining an in-memory, up-to-date copy
|
||||
of the file system namespace that is always synchronized with the
|
||||
active NameNode state. Along with accepting a journal stream of file
|
||||
system edits from the NameNode and persisting this to disk, the Backup
|
||||
node also applies those edits into its own copy of the namespace in
|
||||
memory, thus creating a backup of the namespace.
|
||||
|
||||
The Backup node does not need to download fsimage and edits files from
|
||||
the active NameNode in order to create a checkpoint, as would be
|
||||
required with a Checkpoint node or Secondary NameNode, since it already
|
||||
has an up-to-date copy of the namespace state in memory. The Backup
|
||||
node checkpoint process is more efficient as it only needs to save the
|
||||
namespace into the local fsimage file and reset edits.
|
||||
|
||||
As the Backup node maintains a copy of the namespace in memory, its RAM
|
||||
requirements are the same as the NameNode.
|
||||
|
||||
The NameNode supports one Backup node at a time. No Checkpoint nodes
|
||||
may be registered if a Backup node is in use. Using multiple Backup
|
||||
nodes concurrently will be supported in the future.
|
||||
|
||||
The Backup node is configured in the same manner as the Checkpoint
|
||||
node. It is started with <<<bin/hdfs namenode -backup>>>.
|
||||
|
||||
The location of the Backup (or Checkpoint) node and its accompanying
|
||||
web interface are configured via the <<<dfs.namenode.backup.address>>> and
|
||||
<<<dfs.namenode.backup.http-address>>> configuration variables.
|
||||
|
||||
Use of a Backup node provides the option of running the NameNode with
|
||||
no persistent storage, delegating all responsibility for persisting the
|
||||
state of the namespace to the Backup node. To do this, start the
|
||||
NameNode with the <<<-importCheckpoint>>> option, along with specifying no
|
||||
persistent storage directories of type edits <<<dfs.namenode.edits.dir>>> for
|
||||
the NameNode configuration.
|
||||
|
||||
For a complete discussion of the motivation behind the creation of the
|
||||
Backup node and Checkpoint node, see {{{https://issues.apache.org/jira/browse/HADOOP-4539}HADOOP-4539}}.
|
||||
For command usage, see {{{namenode}}}.
|
||||
|
||||
* Import Checkpoint
|
||||
|
||||
The latest checkpoint can be imported to the NameNode if all other
|
||||
copies of the image and the edits files are lost. In order to do that
|
||||
one should:
|
||||
|
||||
* Create an empty directory specified in the <<<dfs.namenode.name.dir>>>
|
||||
configuration variable;
|
||||
|
||||
* Specify the location of the checkpoint directory in the
|
||||
configuration variable <<<dfs.namenode.checkpoint.dir>>>;
|
||||
|
||||
* and start the NameNode with <<<-importCheckpoint>>> option.
|
||||
|
||||
The NameNode will upload the checkpoint from the
|
||||
<<<dfs.namenode.checkpoint.dir>>> directory and then save it to the NameNode
|
||||
directory(s) set in <<<dfs.namenode.name.dir>>>. The NameNode will fail if a
|
||||
legal image is contained in <<<dfs.namenode.name.dir>>>. The NameNode
|
||||
verifies that the image in <<<dfs.namenode.checkpoint.dir>>> is consistent,
|
||||
but does not modify it in any way.
|
||||
|
||||
For command usage, see {{{namenode}}}.
|
||||
|
||||
* Rebalancer
|
||||
|
||||
HDFS data might not always be placed uniformly across the DataNode.
|
||||
One common reason is addition of new DataNodes to an existing cluster.
|
||||
While placing new blocks (data for a file is stored as a series of
|
||||
blocks), NameNode considers various parameters before choosing the
|
||||
DataNodes to receive these blocks. Some of the considerations are:
|
||||
|
||||
* Policy to keep one of the replicas of a block on the same node as
|
||||
the node that is writing the block.
|
||||
|
||||
* Need to spread different replicas of a block across the racks so
|
||||
that the cluster can survive the loss of a whole rack.
|
||||
|
||||
* One of the replicas is usually placed on the same rack as the node
|
||||
writing to the file so that cross-rack network I/O is reduced.
|
||||
|
||||
* Spread HDFS data uniformly across the DataNodes in the cluster.
|
||||
|
||||
Due to multiple competing considerations, data might not be uniformly
|
||||
placed across the DataNodes. HDFS provides a tool for administrators
|
||||
that analyzes block placement and rebalances data across the DataNode.
|
||||
A brief administrator's guide for rebalancer as a PDF is attached to
|
||||
{{{https://issues.apache.org/jira/browse/HADOOP-1652}HADOOP-1652}}.
|
||||
|
||||
For command usage, see {{{balancer}}}.
|
||||
|
||||
* Rack Awareness
|
||||
|
||||
Typically large Hadoop clusters are arranged in racks and network
|
||||
traffic between different nodes within the same rack is much more
|
||||
desirable than network traffic across the racks. In addition NameNode
|
||||
tries to place replicas of block on multiple racks for improved fault
|
||||
tolerance. Hadoop lets the cluster administrators decide which rack a
|
||||
node belongs to through configuration variable
|
||||
<<<net.topology.script.file.name>>>. When this script is configured, each
|
||||
node runs the script to determine its rack id. A default installation
|
||||
assumes all the nodes belong to the same rack. This feature and
|
||||
configuration is further described in PDF attached to
|
||||
{{{https://issues.apache.org/jira/browse/HADOOP-692}HADOOP-692}}.
|
||||
|
||||
* Safemode
|
||||
|
||||
During start up the NameNode loads the file system state from the
|
||||
fsimage and the edits log file. It then waits for DataNodes to report
|
||||
their blocks so that it does not prematurely start replicating the
|
||||
blocks though enough replicas already exist in the cluster. During this
|
||||
time NameNode stays in Safemode. Safemode for the NameNode is
|
||||
essentially a read-only mode for the HDFS cluster, where it does not
|
||||
allow any modifications to file system or blocks. Normally the NameNode
|
||||
leaves Safemode automatically after the DataNodes have reported that
|
||||
most file system blocks are available. If required, HDFS could be
|
||||
placed in Safemode explicitly using <<<bin/hadoop dfsadmin -safemode>>>
|
||||
command. NameNode front page shows whether Safemode is on or off. A
|
||||
more detailed description and configuration is maintained as JavaDoc
|
||||
for <<<setSafeMode()>>>.
|
||||
|
||||
* fsck
|
||||
|
||||
HDFS supports the fsck command to check for various inconsistencies. It
|
||||
is designed for reporting problems with various files, for example,
|
||||
missing blocks for a file or under-replicated blocks. Unlike a
|
||||
traditional fsck utility for native file systems, this command does not
|
||||
correct the errors it detects. Normally NameNode automatically corrects
|
||||
most of the recoverable failures. By default fsck ignores open files
|
||||
but provides an option to select all files during reporting. The HDFS
|
||||
fsck command is not a Hadoop shell command. It can be run as
|
||||
<<<bin/hadoop fsck>>>. For command usage, see {{{fsck}}}. fsck can be run on the
|
||||
whole file system or on a subset of files.
|
||||
|
||||
* fetchdt
|
||||
|
||||
HDFS supports the fetchdt command to fetch Delegation Token and store
|
||||
it in a file on the local system. This token can be later used to
|
||||
access secure server (NameNode for example) from a non secure client.
|
||||
Utility uses either RPC or HTTPS (over Kerberos) to get the token, and
|
||||
thus requires kerberos tickets to be present before the run (run kinit
|
||||
to get the tickets). The HDFS fetchdt command is not a Hadoop shell
|
||||
command. It can be run as <<<bin/hadoop fetchdt DTfile>>>. After you get
|
||||
the token you can run an HDFS command without having Kerberos tickets,
|
||||
by pointing <<<HADOOP_TOKEN_FILE_LOCATION>>> environmental variable to the
|
||||
delegation token file. For command usage, see {{{fetchdt}}} command.
|
||||
|
||||
* Recovery Mode
|
||||
|
||||
Typically, you will configure multiple metadata storage locations.
|
||||
Then, if one storage location is corrupt, you can read the metadata
|
||||
from one of the other storage locations.
|
||||
|
||||
However, what can you do if the only storage locations available are
|
||||
corrupt? In this case, there is a special NameNode startup mode called
|
||||
Recovery mode that may allow you to recover most of your data.
|
||||
|
||||
You can start the NameNode in recovery mode like so: <<<namenode -recover>>>
|
||||
|
||||
When in recovery mode, the NameNode will interactively prompt you at
|
||||
the command line about possible courses of action you can take to
|
||||
recover your data.
|
||||
|
||||
If you don't want to be prompted, you can give the <<<-force>>> option. This
|
||||
option will force recovery mode to always select the first choice.
|
||||
Normally, this will be the most reasonable choice.
|
||||
|
||||
Because Recovery mode can cause you to lose data, you should always
|
||||
back up your edit log and fsimage before using it.
|
||||
|
||||
* Upgrade and Rollback
|
||||
|
||||
When Hadoop is upgraded on an existing cluster, as with any software
|
||||
upgrade, it is possible there are new bugs or incompatible changes that
|
||||
affect existing applications and were not discovered earlier. In any
|
||||
non-trivial HDFS installation, it is not an option to lose any data,
|
||||
let alone to restart HDFS from scratch. HDFS allows administrators to
|
||||
go back to earlier version of Hadoop and rollback the cluster to the
|
||||
state it was in before the upgrade. HDFS upgrade is described in more
|
||||
detail in {{{Hadoop Upgrade}}} Wiki page. HDFS can have one such backup at a
|
||||
time. Before upgrading, administrators need to remove existing backup
|
||||
using bin/hadoop dfsadmin <<<-finalizeUpgrade>>> command. The following
|
||||
briefly describes the typical upgrade procedure:
|
||||
|
||||
* Before upgrading Hadoop software, finalize if there is an existing
|
||||
backup. <<<dfsadmin -upgradeProgress>>> status can tell if the cluster
|
||||
needs to be finalized.
|
||||
|
||||
* Stop the cluster and distribute new version of Hadoop.
|
||||
|
||||
* Run the new version with <<<-upgrade>>> option (<<<bin/start-dfs.sh -upgrade>>>).
|
||||
|
||||
* Most of the time, cluster works just fine. Once the new HDFS is
|
||||
considered working well (maybe after a few days of operation),
|
||||
finalize the upgrade. Note that until the cluster is finalized,
|
||||
deleting the files that existed before the upgrade does not free up
|
||||
real disk space on the DataNodes.
|
||||
|
||||
* If there is a need to move back to the old version,
|
||||
|
||||
* stop the cluster and distribute earlier version of Hadoop.
|
||||
|
||||
* start the cluster with rollback option. (<<<bin/start-dfs.sh -rollback>>>).
|
||||
|
||||
* File Permissions and Security
|
||||
|
||||
The file permissions are designed to be similar to file permissions on
|
||||
other familiar platforms like Linux. Currently, security is limited to
|
||||
simple file permissions. The user that starts NameNode is treated as
|
||||
the superuser for HDFS. Future versions of HDFS will support network
|
||||
authentication protocols like Kerberos for user authentication and
|
||||
encryption of data transfers. The details are discussed in the
|
||||
Permissions Guide.
|
||||
|
||||
* Scalability
|
||||
|
||||
Hadoop currently runs on clusters with thousands of nodes. The
|
||||
{{{PoweredBy}}} Wiki page lists some of the organizations that deploy Hadoop
|
||||
on large clusters. HDFS has one NameNode for each cluster. Currently
|
||||
the total memory available on NameNode is the primary scalability
|
||||
limitation. On very large clusters, increasing average size of files
|
||||
stored in HDFS helps with increasing cluster size without increasing
|
||||
memory requirements on NameNode. The default configuration may not
|
||||
suit very large clusters. The {{{FAQ}}} Wiki page lists suggested
|
||||
configuration improvements for large Hadoop clusters.
|
||||
|
||||
* Related Documentation
|
||||
|
||||
This user guide is a good starting point for working with HDFS. While
|
||||
the user guide continues to improve, there is a large wealth of
|
||||
documentation about Hadoop and HDFS. The following list is a starting
|
||||
point for further exploration:
|
||||
|
||||
* {{{Hadoop Site}}}: The home page for the Apache Hadoop site.
|
||||
|
||||
* {{{Hadoop Wiki}}}: The home page (FrontPage) for the Hadoop Wiki. Unlike
|
||||
the released documentation, which is part of Hadoop source tree,
|
||||
Hadoop Wiki is regularly edited by Hadoop Community.
|
||||
|
||||
* {{{FAQ}}}: The FAQ Wiki page.
|
||||
|
||||
* {{{Hadoop JavaDoc API}}}.
|
||||
|
||||
* {{{Hadoop User Mailing List}}}: core-user[at]hadoop.apache.org.
|
||||
|
||||
* Explore {{{src/hdfs/hdfs-default.xml}}}. It includes brief description of
|
||||
most of the configuration variables available.
|
||||
|
||||
* {{{Hadoop Commands Guide}}}: Hadoop commands usage.
|
|
@ -0,0 +1,60 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
HFTP Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
HFTP Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Introduction
|
||||
|
||||
HFTP is a Hadoop filesystem implementation that lets you read data from
|
||||
a remote Hadoop HDFS cluster. The reads are done via HTTP, and data is
|
||||
sourced from DataNodes. HFTP is a read-only filesystem, and will throw
|
||||
exceptions if you try to use it to write data or modify the filesystem
|
||||
state.
|
||||
|
||||
HFTP is primarily useful if you have multiple HDFS clusters with
|
||||
different versions and you need to move data from one to another. HFTP
|
||||
is wire-compatible even between different versions of HDFS. For
|
||||
example, you can do things like: <<<hadoop distcp -i hftp://sourceFS:50070/src hdfs://destFS:50070/dest>>>.
|
||||
Note that HFTP is read-only so the destination must be an HDFS filesystem.
|
||||
(Also, in this example, the distcp should be run using the configuration of
|
||||
the new filesystem.)
|
||||
|
||||
An extension, HSFTP, uses HTTPS by default. This means that data will
|
||||
be encrypted in transit.
|
||||
|
||||
* Implementation
|
||||
|
||||
The code for HFTP lives in the Java class
|
||||
<<<org.apache.hadoop.hdfs.HftpFileSystem>>>. Likewise, HSFTP is implemented
|
||||
in <<<org.apache.hadoop.hdfs.HsftpFileSystem>>>.
|
||||
|
||||
* Configuration Options
|
||||
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<Name>> | <<Description>> |
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<dfs.hftp.https.port>>> | the HTTPS port on the remote cluster. If not set,
|
||||
| | HFTP will fall back on <<<dfs.https.port>>>.
|
||||
*-----------------------:-----------------------------------+
|
||||
| <<<hdfs.service.host_ip:port>>> | Specifies the service name (for the security
|
||||
| | subsystem) associated with the HFTP filesystem running at ip:port.
|
||||
*-----------------------:-----------------------------------+
|
|
@ -0,0 +1,94 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
C API libhdfs
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
C API libhdfs
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
libhdfs is a JNI based C API for Hadoop's Distributed File System
|
||||
(HDFS). It provides C APIs to a subset of the HDFS APIs to manipulate
|
||||
HDFS files and the filesystem. libhdfs is part of the Hadoop
|
||||
distribution and comes pre-compiled in
|
||||
<<<${HADOOP_PREFIX}/libhdfs/libhdfs.so>>> .
|
||||
|
||||
* The APIs
|
||||
|
||||
The libhdfs APIs are a subset of: {{{hadoop fs APIs}}}.
|
||||
|
||||
The header file for libhdfs describes each API in detail and is
|
||||
available in <<<${HADOOP_PREFIX}/src/c++/libhdfs/hdfs.h>>>.
|
||||
|
||||
* A Sample Program
|
||||
|
||||
----
|
||||
\#include "hdfs.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
hdfsFS fs = hdfsConnect("default", 0);
|
||||
const char* writePath = "/tmp/testfile.txt";
|
||||
hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
|
||||
if(!writeFile) {
|
||||
fprintf(stderr, "Failed to open %s for writing!\n", writePath);
|
||||
exit(-1);
|
||||
}
|
||||
char* buffer = "Hello, World!";
|
||||
tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
|
||||
if (hdfsFlush(fs, writeFile)) {
|
||||
fprintf(stderr, "Failed to 'flush' %s\n", writePath);
|
||||
exit(-1);
|
||||
}
|
||||
hdfsCloseFile(fs, writeFile);
|
||||
}
|
||||
----
|
||||
|
||||
* How To Link With The Library
|
||||
|
||||
See the Makefile for <<<hdfs_test.c>>> in the libhdfs source directory
|
||||
(<<<${HADOOP_PREFIX}/src/c++/libhdfs/Makefile>>>) or something like:
|
||||
<<<gcc above_sample.c -I${HADOOP_PREFIX}/src/c++/libhdfs -L${HADOOP_PREFIX}/libhdfs -lhdfs -o above_sample>>>
|
||||
|
||||
* Common Problems
|
||||
|
||||
The most common problem is that the <<<CLASSPATH>>> is not set properly when
|
||||
calling a program that uses libhdfs. Make sure you set it to all the
|
||||
Hadoop jars needed to run Hadoop itself. Currently, there is no way to
|
||||
programmatically generate the classpath, but a good bet is to include
|
||||
all the jar files in <<<${HADOOP_PREFIX}>>> and <<<${HADOOP_PREFIX}/lib>>> as well
|
||||
as the right configuration directory containing <<<hdfs-site.xml>>>.
|
||||
|
||||
* Thread Safe
|
||||
|
||||
libhdfs is thread safe.
|
||||
|
||||
* Concurrency and Hadoop FS "handles"
|
||||
|
||||
The Hadoop FS implementation includes a FS handle cache which
|
||||
caches based on the URI of the namenode along with the user
|
||||
connecting. So, all calls to <<<hdfsConnect>>> will return the same
|
||||
handle but calls to <<<hdfsConnectAsUser>>> with different users will
|
||||
return different handles. But, since HDFS client handles are
|
||||
completely thread safe, this has no bearing on concurrency.
|
||||
|
||||
* Concurrency and libhdfs/JNI
|
||||
|
||||
The libhdfs calls to JNI should always be creating thread local
|
||||
storage, so (in theory), libhdfs should be as thread safe as the
|
||||
underlying calls to the Hadoop FS.
|
|
@ -0,0 +1,195 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Synthetic Load Generator Guide
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Synthetic Load Generator Guide
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
The synthetic load generator (SLG) is a tool for testing NameNode
|
||||
behavior under different client loads. The user can generate different
|
||||
mixes of read, write, and list requests by specifying the probabilities
|
||||
of read and write. The user controls the intensity of the load by
|
||||
adjusting parameters for the number of worker threads and the delay
|
||||
between operations. While load generators are running, the user can
|
||||
profile and monitor the running of the NameNode. When a load generator
|
||||
exits, it prints some NameNode statistics like the average execution
|
||||
time of each kind of operation and the NameNode throughput.
|
||||
|
||||
* Synopsis
|
||||
|
||||
The synopsis of the command is:
|
||||
|
||||
----
|
||||
java LoadGenerator [options]
|
||||
----
|
||||
|
||||
Options include:
|
||||
|
||||
* <<<-readProbability>>> <read probability>
|
||||
|
||||
The probability of the read operation; default is 0.3333.
|
||||
|
||||
* <<<-writeProbability>>> <write probability>
|
||||
|
||||
The probability of the write operations; default is 0.3333.
|
||||
|
||||
* <<<-root>>> <test space root>
|
||||
|
||||
The root of the test space; default is /testLoadSpace.
|
||||
|
||||
* <<<-maxDelayBetweenOps>>> <maxDelayBetweenOpsInMillis>
|
||||
|
||||
The maximum delay between two consecutive operations in a thread;
|
||||
default is 0 indicating no delay.
|
||||
|
||||
* <<<-numOfThreads>>> <numOfThreads>
|
||||
|
||||
The number of threads to spawn; default is 200.
|
||||
|
||||
* <<<-elapsedTime>>> <elapsedTimeInSecs>
|
||||
|
||||
The number of seconds that the program will run. A value of zero
|
||||
indicates that the program runs forever. The default value is 0.
|
||||
|
||||
* <<<-startTime>>> <startTimeInMillis>
|
||||
|
||||
The time that all worker threads start to run. By default it is 10
|
||||
seconds after the main program starts running. This creates a
|
||||
barrier if more than one load generator is running.
|
||||
|
||||
* <<<-seed>>> <seed>
|
||||
|
||||
The random generator seed for repeating requests to NameNode when
|
||||
running with a single thread; default is the current time.
|
||||
|
||||
After command line argument parsing, the load generator traverses the
|
||||
test space and builds a table of all directories and another table of
|
||||
all files in the test space. It then waits until the start time to
|
||||
spawn the number of worker threads as specified by the user. Each
|
||||
thread sends a stream of requests to NameNode. At each iteration, it
|
||||
first decides if it is going to read a file, create a file, or list a
|
||||
directory following the read and write probabilities specified by the
|
||||
user. The listing probability is equal to 1-read probability-write
|
||||
probability. When reading, it randomly picks a file in the test space
|
||||
and reads the entire file. When writing, it randomly picks a directory
|
||||
in the test space and creates a file there.
|
||||
|
||||
To avoid two threads with the same load generator or from two different
|
||||
load generators creating the same file, the file name consists of the
|
||||
current machine's host name and the thread id. The length of the file
|
||||
follows a Gaussian distribution with an average size of 2 blocks and a
|
||||
standard deviation of 1. The new file is filled with byte 'a'. To avoid
|
||||
the test space growing indefinitely, the file is deleted immediately
|
||||
after the file creation completes. While listing, it randomly picks a
|
||||
directory in the test space and lists its content.
|
||||
|
||||
After an operation completes, the thread pauses for a random amount of
|
||||
time in the range of [0, maxDelayBetweenOps] if the specified maximum
|
||||
delay is not zero. All threads are stopped when the specified elapsed
|
||||
time is passed. Before exiting, the program prints the average
|
||||
execution time for each kind of NameNode operation, and the number of
|
||||
requests served by the NameNode per second.
|
||||
|
||||
* Test Space Population
|
||||
|
||||
The user needs to populate a test space before running a load
|
||||
generator. The structure generator generates a random test space
|
||||
structure and the data generator creates the files and directories of
|
||||
the test space in Hadoop distributed file system.
|
||||
|
||||
** Structure Generator
|
||||
|
||||
This tool generates a random namespace structure with the following
|
||||
constraints:
|
||||
|
||||
[[1]] The number of subdirectories that a directory can have is a random
|
||||
number in [minWidth, maxWidth].
|
||||
|
||||
[[2]] The maximum depth of each subdirectory is a random number in
|
||||
[2*maxDepth/3, maxDepth].
|
||||
|
||||
[[3]] Files are randomly placed in leaf directories. The size of each
|
||||
file follows a Gaussian distribution with an average size of 1 block
|
||||
and a standard deviation of 1.
|
||||
|
||||
The generated namespace structure is described by two files in the
|
||||
output directory. Each line of the first file contains the full name of
|
||||
a leaf directory. Each line of the second file contains the full name
|
||||
of a file and its size, separated by a blank.
|
||||
|
||||
The synopsis of the command is:
|
||||
|
||||
----
|
||||
java StructureGenerator [options]
|
||||
----
|
||||
|
||||
Options include:
|
||||
|
||||
* <<<-maxDepth>>> <maxDepth>
|
||||
|
||||
Maximum depth of the directory tree; default is 5.
|
||||
|
||||
* <<<-minWidth>>> <minWidth>
|
||||
|
||||
Minimum number of subdirectories per directory; default is 1.
|
||||
|
||||
* <<<-maxWidth>>> <maxWidth>
|
||||
|
||||
Maximum number of subdirectories per directory; default is 5.
|
||||
|
||||
* <<<-numOfFiles>>> <#OfFiles>
|
||||
|
||||
The total number of files in the test space; default is 10.
|
||||
|
||||
* <<<-avgFileSize>>> <avgFileSizeInBlocks>
|
||||
|
||||
Average size of a file, in blocks; default is 1.
|
||||
|
||||
* <<<-outDir>>> <outDir>
|
||||
|
||||
Output directory; default is the current directory.
|
||||
|
||||
* <<<-seed>>> <seed>
|
||||
|
||||
Random number generator seed; default is the current time.
|
||||
|
||||
** Data Generator
|
||||
|
||||
This tool reads the directory structure and file structure from the
|
||||
input directory and creates the namespace in Hadoop distributed file
|
||||
system. All files are filled with byte 'a'.
|
||||
|
||||
The synopsis of the command is:
|
||||
|
||||
----
|
||||
java DataGenerator [options]
|
||||
----
|
||||
|
||||
Options include:
|
||||
|
||||
* <<<-inDir>>> <inDir>
|
||||
|
||||
Input directory name where directory/file structures are stored;
|
||||
default is the current directory.
|
||||
|
||||
* <<<-root>>> <test space root>
|
||||
|
||||
The name of the root directory which the new namespace is going to
|
||||
be placed under; default is "/testLoadSpace".
|
|
@ -46,8 +46,10 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
|||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.server.balancer.Balancer.Cli;
|
||||
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
|
@ -95,7 +97,6 @@ public class TestBalancer {
|
|||
DFSTestUtil.waitReplication(fs, filePath, replicationFactor);
|
||||
}
|
||||
|
||||
|
||||
/* fill up a cluster with <code>numNodes</code> datanodes
|
||||
* whose used space to be <code>size</code>
|
||||
*/
|
||||
|
@ -301,10 +302,12 @@ public class TestBalancer {
|
|||
* @param racks - array of racks for original nodes in cluster
|
||||
* @param newCapacity - new node's capacity
|
||||
* @param newRack - new node's rack
|
||||
* @param useTool - if true run test via Cli with command-line argument
|
||||
* parsing, etc. Otherwise invoke balancer API directly.
|
||||
* @throws Exception
|
||||
*/
|
||||
private void doTest(Configuration conf, long[] capacities, String[] racks,
|
||||
long newCapacity, String newRack) throws Exception {
|
||||
long newCapacity, String newRack, boolean useTool) throws Exception {
|
||||
assertEquals(capacities.length, racks.length);
|
||||
int numOfDatanodes = capacities.length;
|
||||
cluster = new MiniDFSCluster.Builder(conf)
|
||||
|
@ -330,7 +333,11 @@ public class TestBalancer {
|
|||
totalCapacity += newCapacity;
|
||||
|
||||
// run balancer and validate results
|
||||
runBalancer(conf, totalUsedSpace, totalCapacity);
|
||||
if (useTool) {
|
||||
runBalancerCli(conf, totalUsedSpace, totalCapacity);
|
||||
} else {
|
||||
runBalancer(conf, totalUsedSpace, totalCapacity);
|
||||
}
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
|
@ -350,22 +357,38 @@ public class TestBalancer {
|
|||
waitForBalancer(totalUsedSpace, totalCapacity, client, cluster);
|
||||
}
|
||||
|
||||
private void runBalancerCli(Configuration conf,
|
||||
long totalUsedSpace, long totalCapacity) throws Exception {
|
||||
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
|
||||
|
||||
final String[] args = { "-policy", "datanode" };
|
||||
final Tool tool = new Cli();
|
||||
tool.setConf(conf);
|
||||
final int r = tool.run(args); // start rebalancing
|
||||
|
||||
assertEquals("Tools should exit 0 on success", 0, r);
|
||||
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
|
||||
LOG.info("Rebalancing with default ctor.");
|
||||
waitForBalancer(totalUsedSpace, totalCapacity, client, cluster);
|
||||
}
|
||||
|
||||
/** one-node cluster test*/
|
||||
private void oneNodeTest(Configuration conf) throws Exception {
|
||||
private void oneNodeTest(Configuration conf, boolean useTool) throws Exception {
|
||||
// add an empty node with half of the CAPACITY & the same rack
|
||||
doTest(conf, new long[]{CAPACITY}, new String[]{RACK0}, CAPACITY/2, RACK0);
|
||||
doTest(conf, new long[]{CAPACITY}, new String[]{RACK0}, CAPACITY/2,
|
||||
RACK0, useTool);
|
||||
}
|
||||
|
||||
/** two-node cluster test */
|
||||
private void twoNodeTest(Configuration conf) throws Exception {
|
||||
doTest(conf, new long[]{CAPACITY, CAPACITY}, new String[]{RACK0, RACK1},
|
||||
CAPACITY, RACK2);
|
||||
CAPACITY, RACK2, false);
|
||||
}
|
||||
|
||||
/** test using a user-supplied conf */
|
||||
public void integrationTest(Configuration conf) throws Exception {
|
||||
initConf(conf);
|
||||
oneNodeTest(conf);
|
||||
oneNodeTest(conf, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -401,7 +424,7 @@ public class TestBalancer {
|
|||
|
||||
void testBalancer0Internal(Configuration conf) throws Exception {
|
||||
initConf(conf);
|
||||
oneNodeTest(conf);
|
||||
oneNodeTest(conf, false);
|
||||
twoNodeTest(conf);
|
||||
}
|
||||
|
||||
|
@ -495,7 +518,18 @@ public class TestBalancer {
|
|||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Verify balancer exits 0 on success.
|
||||
*/
|
||||
@Test(timeout=100000)
|
||||
public void testExitZeroOnSuccess() throws Exception {
|
||||
final Configuration conf = new HdfsConfiguration();
|
||||
|
||||
initConf(conf);
|
||||
|
||||
oneNodeTest(conf, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
|
|
|
@ -27,11 +27,13 @@ import java.security.PrivilegedExceptionAction;
|
|||
import org.apache.commons.logging.impl.Log4JLogger;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSMainOperationsBaseTest;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileSystemTestHelper;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.server.datanode.web.resources.DatanodeWebHdfsMethods;
|
||||
import org.apache.hadoop.hdfs.web.resources.ExceptionHandler;
|
||||
|
@ -60,6 +62,7 @@ public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest {
|
|||
|
||||
final Configuration conf = new Configuration();
|
||||
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
|
||||
try {
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
|
||||
cluster.waitActive();
|
||||
|
@ -101,6 +104,30 @@ public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest {
|
|||
return defaultWorkingDirectory;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcat() throws Exception {
|
||||
Path[] paths = {new Path("/test/hadoop/file1"),
|
||||
new Path("/test/hadoop/file2"),
|
||||
new Path("/test/hadoop/file3")};
|
||||
|
||||
DFSTestUtil.createFile(fSys, paths[0], 1024, (short) 3, 0);
|
||||
DFSTestUtil.createFile(fSys, paths[1], 1024, (short) 3, 0);
|
||||
DFSTestUtil.createFile(fSys, paths[2], 1024, (short) 3, 0);
|
||||
|
||||
Path catPath = new Path("/test/hadoop/catFile");
|
||||
DFSTestUtil.createFile(fSys, catPath, 1024, (short) 3, 0);
|
||||
Assert.assertTrue(exists(fSys, catPath));
|
||||
|
||||
fSys.concat(catPath, paths);
|
||||
|
||||
Assert.assertFalse(exists(fSys, paths[0]));
|
||||
Assert.assertFalse(exists(fSys, paths[1]));
|
||||
Assert.assertFalse(exists(fSys, paths[2]));
|
||||
|
||||
FileStatus fileStatus = fSys.getFileStatus(catPath);
|
||||
Assert.assertEquals(1024*4, fileStatus.getLen());
|
||||
}
|
||||
|
||||
@Override
|
||||
@Test
|
||||
public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception {
|
||||
|
|
|
@ -217,6 +217,9 @@ Release 2.0.3-alpha - Unreleased
|
|||
|
||||
OPTIMIZATIONS
|
||||
|
||||
MAPREDUCE-4893. Fixed MR ApplicationMaster to do optimal assignment of
|
||||
containers to get maximum locality. (Bikas Saha via vinodkv)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-4607. Race condition in ReduceTask completion can result in Task
|
||||
|
@ -278,6 +281,9 @@ Release 2.0.3-alpha - Unreleased
|
|||
MAPREDUCE-2264. Job status exceeds 100% in some cases.
|
||||
(devaraj.k and sandyr via tucu)
|
||||
|
||||
MAPREDUCE-4969. TestKeyValueTextInputFormat test fails with Open JDK 7.
|
||||
(Arpit Agarwal via suresh)
|
||||
|
||||
Release 2.0.2-alpha - 2012-09-07
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -747,7 +747,7 @@ public class RMContainerAllocator extends RMContainerRequestor
|
|||
addContainerReq(req);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
// this method will change the list of allocatedContainers.
|
||||
private void assign(List<Container> allocatedContainers) {
|
||||
Iterator<Container> it = allocatedContainers.iterator();
|
||||
LOG.info("Got allocated containers " + allocatedContainers.size());
|
||||
|
@ -788,84 +788,97 @@ public class RMContainerAllocator extends RMContainerRequestor
|
|||
+ reduces.isEmpty());
|
||||
isAssignable = false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Container allocated at unwanted priority: " + priority +
|
||||
". Returning to RM...");
|
||||
isAssignable = false;
|
||||
}
|
||||
|
||||
boolean blackListed = false;
|
||||
ContainerRequest assigned = null;
|
||||
if(!isAssignable) {
|
||||
// release container if we could not assign it
|
||||
containerNotAssigned(allocated);
|
||||
it.remove();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isAssignable) {
|
||||
// do not assign if allocated container is on a
|
||||
// blacklisted host
|
||||
String allocatedHost = allocated.getNodeId().getHost();
|
||||
blackListed = isNodeBlacklisted(allocatedHost);
|
||||
if (blackListed) {
|
||||
// we need to request for a new container
|
||||
// and release the current one
|
||||
LOG.info("Got allocated container on a blacklisted "
|
||||
+ " host "+allocatedHost
|
||||
+". Releasing container " + allocated);
|
||||
// do not assign if allocated container is on a
|
||||
// blacklisted host
|
||||
String allocatedHost = allocated.getNodeId().getHost();
|
||||
if (isNodeBlacklisted(allocatedHost)) {
|
||||
// we need to request for a new container
|
||||
// and release the current one
|
||||
LOG.info("Got allocated container on a blacklisted "
|
||||
+ " host "+allocatedHost
|
||||
+". Releasing container " + allocated);
|
||||
|
||||
// find the request matching this allocated container
|
||||
// and replace it with a new one
|
||||
ContainerRequest toBeReplacedReq =
|
||||
getContainerReqToReplace(allocated);
|
||||
if (toBeReplacedReq != null) {
|
||||
LOG.info("Placing a new container request for task attempt "
|
||||
+ toBeReplacedReq.attemptID);
|
||||
ContainerRequest newReq =
|
||||
getFilteredContainerRequest(toBeReplacedReq);
|
||||
decContainerReq(toBeReplacedReq);
|
||||
if (toBeReplacedReq.attemptID.getTaskId().getTaskType() ==
|
||||
TaskType.MAP) {
|
||||
maps.put(newReq.attemptID, newReq);
|
||||
}
|
||||
else {
|
||||
reduces.put(newReq.attemptID, newReq);
|
||||
}
|
||||
addContainerReq(newReq);
|
||||
// find the request matching this allocated container
|
||||
// and replace it with a new one
|
||||
ContainerRequest toBeReplacedReq =
|
||||
getContainerReqToReplace(allocated);
|
||||
if (toBeReplacedReq != null) {
|
||||
LOG.info("Placing a new container request for task attempt "
|
||||
+ toBeReplacedReq.attemptID);
|
||||
ContainerRequest newReq =
|
||||
getFilteredContainerRequest(toBeReplacedReq);
|
||||
decContainerReq(toBeReplacedReq);
|
||||
if (toBeReplacedReq.attemptID.getTaskId().getTaskType() ==
|
||||
TaskType.MAP) {
|
||||
maps.put(newReq.attemptID, newReq);
|
||||
}
|
||||
else {
|
||||
LOG.info("Could not map allocated container to a valid request."
|
||||
+ " Releasing allocated container " + allocated);
|
||||
reduces.put(newReq.attemptID, newReq);
|
||||
}
|
||||
addContainerReq(newReq);
|
||||
}
|
||||
else {
|
||||
assigned = assign(allocated);
|
||||
if (assigned != null) {
|
||||
// Update resource requests
|
||||
decContainerReq(assigned);
|
||||
|
||||
// send the container-assigned event to task attempt
|
||||
eventHandler.handle(new TaskAttemptContainerAssignedEvent(
|
||||
assigned.attemptID, allocated, applicationACLs));
|
||||
|
||||
assignedRequests.add(allocated, assigned.attemptID);
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.info("Assigned container (" + allocated + ") "
|
||||
+ " to task " + assigned.attemptID + " on node "
|
||||
+ allocated.getNodeId().toString());
|
||||
}
|
||||
}
|
||||
else {
|
||||
//not assigned to any request, release the container
|
||||
LOG.info("Releasing unassigned and invalid container "
|
||||
+ allocated + ". RM has gone crazy, someone go look!"
|
||||
+ " Hey RM, if you are so rich, go donate to non-profits!");
|
||||
}
|
||||
LOG.info("Could not map allocated container to a valid request."
|
||||
+ " Releasing allocated container " + allocated);
|
||||
}
|
||||
|
||||
// release container if we could not assign it
|
||||
containerNotAssigned(allocated);
|
||||
it.remove();
|
||||
continue;
|
||||
}
|
||||
|
||||
// release container if it was blacklisted
|
||||
// or if we could not assign it
|
||||
if (blackListed || assigned == null) {
|
||||
containersReleased++;
|
||||
release(allocated.getId());
|
||||
}
|
||||
}
|
||||
|
||||
assignContainers(allocatedContainers);
|
||||
|
||||
// release container if we could not assign it
|
||||
it = allocatedContainers.iterator();
|
||||
while (it.hasNext()) {
|
||||
Container allocated = it.next();
|
||||
LOG.info("Releasing unassigned and invalid container "
|
||||
+ allocated + ". RM may have assignment issues");
|
||||
containerNotAssigned(allocated);
|
||||
}
|
||||
}
|
||||
|
||||
private ContainerRequest assign(Container allocated) {
|
||||
@SuppressWarnings("unchecked")
|
||||
private void containerAssigned(Container allocated,
|
||||
ContainerRequest assigned) {
|
||||
// Update resource requests
|
||||
decContainerReq(assigned);
|
||||
|
||||
// send the container-assigned event to task attempt
|
||||
eventHandler.handle(new TaskAttemptContainerAssignedEvent(
|
||||
assigned.attemptID, allocated, applicationACLs));
|
||||
|
||||
assignedRequests.add(allocated, assigned.attemptID);
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.info("Assigned container (" + allocated + ") "
|
||||
+ " to task " + assigned.attemptID + " on node "
|
||||
+ allocated.getNodeId().toString());
|
||||
}
|
||||
}
|
||||
|
||||
private void containerNotAssigned(Container allocated) {
|
||||
containersReleased++;
|
||||
release(allocated.getId());
|
||||
}
|
||||
|
||||
private ContainerRequest assignWithoutLocality(Container allocated) {
|
||||
ContainerRequest assigned = null;
|
||||
|
||||
Priority priority = allocated.getPriority();
|
||||
|
@ -877,18 +890,24 @@ public class RMContainerAllocator extends RMContainerRequestor
|
|||
LOG.debug("Assigning container " + allocated + " to reduce");
|
||||
}
|
||||
assigned = assignToReduce(allocated);
|
||||
} else if (PRIORITY_MAP.equals(priority)) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Assigning container " + allocated + " to map");
|
||||
}
|
||||
assigned = assignToMap(allocated);
|
||||
} else {
|
||||
LOG.warn("Container allocated at unwanted priority: " + priority +
|
||||
". Returning to RM...");
|
||||
}
|
||||
|
||||
return assigned;
|
||||
}
|
||||
|
||||
private void assignContainers(List<Container> allocatedContainers) {
|
||||
Iterator<Container> it = allocatedContainers.iterator();
|
||||
while (it.hasNext()) {
|
||||
Container allocated = it.next();
|
||||
ContainerRequest assigned = assignWithoutLocality(allocated);
|
||||
if (assigned != null) {
|
||||
containerAssigned(allocated, assigned);
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
assignMapsWithLocality(allocatedContainers);
|
||||
}
|
||||
|
||||
private ContainerRequest getContainerReqToReplace(Container allocated) {
|
||||
LOG.info("Finding containerReq for allocated container: " + allocated);
|
||||
|
@ -959,11 +978,15 @@ public class RMContainerAllocator extends RMContainerRequestor
|
|||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private ContainerRequest assignToMap(Container allocated) {
|
||||
//try to assign to maps if present
|
||||
//first by host, then by rack, followed by *
|
||||
ContainerRequest assigned = null;
|
||||
while (assigned == null && maps.size() > 0) {
|
||||
private void assignMapsWithLocality(List<Container> allocatedContainers) {
|
||||
// try to assign to all nodes first to match node local
|
||||
Iterator<Container> it = allocatedContainers.iterator();
|
||||
while(it.hasNext() && maps.size() > 0){
|
||||
Container allocated = it.next();
|
||||
Priority priority = allocated.getPriority();
|
||||
assert PRIORITY_MAP.equals(priority);
|
||||
// "if (maps.containsKey(tId))" below should be almost always true.
|
||||
// hence this while loop would almost always have O(1) complexity
|
||||
String host = allocated.getNodeId().getHost();
|
||||
LinkedList<TaskAttemptId> list = mapsHostMapping.get(host);
|
||||
while (list != null && list.size() > 0) {
|
||||
|
@ -972,7 +995,9 @@ public class RMContainerAllocator extends RMContainerRequestor
|
|||
}
|
||||
TaskAttemptId tId = list.removeFirst();
|
||||
if (maps.containsKey(tId)) {
|
||||
assigned = maps.remove(tId);
|
||||
ContainerRequest assigned = maps.remove(tId);
|
||||
containerAssigned(allocated, assigned);
|
||||
it.remove();
|
||||
JobCounterUpdateEvent jce =
|
||||
new JobCounterUpdateEvent(assigned.attemptID.getTaskId().getJobId());
|
||||
jce.addCounterUpdate(JobCounter.DATA_LOCAL_MAPS, 1);
|
||||
|
@ -984,39 +1009,56 @@ public class RMContainerAllocator extends RMContainerRequestor
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (assigned == null) {
|
||||
String rack = RackResolver.resolve(host).getNetworkLocation();
|
||||
list = mapsRackMapping.get(rack);
|
||||
while (list != null && list.size() > 0) {
|
||||
TaskAttemptId tId = list.removeFirst();
|
||||
if (maps.containsKey(tId)) {
|
||||
assigned = maps.remove(tId);
|
||||
JobCounterUpdateEvent jce =
|
||||
new JobCounterUpdateEvent(assigned.attemptID.getTaskId().getJobId());
|
||||
jce.addCounterUpdate(JobCounter.RACK_LOCAL_MAPS, 1);
|
||||
eventHandler.handle(jce);
|
||||
rackLocalAssigned++;
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Assigned based on rack match " + rack);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (assigned == null && maps.size() > 0) {
|
||||
TaskAttemptId tId = maps.keySet().iterator().next();
|
||||
assigned = maps.remove(tId);
|
||||
}
|
||||
|
||||
// try to match all rack local
|
||||
it = allocatedContainers.iterator();
|
||||
while(it.hasNext() && maps.size() > 0){
|
||||
Container allocated = it.next();
|
||||
Priority priority = allocated.getPriority();
|
||||
assert PRIORITY_MAP.equals(priority);
|
||||
// "if (maps.containsKey(tId))" below should be almost always true.
|
||||
// hence this while loop would almost always have O(1) complexity
|
||||
String host = allocated.getNodeId().getHost();
|
||||
String rack = RackResolver.resolve(host).getNetworkLocation();
|
||||
LinkedList<TaskAttemptId> list = mapsRackMapping.get(rack);
|
||||
while (list != null && list.size() > 0) {
|
||||
TaskAttemptId tId = list.removeFirst();
|
||||
if (maps.containsKey(tId)) {
|
||||
ContainerRequest assigned = maps.remove(tId);
|
||||
containerAssigned(allocated, assigned);
|
||||
it.remove();
|
||||
JobCounterUpdateEvent jce =
|
||||
new JobCounterUpdateEvent(assigned.attemptID.getTaskId().getJobId());
|
||||
jce.addCounterUpdate(JobCounter.OTHER_LOCAL_MAPS, 1);
|
||||
jce.addCounterUpdate(JobCounter.RACK_LOCAL_MAPS, 1);
|
||||
eventHandler.handle(jce);
|
||||
rackLocalAssigned++;
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Assigned based on * match");
|
||||
LOG.debug("Assigned based on rack match " + rack);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return assigned;
|
||||
|
||||
// assign remaining
|
||||
it = allocatedContainers.iterator();
|
||||
while(it.hasNext() && maps.size() > 0){
|
||||
Container allocated = it.next();
|
||||
Priority priority = allocated.getPriority();
|
||||
assert PRIORITY_MAP.equals(priority);
|
||||
TaskAttemptId tId = maps.keySet().iterator().next();
|
||||
ContainerRequest assigned = maps.remove(tId);
|
||||
containerAssigned(allocated, assigned);
|
||||
it.remove();
|
||||
JobCounterUpdateEvent jce =
|
||||
new JobCounterUpdateEvent(assigned.attemptID.getTaskId().getJobId());
|
||||
jce.addCounterUpdate(JobCounter.OTHER_LOCAL_MAPS, 1);
|
||||
eventHandler.handle(jce);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Assigned based on * match");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -190,6 +190,92 @@ public class TestRMContainerAllocator {
|
|||
checkAssignments(new ContainerRequestEvent[] { event1, event2, event3 },
|
||||
assigned, false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMapNodeLocality() throws Exception {
|
||||
// test checks that ordering of allocated containers list from the RM does
|
||||
// not affect the map->container assignment done by the AM. If there is a
|
||||
// node local container available for a map then it should be assigned to
|
||||
// that container and not a rack-local container that happened to be seen
|
||||
// earlier in the allocated containers list from the RM.
|
||||
// Regression test for MAPREDUCE-4893
|
||||
LOG.info("Running testMapNodeLocality");
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
MyResourceManager rm = new MyResourceManager(conf);
|
||||
rm.start();
|
||||
DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext()
|
||||
.getDispatcher();
|
||||
|
||||
// Submit the application
|
||||
RMApp app = rm.submitApp(1024);
|
||||
dispatcher.await();
|
||||
|
||||
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
|
||||
amNodeManager.nodeHeartbeat(true);
|
||||
dispatcher.await();
|
||||
|
||||
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt()
|
||||
.getAppAttemptId();
|
||||
rm.sendAMLaunched(appAttemptId);
|
||||
dispatcher.await();
|
||||
|
||||
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
|
||||
Job mockJob = mock(Job.class);
|
||||
when(mockJob.getReport()).thenReturn(
|
||||
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
||||
0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
|
||||
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
||||
appAttemptId, mockJob);
|
||||
|
||||
// add resources to scheduler
|
||||
MockNM nodeManager1 = rm.registerNode("h1:1234", 3072); // can assign 2 maps
|
||||
rm.registerNode("h2:1234", 10240); // wont heartbeat on node local node
|
||||
MockNM nodeManager3 = rm.registerNode("h3:1234", 1536); // assign 1 map
|
||||
dispatcher.await();
|
||||
|
||||
// create the container requests for maps
|
||||
ContainerRequestEvent event1 = createReq(jobId, 1, 1024,
|
||||
new String[] { "h1" });
|
||||
allocator.sendRequest(event1);
|
||||
ContainerRequestEvent event2 = createReq(jobId, 2, 1024,
|
||||
new String[] { "h1" });
|
||||
allocator.sendRequest(event2);
|
||||
ContainerRequestEvent event3 = createReq(jobId, 3, 1024,
|
||||
new String[] { "h2" });
|
||||
allocator.sendRequest(event3);
|
||||
|
||||
// this tells the scheduler about the requests
|
||||
// as nodes are not added, no allocations
|
||||
List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
|
||||
dispatcher.await();
|
||||
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
|
||||
|
||||
// update resources in scheduler
|
||||
// Node heartbeat from rack-local first. This makes node h3 the first in the
|
||||
// list of allocated containers but it should not be assigned to task1.
|
||||
nodeManager3.nodeHeartbeat(true);
|
||||
// Node heartbeat from node-local next. This allocates 2 node local
|
||||
// containers for task1 and task2. These should be matched with those tasks.
|
||||
nodeManager1.nodeHeartbeat(true);
|
||||
dispatcher.await();
|
||||
|
||||
assigned = allocator.schedule();
|
||||
dispatcher.await();
|
||||
checkAssignments(new ContainerRequestEvent[] { event1, event2, event3 },
|
||||
assigned, false);
|
||||
// remove the rack-local assignment that should have happened for task3
|
||||
for(TaskAttemptContainerAssignedEvent event : assigned) {
|
||||
if(event.getTaskAttemptID().equals(event3.getAttemptID())) {
|
||||
assigned.remove(event);
|
||||
Assert.assertTrue(
|
||||
event.getContainer().getNodeId().getHost().equals("h3"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
checkAssignments(new ContainerRequestEvent[] { event1, event2},
|
||||
assigned, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResource() throws Exception {
|
||||
|
@ -1202,7 +1288,7 @@ public class TestRMContainerAllocator {
|
|||
if (checkHostMatch) {
|
||||
Assert.assertTrue("Not assigned to requested host", Arrays.asList(
|
||||
request.getHosts()).contains(
|
||||
assigned.getContainer().getNodeId().toString()));
|
||||
assigned.getContainer().getNodeId().getHost()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -136,32 +136,47 @@ public class TestKeyValueTextInputFormat extends TestCase {
|
|||
}
|
||||
|
||||
public void testUTF8() throws Exception {
|
||||
LineReader in = makeStream("abcd\u20acbdcd\u20ac");
|
||||
Text line = new Text();
|
||||
in.readLine(line);
|
||||
assertEquals("readLine changed utf8 characters",
|
||||
"abcd\u20acbdcd\u20ac", line.toString());
|
||||
in = makeStream("abc\u200axyz");
|
||||
in.readLine(line);
|
||||
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
|
||||
LineReader in = null;
|
||||
|
||||
try {
|
||||
in = makeStream("abcd\u20acbdcd\u20ac");
|
||||
Text line = new Text();
|
||||
in.readLine(line);
|
||||
assertEquals("readLine changed utf8 characters",
|
||||
"abcd\u20acbdcd\u20ac", line.toString());
|
||||
in = makeStream("abc\u200axyz");
|
||||
in.readLine(line);
|
||||
assertEquals("split on fake newline", "abc\u200axyz", line.toString());
|
||||
} finally {
|
||||
if (in != null) {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testNewLines() throws Exception {
|
||||
LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
|
||||
Text out = new Text();
|
||||
in.readLine(out);
|
||||
assertEquals("line1 length", 1, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line2 length", 2, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line3 length", 0, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line4 length", 3, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line5 length", 4, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line5 length", 5, out.getLength());
|
||||
assertEquals("end of file", 0, in.readLine(out));
|
||||
LineReader in = null;
|
||||
try {
|
||||
in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
|
||||
Text out = new Text();
|
||||
in.readLine(out);
|
||||
assertEquals("line1 length", 1, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line2 length", 2, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line3 length", 0, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line4 length", 3, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line5 length", 4, out.getLength());
|
||||
in.readLine(out);
|
||||
assertEquals("line5 length", 5, out.getLength());
|
||||
assertEquals("end of file", 0, in.readLine(out));
|
||||
} finally {
|
||||
if (in != null) {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void writeFile(FileSystem fs, Path name,
|
||||
|
@ -183,14 +198,21 @@ public class TestKeyValueTextInputFormat extends TestCase {
|
|||
InputSplit split,
|
||||
JobConf job) throws IOException {
|
||||
List<Text> result = new ArrayList<Text>();
|
||||
RecordReader<Text, Text> reader = format.getRecordReader(split, job,
|
||||
voidReporter);
|
||||
Text key = reader.createKey();
|
||||
Text value = reader.createValue();
|
||||
while (reader.next(key, value)) {
|
||||
result.add(value);
|
||||
value = reader.createValue();
|
||||
}
|
||||
RecordReader<Text, Text> reader = null;
|
||||
|
||||
try {
|
||||
reader = format.getRecordReader(split, job, voidReporter);
|
||||
Text key = reader.createKey();
|
||||
Text value = reader.createValue();
|
||||
while (reader.next(key, value)) {
|
||||
result.add(value);
|
||||
value = (Text) reader.createValue();
|
||||
}
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -281,6 +281,9 @@ Release 0.23.7 - UNRELEASED
|
|||
|
||||
BUG FIXES
|
||||
|
||||
YARN-343. Capacity Scheduler maximum-capacity value -1 is invalid (Xuan
|
||||
Gong via tgraves)
|
||||
|
||||
Release 0.23.6 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -99,6 +99,9 @@ public class CapacitySchedulerConfiguration extends Configuration {
|
|||
@Private
|
||||
public static final float MAXIMUM_CAPACITY_VALUE = 100;
|
||||
|
||||
@Private
|
||||
public static final float DEFAULT_MAXIMUM_CAPACITY_VALUE = -1.0f;
|
||||
|
||||
@Private
|
||||
public static final int DEFAULT_USER_LIMIT = 100;
|
||||
|
||||
|
@ -206,6 +209,8 @@ public class CapacitySchedulerConfiguration extends Configuration {
|
|||
public float getMaximumCapacity(String queue) {
|
||||
float maxCapacity = getFloat(getQueuePrefix(queue) + MAXIMUM_CAPACITY,
|
||||
MAXIMUM_CAPACITY_VALUE);
|
||||
maxCapacity = (maxCapacity == DEFAULT_MAXIMUM_CAPACITY_VALUE) ?
|
||||
MAXIMUM_CAPACITY_VALUE : maxCapacity;
|
||||
return maxCapacity;
|
||||
}
|
||||
|
||||
|
|
|
@ -243,6 +243,18 @@ public class TestCapacityScheduler {
|
|||
LOG.info("Setup top-level queues a and b");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaximumCapacitySetup() {
|
||||
float delta = 0.0000001f;
|
||||
CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
|
||||
assertEquals(CapacitySchedulerConfiguration.MAXIMUM_CAPACITY_VALUE,conf.getMaximumCapacity(A),delta);
|
||||
conf.setMaximumCapacity(A, 50.0f);
|
||||
assertEquals(50.0f, conf.getMaximumCapacity(A),delta);
|
||||
conf.setMaximumCapacity(A, -1);
|
||||
assertEquals(CapacitySchedulerConfiguration.MAXIMUM_CAPACITY_VALUE,conf.getMaximumCapacity(A),delta);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRefreshQueues() throws Exception {
|
||||
CapacityScheduler cs = new CapacityScheduler();
|
||||
|
|
Loading…
Reference in New Issue