diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index bcd337a2e77..7a575344257 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -11,6 +11,9 @@ Trunk (Unreleased) NEW FEATURES + HADOOP-8561. Introduce HADOOP_PROXY_USER for secure impersonation in child + hadoop client processes. (Yu Gao via llu) + HADOOP-8469. Make NetworkTopology class pluggable. (Junping Du via szetszwo) @@ -129,9 +132,6 @@ Trunk (Unreleased) HADOOP-8776. Provide an option in test-patch that can enable/disable compiling native code. (Chris Nauroth via suresh) - HADOOP-9004. Allow security unit tests to use external KDC. (Stephen Chu - via suresh) - HADOOP-6616. Improve documentation for rack awareness. (Adam Faris via jghoman) @@ -141,8 +141,16 @@ Trunk (Unreleased) HADOOP-9093. Move all the Exception in PathExceptions to o.a.h.fs package. (suresh) + HADOOP-9140 Cleanup rpc PB protos (sanjay Radia) + + HADOOP-9162. Add utility to check native library availability. + (Binglin Chang via suresh) + BUG FIXES + HADOOP-9041. FsUrlStreamHandlerFactory could cause an infinite loop in + FileSystem initialization. (Yanbo Liang and Radim Kolar via llu) + HADOOP-8418. Update UGI Principal classes name for running with IBM JDK on 64 bits Windows. (Yu Gao via eyang) @@ -295,6 +303,12 @@ Trunk (Unreleased) HADOOP-9121. InodeTree.java has redundant check for vName while throwing exception. (Arup Malakar via suresh) + HADOOP-9131. Turn off TestLocalFileSystem#testListStatusWithColons on + Windows. (Chris Nauroth via suresh) + + HADOOP-8957 AbstractFileSystem#IsValidName should be overridden for + embedded file systems like ViewFs (Chris Nauroth via Sanjay Radia) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) @@ -395,6 +409,17 @@ Release 2.0.3-alpha - Unreleased HADOOP-9042. Add a test for umask in FileSystemContractBaseTest. (Colin McCabe via eli) + HADOOP-9127. Update documentation for ZooKeeper Failover Controller. + (Daisuke Kobayashi via atm) + + HADOOP-9004. Allow security unit tests to use external KDC. (Stephen Chu + via suresh) + + HADOOP-9147. Add missing fields to FIleStatus.toString. + (Jonathan Allen via suresh) + + HADOOP-8427. Convert Forrest docs to APT, incremental. (adi2 via tucu) + OPTIMIZATIONS HADOOP-8866. SampleQuantiles#query is O(N^2) instead of O(N). (Andrew Wang @@ -473,6 +498,24 @@ Release 2.0.3-alpha - Unreleased HADOOP-9070. Kerberos SASL server cannot find kerberos key. (daryn via atm) + HADOOP-6762. Exception while doing RPC I/O closes channel + (Sam Rash and todd via todd) + + HADOOP-9126. FormatZK and ZKFC startup can fail due to zkclient connection + establishment delay. (Rakesh R and todd via todd) + + HADOOP-9113. o.a.h.fs.TestDelegationTokenRenewer is failing intermittently. + (Karthik Kambatla via eli) + + HADOOP-9135. JniBasedUnixGroupsMappingWithFallback should log at debug + rather than info during fallback. (Colin Patrick McCabe via todd) + + HADOOP-9152. HDFS can report negative DFS Used on clusters with very small + amounts of data. (Brock Noland via atm) + + HADOOP-9153. Support createNonRecursive in ViewFileSystem. + (Sandy Ryza via tomwhite) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES @@ -1184,6 +1227,8 @@ Release 0.23.6 - UNRELEASED HADOOP-9038. unit-tests for AllocatorPerContext.PathIterator (Ivan A. Veselovsky via bobby) + HADOOP-9105. 
FsShell -moveFromLocal erroneously fails (daryn via bobby) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml index 0fd183d7639..d3a98d0f0eb 100644 --- a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml @@ -260,7 +260,7 @@ - + @@ -272,7 +272,7 @@ - + diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index f6bbe0f5276..82e5b372c0a 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -378,9 +378,9 @@ src/main/proto/HAServiceProtocol.proto src/main/proto/IpcConnectionContext.proto src/main/proto/ProtocolInfo.proto - src/main/proto/RpcPayloadHeader.proto + src/main/proto/RpcHeader.proto src/main/proto/ZKFCProtocol.proto - src/main/proto/hadoop_rpc.proto + src/main/proto/ProtobufRpcEngine.proto diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index f57d9ab862f..17b41f77bef 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -31,6 +31,7 @@ function print_usage(){ echo " fs run a generic filesystem user client" echo " version print the version" echo " jar run a jar file" + echo " checknative [-a|-h] check native hadoop and compression libraries availability" echo " distcp copy file or directories recursively" echo " archive -archiveName NAME -p * create a hadoop archive" echo " classpath prints the class path needed to get the" @@ -100,6 +101,8 @@ case $COMMAND in CLASS=org.apache.hadoop.util.VersionInfo elif [ "$COMMAND" = "jar" ] ; then CLASS=org.apache.hadoop.util.RunJar + elif [ "$COMMAND" = "checknative" ] ; then + CLASS=org.apache.hadoop.util.NativeLibraryChecker elif [ "$COMMAND" = "distcp" ] ; then CLASS=org.apache.hadoop.tools.DistCp CLASSPATH=${CLASSPATH}:${TOOL_PATH} diff --git a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/HttpAuthentication.xml b/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/HttpAuthentication.xml deleted file mode 100644 index fc7b59ba70a..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/HttpAuthentication.xml +++ /dev/null @@ -1,127 +0,0 @@ - - - - - - - - -
- - Authentication for Hadoop HTTP web-consoles - -
- - -
- Introduction -

- This document describes how to configure Hadoop HTTP web-consoles to require user - authentication. -

-

- By default Hadoop HTTP web-consoles (JobTracker, NameNode, TaskTrackers and DataNodes) allow - access without any form of authentication. -

-

- Similarly to Hadoop RPC, Hadoop HTTP web-consoles can be configured to require Kerberos - authentication using HTTP SPNEGO protocol (supported by browsers like Firefox and Internet - Explorer). -

-

- In addition, Hadoop HTTP web-consoles support the equivalent of Hadoop's Pseudo/Simple - authentication. If this option is enabled, users must specify their user name in the first - browser interaction using the user.name query string parameter. For example: - http://localhost:50030/jobtracker.jsp?user.name=babu. -

-

- If a custom authentication mechanism is required for the HTTP web-consoles, it is possible - to implement a plugin to support the alternate authentication mechanism (refer to - Hadoop hadoop-auth for details on writing an AuthenticatorHandler). -

-

- The next section describes how to configure Hadoop HTTP web-consoles to require user - authentication. -

-
- -
- Configuration - -

- The following properties should be in the core-site.xml of all the nodes - in the cluster. -

- -

hadoop.http.filter.initializers: add to this property the - org.apache.hadoop.security.AuthenticationFilterInitializer initializer class. -

- -

hadoop.http.authentication.type: Defines authentication used for the HTTP - web-consoles. The supported values are: simple | kerberos | - #AUTHENTICATION_HANDLER_CLASSNAME#. The default value is simple. -

- -

hadoop.http.authentication.token.validity: Indicates how long (in seconds) - an authentication token is valid before it has to be renewed. The default value is - 36000. -

- -

hadoop.http.authentication.signature.secret.file: The signature secret - file for signing the authentication tokens. If not set, a random secret is generated at - startup time. The same secret should be used for all nodes in the cluster, JobTracker, - NameNode, DataNode and TaskTracker. The default value is - ${user.home}/hadoop-http-auth-signature-secret. - IMPORTANT: This file should be readable only by the Unix user running the daemons. -

- -

hadoop.http.authentication.cookie.domain: The domain to use for the HTTP - cookie that stores the authentication token. In order for authentication to work - correctly across all nodes in the cluster, the domain must be correctly set. - There is no default value; in that case the HTTP cookie will not have a domain and will work only - with the hostname issuing the HTTP cookie. -

- -

- IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings. - For this setting to work properly, all nodes in the cluster must be configured - to generate URLs with hostname.domain names in them. -

- -

hadoop.http.authentication.simple.anonymous.allowed: Indicates if anonymous - requests are allowed when using 'simple' authentication. The default value is - true -

- -

hadoop.http.authentication.kerberos.principal: Indicates the Kerberos - principal to be used for HTTP endpoint when using 'kerberos' authentication. - The principal short name must be HTTP per Kerberos HTTP SPNEGO specification. - The default value is HTTP/_HOST@$LOCALHOST, where _HOST -if present- - is replaced with bind address of the HTTP server. -

- -

hadoop.http.authentication.kerberos.keytab: Location of the keytab file - with the credentials for the Kerberos principal used for the HTTP endpoint. - The default value is ${user.home}/hadoop.keytab. -

- -
- - -
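Taken together, the properties above translate into a core-site.xml fragment along the following lines. This is only a sketch: the Kerberos realm, cookie domain, and file paths are placeholders to be replaced per deployment.

<!-- core-site.xml: Kerberos SPNEGO for the HTTP web-consoles (illustrative values) -->
<property>
  <name>hadoop.http.filter.initializers</name>
  <value>org.apache.hadoop.security.AuthenticationFilterInitializer</value>
</property>
<property>
  <name>hadoop.http.authentication.type</name>
  <value>kerberos</value>
</property>
<property>
  <name>hadoop.http.authentication.token.validity</name>
  <value>36000</value> <!-- token lifetime in seconds -->
</property>
<property>
  <name>hadoop.http.authentication.signature.secret.file</name>
  <value>/etc/hadoop/http-auth-signature-secret</value> <!-- placeholder path; readable only by the daemon user -->
</property>
<property>
  <name>hadoop.http.authentication.cookie.domain</name>
  <value>example.com</value> <!-- placeholder domain shared by all cluster hostnames -->
</property>
<property>
  <name>hadoop.http.authentication.kerberos.principal</name>
  <value>HTTP/_HOST@EXAMPLE.COM</value> <!-- placeholder realm; _HOST is replaced with the bind address -->
</property>
<property>
  <name>hadoop.http.authentication.kerberos.keytab</name>
  <value>/etc/hadoop/hadoop.keytab</value> <!-- placeholder keytab path -->
</property>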
- diff --git a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/cluster_setup.xml b/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/cluster_setup.xml deleted file mode 100644 index fedd1bee4d5..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/cluster_setup.xml +++ /dev/null @@ -1,1485 +0,0 @@ - - - - - - - -
- Cluster Setup -
- - - -
- Purpose - -

This document describes how to install, configure and manage non-trivial - Hadoop clusters ranging from a few nodes to extremely large clusters with - thousands of nodes.

-

- To play with Hadoop, you may first want to install Hadoop on a single machine (see Hadoop Quick Start). -

-
- -
- Pre-requisites - -
    -
  1. - Make sure all requisite software - is installed on all nodes in your cluster. -
  2. -
  3. - Get the Hadoop software. -
  4. -
-
- -
- Installation - -

Installing a Hadoop cluster typically involves unpacking the software - on all the machines in the cluster.

- -

Typically one machine in the cluster is designated as the - NameNode and another machine as the JobTracker, - exclusively. These are the masters. The rest of the machines in - the cluster act as both DataNode and - TaskTracker. These are the slaves.

- -

The root of the distribution is referred to as - HADOOP_PREFIX. All machines in the cluster usually have the same - HADOOP_PREFIX path.

-
- -
- Configuration - -

The following sections describe how to configure a Hadoop cluster.

- -
- Configuration Files - -

Hadoop configuration is driven by two types of important - configuration files:

-
    -
  1. - Read-only default configuration - - src/core/core-default.xml, - src/hdfs/hdfs-default.xml, - src/mapred/mapred-default.xml and - conf/mapred-queues.xml.template. -
  2. -
  3. - Site-specific configuration - - conf/core-site.xml, - conf/hdfs-site.xml, - conf/mapred-site.xml and - conf/mapred-queues.xml. -
  4. -
- -

To learn more about how the Hadoop framework is controlled by these - configuration files, look - here.

- -

Additionally, you can control the Hadoop scripts found in the - bin/ directory of the distribution, by setting site-specific - values via the conf/hadoop-env.sh.

-
- -
- Site Configuration - -

To configure the Hadoop cluster you will need to configure the - environment in which the Hadoop daemons execute as well as - the configuration parameters for the Hadoop daemons.

- -

The Hadoop daemons are NameNode/DataNode - and JobTracker/TaskTracker.

- -
- Configuring the Environment of the Hadoop Daemons - -

Administrators should use the conf/hadoop-env.sh script - to do site-specific customization of the Hadoop daemons' process - environment.

- -

At the very least you should specify the - JAVA_HOME so that it is correctly defined on each - remote node.

- -

Administrators can configure individual daemons using the - configuration options HADOOP_*_OPTS. Various options - available are shown below in the table.

- - - - - - -
DaemonConfigure Options
NameNodeHADOOP_NAMENODE_OPTS
DataNodeHADOOP_DATANODE_OPTS
SecondaryNamenodeHADOOP_SECONDARYNAMENODE_OPTS
- -

For example, to configure the NameNode to use parallelGC, the - following statement should be added to hadoop-env.sh: -
- export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}" -

- -

Other useful configuration parameters that you can customize - include:

-
    -
  • - HADOOP_LOG_DIR - The directory where the daemons' - log files are stored. They are automatically created if they don't - exist. -
  • -
  • - HADOOP_HEAPSIZE - The maximum amount of heapsize - to use, in MB e.g. 1000MB. This is used to - configure the heap size for the hadoop daemon. By default, - the value is 1000MB. -
  • -
-
- -
- Configuring the Hadoop Daemons - -

This section deals with important parameters to be specified in the - following:

-

conf/core-site.xml:

- - - - - - - - - - - - -
ParameterValueNotes
fs.defaultFSURI of NameNode.hdfs://hostname/
- -
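Written out as an actual entry in conf/core-site.xml, the parameter above would look roughly like the following; the NameNode hostname is a placeholder.

<property>
  <name>fs.defaultFS</name>
  <value>hdfs://namenode.example.com/</value> <!-- placeholder NameNode host -->
</property>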

conf/hdfs-site.xml:

- - - - - - - - - - - - - - - - - -
ParameterValueNotes
dfs.namenode.name.dir - Path on the local filesystem where the NameNode - stores the namespace and transactions logs persistently. - If this is a comma-delimited list of directories then the name - table is replicated in all of the directories, for redundancy. -
dfs.datanode.data.dir - Comma separated list of paths on the local filesystem of a - DataNode where it should store its blocks. - - If this is a comma-delimited list of directories, then data will - be stored in all named directories, typically on different - devices. -
- -

conf/mapred-site.xml:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterValueNotes
mapreduce.jobtracker.addressHost or IP and port of JobTracker.host:port pair.
mapreduce.jobtracker.system.dir - Path on the HDFS where the Map/Reduce framework stores - system files e.g. /hadoop/mapred/system/. - - This is in the default filesystem (HDFS) and must be accessible - from both the server and client machines. -
mapreduce.cluster.local.dir - Comma-separated list of paths on the local filesystem where - temporary Map/Reduce data is written. - Multiple paths help spread disk i/o.
mapred.tasktracker.{map|reduce}.tasks.maximum - The maximum number of Map/Reduce tasks, which are run - simultaneously on a given TaskTracker, individually. - - Defaults to 2 (2 maps and 2 reduces), but vary it depending on - your hardware. -
dfs.hosts/dfs.hosts.excludeList of permitted/excluded DataNodes. - If necessary, use these files to control the list of allowable - datanodes. -
mapreduce.jobtracker.hosts.filename/mapreduce.jobtracker.hosts.exclude.filenameList of permitted/excluded TaskTrackers. - If necessary, use these files to control the list of allowable - TaskTrackers. -
mapreduce.cluster.acls.enabledBoolean, specifying whether checks for queue ACLs and job ACLs - are to be done for authorizing users for doing queue operations and - job operations. - - If true, queue ACLs are checked while submitting - and administering jobs and job ACLs are checked for authorizing - view and modification of jobs. Queue ACLs are specified using the - configuration parameters of the form defined below under - mapred-queues.xml. Job ACLs are described at - mapred-tutorial in "Job Authorization" section. - For enabling this flag(mapreduce.cluster.acls.enabled), this is to be - set to true in mapred-site.xml on JobTracker node and on all - TaskTracker nodes. -
- -

Typically all the above parameters are marked as - - final to ensure that they cannot be overridden by user-applications. -

- -
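For instance, a dfs.namenode.name.dir entry in conf/hdfs-site.xml marked final might look as follows; the paths are placeholders, and the name table is replicated to each directory in the comma-delimited list as described above.

<property>
  <name>dfs.namenode.name.dir</name>
  <value>/data/1/dfs/nn,/data/2/dfs/nn</value> <!-- placeholder comma-delimited list of local directories -->
  <final>true</final> <!-- prevents user job configurations from overriding this value -->
</property>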

conf/mapred-queues.xml - :

-

This file is used to configure the queues in the Map/Reduce - system. Queues are abstract entities in the JobTracker that can be - used to manage collections of jobs. They provide a way for - administrators to organize jobs in specific ways and to enforce - certain policies on such collections, thus providing varying - levels of administrative control and management functions on jobs. -

-

One can imagine the following sample scenarios:

-
    -
  • Jobs submitted by a particular group of users can all be - submitted to one queue.
  • -
  • Long running jobs in an organization can be submitted to a - queue.
  • -
  • Short running jobs can be submitted to a queue and the number - of jobs that can run concurrently can be restricted.
  • -
-

The usage of queues is closely tied to the scheduler configured - at the JobTracker via mapreduce.jobtracker.taskscheduler. - The degree of support of queues depends on the scheduler used. Some - schedulers support a single queue, while others support more complex - configurations. Schedulers also implement the policies that apply - to jobs in a queue. Some schedulers, such as the Fairshare scheduler, - implement their own mechanisms for collections of jobs and do not rely - on queues provided by the framework. The administrators are - encouraged to refer to the documentation of the scheduler they are - interested in for determining the level of support for queues.

-

The Map/Reduce framework supports some basic operations on queues - such as job submission to a specific queue, access control for queues, - queue states, viewing configured queues and their properties - and refresh of queue properties. In order to fully implement some of - these operations, the framework takes the help of the configured - scheduler.

-

The following types of queue configurations are possible:

-
    -
  • Single queue: The default configuration in Map/Reduce consists - of a single queue, as supported by the default scheduler. All jobs - are submitted to this default queue, which maintains jobs in a priority - based FIFO order.
  • -
  • Multiple single level queues: Multiple queues are defined, and - jobs can be submitted to any of these queues. Different policies - can be applied to these queues by schedulers that support this - configuration to provide a better level of support. For example, - the capacity scheduler - provides ways of configuring different - capacity and fairness guarantees on these queues.
  • -
  • Hierarchical queues: Hierarchical queues are a configuration in - which queues can contain other queues within them recursively. The - queues that contain other queues are referred to as - container queues. Queues that do not contain other queues are - referred as leaf or job queues. Jobs can only be submitted to leaf - queues. Hierarchical queues can potentially offer a higher level - of control to administrators, as schedulers can now build a - hierarchy of policies where policies applicable to a container - queue can provide context for policies applicable to queues it - contains. It also opens up possibilities for delegating queue - administration where administration of queues in a container queue - can be turned over to a different set of administrators, within - the context provided by the container queue. For example, the - capacity scheduler - uses hierarchical queues to partition capacity of a cluster - among container queues, and allowing queues they contain to divide - that capacity in more ways.
  • -
- -

Most of the configuration of the queues can be refreshed/reloaded - without restarting the Map/Reduce sub-system by editing this - configuration file as described in the section on - reloading queue - configuration. - Not all configuration properties can be reloaded of course, - as the description of each property below will explain.

- -

The format of conf/mapred-queues.xml is different from the other - configuration files, supporting nested configuration - elements to support hierarchical queues. The format is as follows: -

- - - <queues> - <queue> - <name>$queue-name</name> - <state>$state</state> - <queue> - <name>$child-queue1</name> - <properties> - <property key="$key" value="$value"/> - ... - </properties> - <queue> - <name>$grand-child-queue1</name> - ... - </queue> - </queue> - <queue> - <name>$child-queue2</name> - ... - </queue> - ... - ... - ... - <queue> - <name>$leaf-queue</name> - <acl-submit-job>$acls</acl-submit-job> - <acl-administer-jobs>$acls</acl-administer-jobs> - <properties> - <property key="$key" value="$value"/> - ... - </properties> - </queue> - </queue> - </queues> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Tag/AttributeValue - Refresh-able? - Notes
queuesRoot element of the configuration file.Not-applicableAll the queues are nested inside this root element of the - file. There can be only one root queues element in the file.
aclsEnabledBoolean attribute to the - <queues> tag - specifying whether ACLs are supported for controlling job - submission and administration for all the queues - configured. - YesIf false, ACLs are ignored for all the - configured queues.

- If true, the user and group details of the user - are checked against the configured ACLs of the corresponding - job-queue while submitting and administering jobs. ACLs can be - specified for each queue using the queue-specific tags - "acl-$acl_name", defined below. ACLs are checked only against - the job-queues, i.e. the leaf-level queues; ACLs configured - for the rest of the queues in the hierarchy are ignored. -
queueA child element of the - <queues> tag or another - <queue>. Denotes a queue - in the system. - Not applicableQueues can be hierarchical and so this element can contain - children of this same type.
nameChild element of a - <queue> specifying the - name of the queue.NoName of the queue cannot contain the character ":" - which is reserved as the queue-name delimiter when addressing a - queue in a hierarchy.
stateChild element of a - <queue> specifying the - state of the queue. - YesEach queue has a corresponding state. A queue in - 'running' state can accept new jobs, while a queue in - 'stopped' state will stop accepting any new jobs. State - is defined and respected by the framework only for the - leaf-level queues and is ignored for all other queues. -

- The state of the queue can be viewed from the command line using - the 'bin/mapred queue' command and also on the Web - UI.

- Administrators can stop and start queues at runtime using the - feature of reloading - queue configuration. If a queue is stopped at runtime, it - will complete all the existing running jobs and will stop - accepting any new jobs. -
acl-submit-jobChild element of a - <queue> specifying the - list of users and groups that can submit jobs to the specified - queue.Yes - Applicable only to leaf-queues.

- The list of users and groups are both comma separated - list of names. The two lists are separated by a blank. - Example: user1,user2 group1,group2. - If you wish to define only a list of groups, provide - a blank at the beginning of the value. -

-
acl-administer-jobsChild element of a - <queue> specifying the - list of users and groups that can view job details, change the - priority of a job or kill a job that has been submitted to the - specified queue. - Yes - Applicable only to leaf-queues.

- The list of users and groups are both comma separated - list of names. The two lists are separated by a blank. - Example: user1,user2 group1,group2. - If you wish to define only a list of groups, provide - a blank at the beginning of the value. Note that the - owner of a job can always change the priority or kill - his/her own job, irrespective of the ACLs. -
propertiesChild element of a - <queue> specifying the - scheduler specific properties.Not applicableThe scheduler specific properties are the children of this - element specified as a group of <property> tags described - below. The JobTracker completely ignores these properties. These - can be used as per-queue properties needed by the scheduler - being configured. Please look at the scheduler specific - documentation as to how these properties are used by that - particular scheduler. -
propertyChild element of - <properties> for a - specific queue.Not applicableA single scheduler specific queue-property. Ignored by - the JobTracker and used by the scheduler that is configured.
keyAttribute of a - <property> for a - specific queue.Scheduler-specificThe name of a single scheduler specific queue-property.
valueAttribute of a - <property> for a - specific queue.Scheduler-specificThe value of a single scheduler specific queue-property. - The value can be anything that is left for the proper - interpretation by the scheduler that is configured.
- -

Once the queues are configured properly and the Map/Reduce - system is up and running, from the command line one can - get the list - of queues and - obtain - information specific to each queue. This information is also - available from the web UI. On the web UI, queue information can be - seen by going to queueinfo.jsp, linked to from the queues table-cell - in the cluster-summary table. The queueinfo.jsp prints the hierarchy - of queues as well as the specific information for each queue. -

- -

Users can submit jobs only to a - leaf-level queue by specifying the fully-qualified queue-name for - the property name mapreduce.job.queuename in the job - configuration. The character ':' is the queue-name delimiter and so, - for e.g., if one wants to submit to a configured job-queue 'Queue-C' - which is one of the sub-queues of 'Queue-B' which in-turn is a - sub-queue of 'Queue-A', then the job configuration should contain - property mapreduce.job.queuename set to the - <value>Queue-A:Queue-B:Queue-C</value>

-
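In the job configuration this is a single property, using the queue names from the example above:

<property>
  <name>mapreduce.job.queuename</name>
  <value>Queue-A:Queue-B:Queue-C</value> <!-- fully-qualified name of a leaf-level queue -->
</property>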
-
- Real-World Cluster Configurations - -

This section lists some non-default configuration parameters which - have been used to run the sort benchmark on very large - clusters.

- -
    -
  • -

    Some non-default configuration values used to run sort900, - that is 9TB of data sorted on a cluster with 900 nodes:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Configuration FileParameterValueNotes
    conf/hdfs-site.xmldfs.blocksize128m - HDFS blocksize of 128 MB for large file-systems. Sizes can be provided - in size-prefixed values (10k, 128m, 1g, etc.) or simply in bytes (134217728 for 128 MB, etc.). -
    conf/hdfs-site.xmldfs.namenode.handler.count40 - More NameNode server threads to handle RPCs from large - number of DataNodes. -
    conf/mapred-site.xmlmapreduce.reduce.shuffle.parallelcopies20 - Higher number of parallel copies run by reduces to fetch - outputs from very large number of maps. -
    conf/mapred-site.xmlmapreduce.map.java.opts-Xmx512M - Larger heap-size for child jvms of maps. -
    conf/mapred-site.xmlmapreduce.reduce.java.opts-Xmx512M - Larger heap-size for child jvms of reduces. -
    conf/mapred-site.xmlmapreduce.reduce.shuffle.input.buffer.percent0.80 - Larger amount of memory allocated for merging map output - in memory during the shuffle. Expressed as a fraction of - the total heap. -
    conf/mapred-site.xmlmapreduce.reduce.input.buffer.percent0.80 - Larger amount of memory allocated for retaining map output - in memory during the reduce. Expressed as a fraction of - the total heap. -
    conf/mapred-site.xmlmapreduce.task.io.sort.factor100More streams merged at once while sorting files.
    conf/mapred-site.xmlmapreduce.task.io.sort.mb200Higher memory-limit while sorting data.
    conf/core-site.xmlio.file.buffer.size131072Size of read/write buffer used in SequenceFiles.
    -
  • -
  • -

    Updates to some configuration values to run sort1400 and - sort2000, that is 14TB of data sorted on 1400 nodes and 20TB of - data sorted on 2000 nodes:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Configuration FileParameterValueNotes
    conf/mapred-site.xmlmapreduce.jobtracker.handler.count60 - More JobTracker server threads to handle RPCs from large - number of TaskTrackers. -
    conf/mapred-site.xmlmapreduce.reduce.shuffle.parallelcopies50
    conf/mapred-site.xmlmapreduce.tasktracker.http.threads50 - More worker threads for the TaskTracker's http server. The - http server is used by reduces to fetch intermediate - map-outputs. -
    conf/mapred-site.xmlmapreduce.map.java.opts-Xmx512M - Larger heap-size for child jvms of maps. -
    conf/mapred-site.xmlmapreduce.reduce.java.opts-Xmx1024MLarger heap-size for child jvms of reduces.
    -
  • -
-
-
- Configuring Memory Parameters for MapReduce Jobs -

- As MapReduce jobs could use varying amounts of memory, Hadoop - provides various configuration options to users and administrators - for managing memory effectively. Some of these options are job - specific and can be used by users. While setting up a cluster, - administrators can configure appropriate default values for these - options so that users jobs run out of the box. Other options are - cluster specific and can be used by administrators to enforce - limits and prevent misconfigured or memory intensive jobs from - causing undesired side effects on the cluster. -

-

- The values configured should - take into account the hardware resources of the cluster, such as the - amount of physical and virtual memory available for tasks, - the number of slots configured on the slaves and the requirements - for other processes running on the slaves. If right values are not - set, it is likely that jobs start failing with memory related - errors or in the worst case, even affect other tasks or - the slaves themselves. -

- -
- Monitoring Task Memory Usage -

- Before describing the memory options, it is - useful to look at a feature provided by Hadoop to monitor - memory usage of MapReduce tasks it runs. The basic objective - of this feature is to prevent MapReduce tasks from consuming - memory beyond a limit that would result in their affecting - other processes running on the slave, including other tasks - and daemons like the DataNode or TaskTracker. -

- -

- Note: For the time being, this feature is available - only for the Linux platform. -

- -

- Hadoop allows monitoring to be done both for virtual - and physical memory usage of tasks. The two kinds of monitoring - can be done independently of each other, and therefore the - options can be configured independently of each other. It - has been found in some environments, particularly related - to streaming, that the virtual memory recorded for tasks is high - because of libraries loaded by the programs used to run - the tasks. However, this memory is largely unused and does - not affect the slave's memory itself. In such cases, - monitoring based on physical memory can provide a more - accurate picture of memory usage. -

- -

- This feature considers that there is a limit on - the amount of virtual or physical memory on the slaves - that can be used by - the running MapReduce tasks. The rest of the memory is - assumed to be required for the system and other processes. - Since some jobs may require higher amount of memory for their - tasks than others, Hadoop allows jobs to specify how much - memory they expect to use at a maximum. Then by using - resource aware scheduling and monitoring, Hadoop tries to - ensure that at any time, only enough tasks are running on - the slaves as can meet the dual constraints of an individual - job's memory requirements and the total amount of memory - available for all MapReduce tasks. -

- -

- The TaskTracker monitors tasks at regular intervals. Each time, - it operates in two steps: -

- -
    - -
  • - In the first step, it - checks that a job's task and any child processes it - launches are not cumulatively using more virtual or physical - memory than specified. If both virtual and physical memory - monitoring is enabled, then virtual memory usage is checked - first, followed by physical memory usage. - Any task that is found to - use more memory is killed along with any child processes it - might have launched, and the task status is marked - failed. Repeated failures such as this will terminate - the job. -
  • - -
  • - In the next step, it checks that the cumulative virtual and - physical memory - used by all running tasks and their child processes - does not exceed the total virtual and physical memory limit, - respectively. Again, virtual memory limit is checked first, - followed by physical memory limit. In this case, it kills - enough number of tasks, along with any child processes they - might have launched, until the cumulative memory usage - is brought under limit. In the case of virtual memory limit - being exceeded, the tasks chosen for killing are - the ones that have made the least progress. In the case of - physical memory limit being exceeded, the tasks chosen - for killing are the ones that have used the maximum amount - of physical memory. Also, the status - of these tasks is marked as killed, and hence repeated - occurrence of this will not result in a job failure. -
  • - -
- -

- In either case, the task's diagnostic message will indicate the - reason why the task was terminated. -

- -

- Resource aware scheduling can ensure that tasks are scheduled - on a slave only if their memory requirement can be satisfied - by the slave. The Capacity Scheduler, for example, - takes virtual memory requirements into account while - scheduling tasks, as described in the section on - - memory based scheduling. -

- -

- Memory monitoring is enabled when certain configuration - variables are defined with non-zero values, as described below. -

- -
- -
- Job Specific Options -

- Memory related options that can be configured individually per - job are described in detail in the section on - - Configuring Memory Requirements For A Job in the MapReduce - tutorial. While setting up - the cluster, the Hadoop defaults for these options can be reviewed - and changed to better suit the job profiles expected to be run on - the cluster, as well as the hardware configuration.

-

- As with any other configuration option in Hadoop, if the - administrators desire to prevent users from overriding these - options in jobs they submit, these values can be marked as - final in the cluster configuration. -

-
- - -
- Cluster Specific Options - -

- This section describes the memory related options that are - used by the JobTracker and TaskTrackers, and cannot be changed - by jobs. The values set for these options should be the same - for all the slave nodes in a cluster. -

- -
    - -
  • - mapreduce.cluster.{map|reduce}memory.mb: These - options define the default amount of virtual memory that should be - allocated for MapReduce tasks running in the cluster. They - typically match the default values set for the options - mapreduce.{map|reduce}.memory.mb. They help in the - calculation of the total amount of virtual memory available for - MapReduce tasks on a slave, using the following equation:
    - Total virtual memory for all MapReduce tasks = - (mapreduce.cluster.mapmemory.mb * - mapreduce.tasktracker.map.tasks.maximum) + - (mapreduce.cluster.reducememory.mb * - mapreduce.tasktracker.reduce.tasks.maximum)
    - Typically, reduce tasks require more memory than map tasks. - Hence a higher value is recommended for - mapreduce.cluster.reducememory.mb. The value is - specified in MB. To set a value of 2GB for reduce tasks, set - mapreduce.cluster.reducememory.mb to 2048. -
  • - -
  • - mapreduce.jobtracker.max{map|reduce}memory.mb: - These options define the maximum amount of virtual memory that - can be requested by jobs using the parameters - mapreduce.{map|reduce}.memory.mb. The system - will reject any job that is submitted requesting for more - memory than these limits. Typically, the values for these - options should be set to satisfy the following constraint:
    - mapreduce.jobtracker.maxmapmemory.mb = - mapreduce.cluster.mapmemory.mb * - mapreduce.tasktracker.map.tasks.maximum
    - mapreduce.jobtracker.maxreducememory.mb = - mapreduce.cluster.reducememory.mb * - mapreduce.tasktracker.reduce.tasks.maximum

    - The value is specified in MB. If - mapreduce.cluster.reducememory.mb is set to 2GB and - there are 2 reduce slots configured in the slaves, the value - for mapreduce.jobtracker.maxreducememory.mb should - be set to 4096. -
  • - -
  • - mapreduce.tasktracker.reserved.physicalmemory.mb: - This option defines the amount of physical memory that is - marked for system and daemon processes. Using this, the amount - of physical memory available for MapReduce tasks is calculated - using the following equation:
    - Total physical memory for all MapReduce tasks = - Total physical memory available on the system - - mapreduce.tasktracker.reserved.physicalmemory.mb
    - The value is specified in MB. To set this value to 2GB, - specify the value as 2048. -
  • - -
  • - mapreduce.tasktracker.taskmemorymanager.monitoringinterval: - This option defines the time the TaskTracker waits between - two cycles of memory monitoring. The value is specified in - milliseconds. -
  • - -
- -

- Note: The virtual memory monitoring function is only - enabled if - the variables mapreduce.cluster.{map|reduce}memory.mb - and mapreduce.jobtracker.max{map|reduce}memory.mb - are set to values greater than zero. Likewise, the physical - memory monitoring function is only enabled if the variable - mapreduce.tasktracker.reserved.physicalmemory.mb - is set to a value greater than zero. -

-
-
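As a rough illustration, a conf/mapred-site.xml fragment combining the cluster-specific options above, for slaves with two reduce slots and 2 GB of virtual memory per reduce task, could look like the following. All values here are placeholders to be tuned for a particular cluster.

<property>
  <name>mapreduce.cluster.mapmemory.mb</name>
  <value>1024</value> <!-- placeholder default virtual memory per map task -->
</property>
<property>
  <name>mapreduce.cluster.reducememory.mb</name>
  <value>2048</value> <!-- default virtual memory per reduce task, as in the worked example above -->
</property>
<property>
  <name>mapreduce.jobtracker.maxmapmemory.mb</name>
  <value>2048</value> <!-- placeholder: mapmemory.mb x map slots -->
</property>
<property>
  <name>mapreduce.jobtracker.maxreducememory.mb</name>
  <value>4096</value> <!-- 2048 MB x 2 reduce slots -->
</property>
<property>
  <name>mapreduce.tasktracker.reserved.physicalmemory.mb</name>
  <value>2048</value> <!-- physical memory reserved for system and daemon processes -->
</property>
<property>
  <name>mapreduce.tasktracker.taskmemorymanager.monitoringinterval</name>
  <value>5000</value> <!-- placeholder monitoring interval in milliseconds -->
</property>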
- - -
- Task Controllers -

Task controllers are classes in the Hadoop Map/Reduce - framework that define how user's map and reduce tasks - are launched and controlled. They can - be used in clusters that require some customization in - the process of launching or controlling the user tasks. - For example, in some - clusters, there may be a requirement to run tasks as - the user who submitted the job, instead of as the task - tracker user, which is how tasks are launched by default. - This section describes how to configure and use - task controllers.

-

The following task controllers are available in - Hadoop. -

- - - - - - - - - - - - -
NameClass NameDescription
DefaultTaskControllerorg.apache.hadoop.mapred.DefaultTaskController The default task controller which Hadoop uses to manage task - execution. The tasks run as the task tracker user.
LinuxTaskControllerorg.apache.hadoop.mapred.LinuxTaskControllerThis task controller, which is supported only on Linux, - runs the tasks as the user who submitted the job. It requires - these user accounts to be created on the cluster nodes - where the tasks are launched. It - uses a setuid executable that is included in the Hadoop - distribution. The task tracker uses this executable to - launch and kill tasks. The setuid executable switches to - the user who has submitted the job and launches or kills - the tasks. For maximum security, this task controller - sets up restricted permissions and user/group ownership of - local files and directories used by the tasks such as the - job jar files, intermediate files, task log files and distributed - cache files. Particularly note that, because of this, except the - job owner and tasktracker, no other user can access any of the - local files/directories including those localized as part of the - distributed cache. -
-
- Configuring Task Controllers -

The task controller to be used can be configured by setting the - value of the following key in mapred-site.xml

- - - - - - - - - -
PropertyValueNotes
mapreduce.tasktracker.taskcontrollerFully qualified class name of the task controller classCurrently there are two implementations of task controller - in the Hadoop system, DefaultTaskController and LinuxTaskController. - Refer to the class names mentioned above to determine the value - to set for the class of choice. -
-
-
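For example, switching to the LinuxTaskController described in the next section would use entries like the following in mapred-site.xml; the group name is a placeholder for the special group discussed below.

<property>
  <name>mapreduce.tasktracker.taskcontroller</name>
  <value>org.apache.hadoop.mapred.LinuxTaskController</value>
</property>
<property>
  <name>mapreduce.tasktracker.group</name>
  <value>specialGroup</value> <!-- placeholder: group containing the TaskTracker user and no job submitters -->
</property>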
- Using the LinuxTaskController -

This section of the document describes the steps required to - use the LinuxTaskController.

- -

In order to use the LinuxTaskController, a setuid executable - should be built and deployed on the compute nodes. The - executable is named task-controller. To build the executable, - execute - ant task-controller -Dhadoop.conf.dir=/path/to/conf/dir. - - The path passed in -Dhadoop.conf.dir should be the path - on the cluster nodes where a configuration file for the setuid - executable would be located. The executable would be built to - build.dir/dist.dir/bin and should be installed to - $HADOOP_PREFIX/bin. -

- -

- The executable must have specific permissions as follows. The - executable should have 6050 or --Sr-s--- permissions - user-owned by root(super-user) and group-owned by a special group - of which the TaskTracker's user is the group member and no job - submitter is. If any job submitter belongs to this special group, - security will be compromised. This special group name should be - specified for the configuration property - "mapreduce.tasktracker.group" in both mapred-site.xml and - task-controller.cfg. - For example, let's say that the TaskTracker is run as user - mapred who is part of the groups users and - specialGroup any of them being the primary group. - Let also be that users has both mapred and - another user (job submitter) X as its members, and X does - not belong to specialGroup. Going by the above - description, the setuid/setgid executable should be set - 6050 or --Sr-s--- with user-owner as mapred and - group-owner as specialGroup which has - mapred as its member(and not users which has - X also as its member besides mapred). -

- -

- The LinuxTaskController requires that paths including and leading up - to the directories specified in - mapreduce.cluster.local.dir and hadoop.log.dir - be set to 755 permissions. -

- -
- task-controller.cfg -

The executable requires a configuration file called - taskcontroller.cfg to be - present in the configuration directory passed to the ant target - mentioned above. If the binary was not built with a specific - conf directory, the path defaults to - /path-to-binary/../conf. The configuration file must be - owned by the user running TaskTracker (user mapred in the - above example), group-owned by anyone and should have the - permissions 0400 or r--------. -

- -

The executable requires the following configuration items to be - present in the taskcontroller.cfg file. The items should - be specified as simple key=value pairs. -

- - - - - - - - - - - - - -
NameDescription
mapreduce.cluster.local.dirPath to mapreduce.cluster.local.directories. Should be same as the value - which was provided to key in mapred-site.xml. This is required to - validate paths passed to the setuid executable in order to prevent - arbitrary paths being passed to it.
hadoop.log.dirPath to hadoop log directory. Should be same as the value which - the TaskTracker is started with. This is required to set proper - permissions on the log files so that they can be written to by the user's - tasks and read by the TaskTracker for serving on the web UI.
mapreduce.tasktracker.groupGroup to which the TaskTracker belongs. The group owner of the - taskcontroller binary should be this group. Should be same as - the value with which the TaskTracker is configured. This - configuration is required for validating the secure access of the - task-controller binary.
-
-
- -
-
- Monitoring Health of TaskTracker Nodes -

Hadoop Map/Reduce provides a mechanism by which administrators - can configure the TaskTracker to run an administrator supplied - script periodically to determine if a node is healthy or not. - Administrators can determine if the node is in a healthy state - by performing any checks of their choice in the script. If the - script detects the node to be in an unhealthy state, it must print - a line to standard output beginning with the string ERROR. - The TaskTracker spawns the script periodically and checks its - output. If the script's output contains the string ERROR, - as described above, the node's status is reported as 'unhealthy' - and the node is black-listed on the JobTracker. No further tasks - will be assigned to this node. However, the - TaskTracker continues to run the script, so that if the node - becomes healthy again, it will be removed from the blacklisted - nodes on the JobTracker automatically. The node's health - along with the output of the script, if it is unhealthy, is - available to the administrator in the JobTracker's web interface. - The time since the node was healthy is also displayed on the - web interface. -

- -
- Configuring the Node Health Check Script -

The following parameters can be used to control the node health - monitoring script in mapred-site.xml.

- - - - - - - - - - - - - - - - - -
NameDescription
mapreduce.tasktracker.healthchecker.script.pathAbsolute path to the script which is periodically run by the - TaskTracker to determine if the node is - healthy or not. The file should be executable by the TaskTracker. - If the value of this key is empty or the file does - not exist or is not executable, node health monitoring - is not started.
mapreduce.tasktracker.healthchecker.intervalFrequency at which the node health script is run, - in milliseconds
mapreduce.tasktracker.healthchecker.script.timeoutTime after which the node health script will be killed by - the TaskTracker if unresponsive. - The node is marked unhealthy if the node health script times out.
mapreduce.tasktracker.healthchecker.script.argsExtra arguments that can be passed to the node health script - when launched. - These should be a comma-separated list of arguments.
-
-
- -
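A sketch of the corresponding mapred-site.xml entries follows; the script path is a placeholder, and the interval and timeout values are chosen only for illustration.

<property>
  <name>mapreduce.tasktracker.healthchecker.script.path</name>
  <value>/usr/local/bin/node-health.sh</value> <!-- placeholder; must be executable by the TaskTracker -->
</property>
<property>
  <name>mapreduce.tasktracker.healthchecker.interval</name>
  <value>60000</value> <!-- run the script every 60 seconds (placeholder) -->
</property>
<property>
  <name>mapreduce.tasktracker.healthchecker.script.timeout</name>
  <value>600000</value> <!-- placeholder timeout in milliseconds; the node is marked unhealthy on timeout -->
</property>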
- -
- Slaves - -

Typically you choose one machine in the cluster to act as the - NameNode and one machine to act as the - JobTracker, exclusively. The rest of the machines act as - both a DataNode and TaskTracker and are - referred to as slaves.

- -

List all slave hostnames or IP addresses in your - conf/slaves file, one per line.

-
- -
- Logging - -

Hadoop uses the Apache - log4j via the Apache - Commons Logging framework for logging. Edit the - conf/log4j.properties file to customize the Hadoop - daemons' logging configuration (log-formats and so on).

- -
- History Logging - -

The job history files are stored in a central location, - mapreduce.jobtracker.jobhistory.location, which can also be on DFS; - its default value is ${HADOOP_LOG_DIR}/history. - The history web UI is accessible from the JobTracker web UI.

- -

The history files are also logged to the user-specified directory - mapreduce.job.userhistorylocation, - which defaults to the job output directory. The files are stored in - "_logs/history/" in the specified directory. Hence, by default - they will be in "mapreduce.output.fileoutputformat.outputdir/_logs/history/". Users can stop - logging by giving the value none for - mapreduce.job.userhistorylocation.

- -

Users can view a summary of the history logs in the specified directory - using the following command:
- $ bin/hadoop job -history output-dir
- This command will print job details, failed and killed tip - details.
- More details about the job such as successful tasks and - task attempts made for each task can be viewed using the - following command
- $ bin/hadoop job -history all output-dir

-
-
- -

Once all the necessary configuration is complete, distribute the files - to the HADOOP_CONF_DIR directory on all the machines, - typically ${HADOOP_PREFIX}/conf.

-
-
- Cluster Restartability -
- Map/Reduce -

The job tracker restart can recover running jobs if - mapreduce.jobtracker.restart.recover is set to true and - JobHistory logging is enabled. Also, the - mapreduce.jobtracker.jobhistory.block.size value should be - set to an optimal value to dump job history to disk as soon as - possible; the typical value is 3145728 (3 MB).

-
-
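Expressed as mapred-site.xml entries, using the typical block size mentioned above:

<property>
  <name>mapreduce.jobtracker.restart.recover</name>
  <value>true</value> <!-- allow the JobTracker to recover running jobs on restart -->
</property>
<property>
  <name>mapreduce.jobtracker.jobhistory.block.size</name>
  <value>3145728</value> <!-- 3 MB, so job history reaches disk promptly -->
</property>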
- -
- Hadoop Rack Awareness -

- Both HDFS and Map/Reduce components are rack-aware. HDFS block placement will use rack - awareness for fault tolerance by placing one block replica on a different rack. This provides - data availability in the event of a network switch failure within the cluster. The jobtracker uses rack - awareness to reduce network transfers of HDFS data blocks by attempting to schedule tasks on datanodes with a local - copy of needed HDFS blocks. If the tasks cannot be scheduled on the datanodes - containing the needed HDFS blocks, then the tasks will be scheduled on the same rack to reduce network transfers if possible. -

-

The NameNode and the JobTracker obtain the rack id of the cluster slaves by invoking either - an external script or java class as specified by configuration files. Using either the - java class or external script for topology, output must adhere to the java - DNSToSwitchMapping - interface. The interface expects a one-to-one correspondence to be maintained - and the topology information in the format of '/myrack/myhost', where '/' is the topology - delimiter, 'myrack' is the rack identifier, and 'myhost' is the individual host. Assuming - a single /24 subnet per rack, one could use the format of '/192.168.100.0/192.168.100.5' as a - unique rack-host topology mapping. -

-

- To use the java class for topology mapping, the class name is specified by the - 'topology.node.switch.mapping.impl' parameter in the configuration file. - An example, NetworkTopology.java, is included with the hadoop distribution and can be customized - by the hadoop administrator. If not included with your distribution, NetworkTopology.java can also be found in the Hadoop - - subversion tree. Using a java class instead of an external script has a slight performance benefit in - that it doesn't need to fork an external process when a new slave node registers itself with the jobtracker or namenode. - As this class is only used during slave node registration, the performance benefit is limited. -

-

- If implementing an external script, it will be specified with the - topology.script.file.name parameter in the configuration files. Unlike the java - class, the external topology script is not included with the Hadoop distribution and is provided by the - administrator. Hadoop will send multiple IP addresses to ARGV when forking the topology script. The - number of IP addresses sent to the topology script is controlled with net.topology.script.number.args - and defaults to 100. If net.topology.script.number.args was changed to 1, a topology script would - get forked for each IP submitted by datanodes and/or tasktrackers. Below are example topology scripts. -

-
- Python example - - - #!/usr/bin/python - - # this script makes assumptions about the physical environment. - # 1) each rack is its own layer 3 network with a /24 subnet, which could be typical where each rack has its own - # switch with uplinks to a central core router. - # - # +-----------+ - # |core router| - # +-----------+ - # / \ - # +-----------+ +-----------+ - # |rack switch| |rack switch| - # +-----------+ +-----------+ - # | data node | | data node | - # +-----------+ +-----------+ - # | data node | | data node | - # +-----------+ +-----------+ - # - # 2) topology script gets list of IP's as input, calculates network address, and prints '/network_address/ip'. - - import netaddr - import sys - sys.argv.pop(0) # discard name of topology script from argv list as we just want IP addresses - - netmask = '255.255.255.0' # set netmask to what's being used in your environment. The example uses a /24 - - for ip in sys.argv: # loop over list of datanode IP's - address = '{0}/{1}'.format(ip, netmask) # format address string so it looks like 'ip/netmask' to make netaddr work - try: - network_address = netaddr.IPNetwork(address).network # calculate and print network address - print "/{0}".format(network_address) - except: - print "/rack-unknown" # print catch-all value if unable to calculate network address - - - -
- -
- Bash example - - - #!/bin/bash - # Here's a bash example to show just how simple these scripts can be - - # Assuming we have flat network with everything on a single switch, we can fake a rack topology. - # This could occur in a lab environment where we have limited nodes,like 2-8 physical machines on a unmanaged switch. - # This may also apply to multiple virtual machines running on the same physical hardware. - # The number of machines isn't important, but that we are trying to fake a network topology when there isn't one. - # - # +----------+ +--------+ - # |jobtracker| |datanode| - # +----------+ +--------+ - # \ / - # +--------+ +--------+ +--------+ - # |datanode|--| switch |--|datanode| - # +--------+ +--------+ +--------+ - # / \ - # +--------+ +--------+ - # |datanode| |namenode| - # +--------+ +--------+ - # - # With this network topology, we are treating each host as a rack. This is being done by taking the last octet - # in the datanode's IP and prepending it with the word '/rack-'. The advantage for doing this is so HDFS - # can create its 'off-rack' block copy. - - # 1) 'echo $@' will echo all ARGV values to xargs. - # 2) 'xargs' will enforce that we print a single argv value per line - # 3) 'awk' will split fields on dots and append the last field to the string '/rack-'. If awk - # fails to split on four dots, it will still print '/rack-' last field value - - echo $@ | xargs -n 1 | awk -F '.' '{print "/rack-"$NF}' - - - - -
- - -

- If topology.script.file.name or topology.node.switch.mapping.impl is - not set, the rack id '/default-rack' is returned for any passed IP address. - While this behavior appears desirable, it can cause issues with HDFS block replication: the - default behavior is to write one block replica off rack, which is impossible when there is - only a single rack named '/default-rack'. -

-

- An additional configuration setting is mapred.cache.task.levels, which determines - the number of levels (in the network topology) of caches. So, for example, at the - default value of 2, two levels of caches will be constructed - one for hosts - (host -> task mapping) and another for racks (rack -> task mapping), giving us our one-to-one - mapping of '/myrack/myhost'. -

-
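For an external script, the rack-awareness settings described above amount to a small core-site.xml fragment; the script path is a placeholder for an administrator-provided file such as the Python or Bash examples shown earlier.

<property>
  <name>topology.script.file.name</name>
  <value>/etc/hadoop/topology.py</value> <!-- placeholder path to an administrator-provided topology script -->
</property>
<property>
  <name>net.topology.script.number.args</name>
  <value>100</value> <!-- maximum number of IP addresses passed to the script per invocation -->
</property>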
- -
- Hadoop Startup - -

To start a Hadoop cluster you will need to start both the HDFS and - Map/Reduce cluster.

- -

- Format a new distributed filesystem:
- $ bin/hadoop namenode -format -

- -

- Start the HDFS with the following command, run on the designated - NameNode:
- $ bin/start-dfs.sh -

-

The bin/start-dfs.sh script also consults the - ${HADOOP_CONF_DIR}/slaves file on the NameNode - and starts the DataNode daemon on all the listed slaves.

- -

- Start Map-Reduce with the following command, run on the designated - JobTracker:
- $ bin/start-mapred.sh -

-

The bin/start-mapred.sh script also consults the - ${HADOOP_CONF_DIR}/slaves file on the JobTracker - and starts the TaskTracker daemon on all the listed slaves. -

-
- -
- Hadoop Shutdown - -

- Stop HDFS with the following command, run on the designated - NameNode:
- $ bin/stop-dfs.sh -

-

The bin/stop-dfs.sh script also consults the - ${HADOOP_CONF_DIR}/slaves file on the NameNode - and stops the DataNode daemon on all the listed slaves.

- -

- Stop Map/Reduce with the following command, run on the designated - JobTracker:
- $ bin/stop-mapred.sh
-

-

The bin/stop-mapred.sh script also consults the - ${HADOOP_CONF_DIR}/slaves file on the JobTracker - and stops the TaskTracker daemon on all the listed slaves.

-
- - -
diff --git a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/commands_manual.xml b/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/commands_manual.xml deleted file mode 100644 index ff893a3db6c..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/commands_manual.xml +++ /dev/null @@ -1,798 +0,0 @@ - - - - - -
- Hadoop Commands Guide -
- - -
- Overview -

- All Hadoop commands are invoked by the bin/hadoop script. Running the Hadoop - script without any arguments prints the description for all commands. -

-

- Usage: hadoop [--config confdir] [COMMAND] [GENERIC_OPTIONS] [COMMAND_OPTIONS] -

-

- Hadoop has an option parsing framework that employs parsing generic options as well as running classes. -

- - - - - - - - - - - - - - - -
COMMAND_OPTION Description
--config confdirOverwrites the default Configuration directory. Default is ${HADOOP_PREFIX}/conf.
GENERIC_OPTIONSThe common set of options supported by multiple commands.
COMMAND
COMMAND_OPTIONS
Various commands with their options are described in the following sections. The commands - have been grouped into User Commands - and Administration Commands.
-
- Generic Options -

- The following options are supported by dfsadmin, - fs, fsck, - job and fetchdt. - Applications should implement - Tool to support - - GenericOptions. -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
GENERIC_OPTION Description
-conf <configuration file>Specify an application configuration file.
-D <property=value>Use value for given property.
-fs <local|namenode:port>Specify a namenode.
-jt <local|jobtracker:port>Specify a job tracker. Applies only to job.
-files <comma separated list of files>Specify comma separated files to be copied to the map reduce cluster. - Applies only to job.
-libjars <comma separated list of jars>Specify comma separated jar files to include in the classpath. - Applies only to job.
-archives <comma separated list of archives>Specify comma separated archives to be unarchived on the compute machines. - Applies only to job.
-
-
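To illustrate how the generic options above combine with the commands that accept them, a few representative invocations follow. These are sketches only; the host names, property name and values are hypothetical and should be replaced with values appropriate to your cluster.

# Run an fs command against an explicitly named namenode rather than the configured default.
hadoop fs -fs namenode.example.com:8020 -ls /
# Override a single configuration property for this invocation only.
hadoop job -D mapred.job.queue.name=default -list
# Point a job command at a specific jobtracker.
hadoop job -jt jobtracker.example.com:8021 -list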
- -
- User Commands -

Commands useful for users of a Hadoop cluster.

-
- archive -

- Creates a Hadoop archive. For more information see the Hadoop Archives Guide. -

-

- Usage: hadoop archive -archiveName NAME <src>* <dest> -

- - - - - - - - - - - - - - -
COMMAND_OPTION Description
-archiveName NAMEName of the archive to be created.
srcFilesystem pathnames which work as usual with regular expressions.
destDestination directory which would contain the archive.
-
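As a concrete illustration of the usage above, the following sketch archives two directories into a single archive; the paths and archive name are hypothetical.

# Archive two user directories into one Hadoop archive.
hadoop archive -archiveName data.har /user/hadoop/dir1 /user/hadoop/dir2 /user/hadoop/archives
# The contents can then be listed through the har scheme.
hadoop fs -ls har:///user/hadoop/archives/data.har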
- -
- distcp -

- Copies files or directories recursively. More information can be found in the DistCp Guide. -

-

- Usage: hadoop distcp <srcurl> <desturl> -

- - - - - - - - - - - -
COMMAND_OPTION Description
srcurlSource Url
desturlDestination Url
-
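By way of illustration, a recursive copy between two clusters might look like the following sketch; the namenode addresses and paths are hypothetical.

hadoop distcp hdfs://nn1.example.com:8020/user/hadoop/input hdfs://nn2.example.com:8020/user/hadoop/input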
- -
- fs -

- Runs a generic filesystem user client. -

-

- Usage: hadoop fs [GENERIC_OPTIONS] - [COMMAND_OPTIONS] -

-

- The various COMMAND_OPTIONS can be found at - File System Shell Guide. -

-
- -
- fsck -

- Runs an HDFS filesystem checking utility. See Fsck for more info. -

-

Usage: hadoop fsck [GENERIC_OPTIONS] - <path> [-move | -delete | -openforwrite] [-files [-blocks - [-locations | -racks]]]

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
<path>Start checking from this path.
-moveMove corrupted files to /lost+found
-deleteDelete corrupted files.
-openforwritePrint out files opened for write.
-filesPrint out files being checked.
-blocksPrint out block report.
-locationsPrint out locations for every block.
-racksPrint out network topology for data-node locations.
-
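A couple of example invocations combining the options above (the paths are hypothetical):

# Check a user directory and print per-file block and location details.
hadoop fsck /user/hadoop -files -blocks -locations
# Move any corrupted files found under the root to /lost+found.
hadoop fsck / -move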
- -
- fetchdt -

- Gets a delegation token from a NameNode. See fetchdt for more info. -

-

Usage: hadoop fetchdt [GENERIC_OPTIONS] - [--webservice <namenode_http_addr>] <file_name>

- - - - - - - - - - -
COMMAND_OPTION Description
<file_name>File name to store the token into.
--webservice <https_address>Use the HTTP protocol instead of RPC.
-
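For instance (the token file name, namenode address and port below are hypothetical):

# Fetch a delegation token over RPC and store it in a local file.
hadoop fetchdt /tmp/hadoop.dt
# Fetch it over HTTP from the namenode web interface instead of RPC.
hadoop fetchdt --webservice http://nn.example.com:50070 /tmp/hadoop.dt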
- -
- jar -

- Runs a jar file. Users can bundle their Map Reduce code in a jar file and execute it using this command. -

-

- Usage: hadoop jar <jar> [mainClass] args... -

-

- The streaming jobs are run via this command. For examples, see - Hadoop Streaming. -

-

- The WordCount example is also run using jar command. For examples, see the - MapReduce Tutorial. -

-
- -
- job -

- Command to interact with Map Reduce Jobs. -

-

- Usage: hadoop job [GENERIC_OPTIONS] - [-submit <job-file>] | [-status <job-id>] | - [-counter <job-id> <group-name> <counter-name>] | [-kill <job-id>] | - [-events <job-id> <from-event-#> <#-of-events>] | [-history [all] <historyFile>] | - [-list [all]] | [-kill-task <task-id>] | [-fail-task <task-id>] | - [-set-priority <job-id> <priority>] -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
-submit <job-file>Submits the job.
-status <job-id>Prints the map and reduce completion percentage and all job counters.
-counter <job-id> <group-name> <counter-name>Prints the counter value.
-kill <job-id>Kills the job.
-events <job-id> <from-event-#> <#-of-events>Prints the events' details received by jobtracker for the given range.
-history [all] <historyFile>-history <historyFile> prints job details, failed and killed tip details. More details - about the job such as successful tasks and task attempts made for each task can be viewed by - specifying the [all] option.
-list [all]-list all displays all jobs. -list displays only jobs which are yet to complete.
-kill-task <task-id>Kills the task. Killed tasks are NOT counted against failed attempts.
-fail-task <task-id>Fails the task. Failed tasks are counted against failed attempts.
-set-priority <job-id> <priority>Changes the priority of the job. - Allowed priority values are VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW
-
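A few representative invocations follow; the job id shown is hypothetical.

# List jobs that have not yet completed.
hadoop job -list
# Print the status and counters of a specific job.
hadoop job -status job_201301011200_0001
# Lower the priority of that job.
hadoop job -set-priority job_201301011200_0001 LOW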
- -
- pipes -

- Runs a pipes job. -

-

- Usage: hadoop pipes [-conf <path>] [-jobconf <key=value>, <key=value>, ...] - [-input <path>] [-output <path>] [-jar <jar file>] [-inputformat <class>] - [-map <class>] [-partitioner <class>] [-reduce <class>] [-writer <class>] - [-program <executable>] [-reduces <num>] -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
-conf <path>Configuration for job
-jobconf <key=value>, <key=value>, ...Add/override configuration for job
-input <path>Input directory
-output <path>Output directory
-jar <jar file>Jar filename
-inputformat <class>InputFormat class
-map <class>Java Map class
-partitioner <class>Java Partitioner
-reduce <class>Java Reduce class
-writer <class>Java RecordWriter
-program <executable>Executable URI
-reduces <num>Number of reduces
-
-
- queue -

- Command to interact with and view Job Queue information. -

-

- Usage : hadoop queue [-list] | [-info <job-queue-name> [-showJobs]] | [-showacls] -

- - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
-list Gets the list of Job Queues configured in the system, along with the scheduling information - associated with the job queues. -
-info <job-queue-name> [-showJobs] - Displays the job queue information and associated scheduling information of a particular - job queue. If the -showJobs option is present, a list of jobs submitted to that job - queue is displayed. -
-showaclsDisplays the queue name and associated queue operations allowed for the current user. - The list consists of only those queues to which the user has access. -
-
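Some example invocations (the queue name is hypothetical and depends on the cluster's queue configuration):

# List all configured job queues and their scheduling information.
hadoop queue -list
# Show one queue in detail, including the jobs submitted to it.
hadoop queue -info default -showJobs
# Show which queue operations the current user is allowed to perform.
hadoop queue -showacls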
-
- version -

- Prints the version. -

-

- Usage: hadoop version -

-
-
- CLASSNAME -

- Hadoop script can be used to invoke any class. -

-

- Runs the class named CLASSNAME. -

- -

- Usage: hadoop CLASSNAME -

- -
-
-
- Administration Commands -

Commands useful for administrators of a Hadoop cluster.

-
- balancer -

- Runs a cluster balancing utility. An administrator can simply press Ctrl-C to stop the - rebalancing process. For more details see - Rebalancer. -

-

- Usage: hadoop balancer [-policy <blockpool|datanode>] [-threshold <threshold>] -

- - - - - - - - - - -
COMMAND_OPTION Description
-policy <blockpool|datanode>The balancing policy. -
datanode: The cluster is balanced if the disk usage of each datanode is balanced. -
blockpool: The cluster is balanced if the disk usage of each block pool in each datanode is balanced. -
Note that the blockpool policy is stricter than the datanode policy. - The default policy is datanode. -
-threshold <threshold>Percentage of disk capacity. The default threshold is 10%.
-
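The following sketches use the options described above:

# Rebalance until every datanode is within 5% of the average cluster utilization.
hadoop balancer -threshold 5
# Balance per block pool rather than per datanode (the stricter policy).
hadoop balancer -policy blockpool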
- -
- daemonlog -

- Get/Set the log level for each daemon. -

-

- Usage: hadoop daemonlog -getlevel <host:port> <name>
- Usage: hadoop daemonlog -setlevel <host:port> <name> <level> -

- - - - - - - - - - - -
COMMAND_OPTION Description
-getlevel <host:port> <name>Prints the log level of the daemon running at <host:port>. - This command internally connects to http://<host:port>/logLevel?log=<name>
-setlevel <host:port> <name> <level>Sets the log level of the daemon running at <host:port>. - This command internally connects to http://<host:port>/logLevel?log=<name>
-
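Example invocations (the host, port and logger name are hypothetical and depend on the daemon being inspected):

# Query the current level of a logger on a running daemon.
hadoop daemonlog -getlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode
# Raise that logger to DEBUG without restarting the daemon.
hadoop daemonlog -setlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode DEBUG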
- -
- datanode -

- Runs an HDFS datanode. -

-

- Usage: hadoop datanode [-rollback] -

- - - - - - - -
COMMAND_OPTION Description
-rollbackRolls back the datanode to the previous version. This should be used after stopping the datanode - and distributing the old Hadoop version.
-
- -
- dfsadmin -

- Runs an HDFS dfsadmin client. -

-

- Usage: hadoop dfsadmin [GENERIC_OPTIONS] [-report] [-safemode enter | leave | get | wait] [-refreshNodes] - [-finalizeUpgrade] [-upgradeProgress status | details | force] [-metasave filename] - [-setQuota <quota> <dirname>...<dirname>] [-clrQuota <dirname>...<dirname>] - [-restoreFailedStorage true|false|check] - [-help [cmd]] -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
-reportReports basic filesystem information and statistics.
-safemode enter | leave | get | waitSafe mode maintenance command. - Safe mode is a Namenode state in which it
- 1. does not accept changes to the name space (read-only)
- 2. does not replicate or delete blocks.
- Safe mode is entered automatically at Namenode startup, and - leaves safe mode automatically when the configured minimum - percentage of blocks satisfies the minimum replication - condition. Safe mode can also be entered manually, but then - it can only be turned off manually as well.
-refreshNodesRe-read the hosts and exclude files to update the set - of Datanodes that are allowed to connect to the Namenode - and those that should be decommissioned or recommissioned.
-finalizeUpgradeFinalize upgrade of HDFS. - Datanodes delete their previous version working directories, - followed by Namenode doing the same. - This completes the upgrade process.
-printTopologyPrint a tree of the rack/datanode topology of the - cluster as seen by the NameNode.
-upgradeProgress status | details | forceRequest current distributed upgrade status, - a detailed status or force the upgrade to proceed.
-metasave filenameSave Namenode's primary data structures - to <filename> in the directory specified by hadoop.log.dir property. - <filename> will contain one line for each of the following
- 1. Datanodes heart beating with Namenode
- 2. Blocks waiting to be replicated
- 3. Blocks currently being replicated
- 4. Blocks waiting to be deleted
-setQuota <quota> <dirname>...<dirname>Set the quota <quota> for each directory <dirname>. - The directory quota is a long integer that puts a hard limit on the number of names in the directory tree.
- Best effort for the directory, with faults reported if
- 1. N is not a positive integer, or
- 2. user is not an administrator, or
- 3. the directory does not exist or is a file, or
- 4. the directory would immediately exceed the new quota.
-clrQuota <dirname>...<dirname>Clear the quota for each directory <dirname>.
- Best effort for the directory, with faults reported if
- 1. the directory does not exist or is a file, or
- 2. user is not an administrator.
- It does not fault if the directory has no quota.
-restoreFailedStorage true | false | checkThis option turns on/off automatic attempts to restore failed storage replicas. - If a failed storage becomes available again, the system will attempt to restore - edits and/or the fsimage during a checkpoint. The 'check' option returns the current setting.
-help [cmd] Displays help for the given command or all commands if none - is specified.
-
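A few representative invocations of the options above (the directory path and quota value are hypothetical):

# Report basic filesystem information and statistics.
hadoop dfsadmin -report
# Check whether the namenode is currently in safe mode.
hadoop dfsadmin -safemode get
# Limit a directory tree to at most 10000 names.
hadoop dfsadmin -setQuota 10000 /user/hadoop/project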
-
- mradmin -

Runs MR admin client

-

Usage: hadoop mradmin [ - GENERIC_OPTIONS - ] [-refreshServiceAcl] [-refreshQueues] [-refreshNodes] [-help [cmd]]

- - - - - - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
-refreshServiceAcl Reload the service-level authorization policies. Jobtracker - will reload the authorization policy file.
-refreshQueues

Reload the queues' configuration at the JobTracker. - Most of the configuration of the queues can be refreshed/reloaded - without restarting the Map/Reduce sub-system. Administrators - typically own the - - conf/mapred-queues.xml - file, can edit it while the JobTracker is still running, and can do - a reload by running this command.

-

It should be noted that while trying to refresh queues' - configuration, one cannot change the hierarchy of queues itself. - This means no operation that involves a change in either the - hierarchy structure itself or the queues' names will be allowed. - Only selected properties of queues can be changed during refresh. - For example, new queues cannot be added dynamically, neither can an - existing queue be deleted.

-

If during a reload of queue configuration, - a syntactic or semantic error in made during the editing of the - configuration file, the refresh command fails with an exception that - is printed on the standard output of this command, thus informing the - requester with any helpful messages of what has gone wrong during - the edit/reload. Importantly, the existing queue configuration is - untouched and the system is left in a consistent state. -

-

As described in the - - conf/mapred-queues.xml section, the - - <properties> tag in the queue configuration file can - also be used to specify per-queue properties needed by the scheduler. - When the framework's queue configuration is reloaded using this - command, this scheduler specific configuration will also be reloaded - , provided the scheduler being configured supports this reload. - Please see the documentation of the particular scheduler in use.

-
-refreshNodes Refresh the hosts information at the jobtracker.
-help [cmd]Displays help for the given command or all commands if none - is specified.
-
-
- jobtracker -

- Runs the MapReduce job Tracker node. -

-

- Usage: hadoop jobtracker [-dumpConfiguration] -

- - - - - - - - -
COMMAND_OPTION Description
-dumpConfiguration Dumps the configuration used by the JobTracker, along with the queue - configuration, in JSON format to standard output, - and exits.
- -
- -
- namenode -

- Runs the namenode. For more information about upgrade, rollback and finalize see - Upgrade and Rollback. -

-

- Usage: hadoop namenode [-format [-force] [-nonInteractive] [-clusterid someid]] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint] | [-checkpoint] | [-backup] -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
COMMAND_OPTION Description
-regularStart namenode in standard, active role rather than as backup or checkpoint node. This is the default role.
-checkpointStart namenode in checkpoint role, creating periodic checkpoints of the active namenode metadata.
-backupStart namenode in backup role, maintaining an up-to-date in-memory copy of the namespace and creating periodic checkpoints.
-format [-force] [-nonInteractive] [-clusterid someid]Formats the namenode. It starts the namenode, formats it and then shuts it down. The user will be prompted before formatting any non-empty name directories in the local filesystem.
- -nonInteractive: The user will not be prompted for input if non-empty name directories exist in the local filesystem, and the format will fail.
- -force: Formats the namenode and the user will NOT be prompted to confirm formatting of the name directories in the local filesystem. If -nonInteractive option is specified it will be ignored.
- -clusterid: Associates the namenode with the id specified. When formatting federated namenodes use this option to make sure all namenodes are associated with the same id.
-upgradeNamenode should be started with upgrade option after the distribution of new Hadoop version.
-rollbackRolls back the namenode to the previous version. This should be used after stopping the cluster - and distributing the old Hadoop version.
-finalizeFinalize removes the previous state of the file system. The most recent upgrade becomes permanent and - the rollback option will no longer be available. After finalization it shuts the namenode down.
-importCheckpointLoads image from a checkpoint directory and saves it into the current one. Checkpoint directory - is read from property dfs.namenode.checkpoint.dir - (see Import Checkpoint). -
-checkpointEnables checkpointing - (see Checkpoint Node).
-backupEnables checkpointing and maintains an in-memory, up-to-date copy of the file system namespace - (see Backup Node).
-
- -
- secondarynamenode -

- Runs the HDFS secondary - namenode. See Secondary NameNode - for more info. -

-

- Usage: hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize] -

- - - - - - - - - - - -
COMMAND_OPTION Description
-checkpoint [force]Checkpoints the Secondary namenode if EditLog size >= dfs.namenode.checkpoint.size. - If -force is used, checkpoint irrespective of EditLog size.
-geteditsizePrints the EditLog size.
-
- -
- tasktracker -

- Runs a MapReduce task Tracker node. -

-

- Usage: hadoop tasktracker -

-
- -
- - - - - -
diff --git a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/file_system_shell.xml b/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/file_system_shell.xml deleted file mode 100644 index 69a3ac292dc..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/file_system_shell.xml +++ /dev/null @@ -1,594 +0,0 @@ - - - - -
- File System Shell Guide -
- -
- Overview -

- The File System (FS) shell includes various shell-like commands that directly - interact with the Hadoop Distributed File System (HDFS) as well as other file systems that Hadoop supports, - such as Local FS, HFTP FS, S3 FS, and others. The FS shell is invoked by:

- - bin/hdfs dfs <args> - -

- All FS shell commands take path URIs as arguments. The URI - format is scheme://authority/path. For HDFS the scheme - is hdfs, and for the Local FS the scheme - is file. The scheme and authority are optional. If not - specified, the default scheme specified in the configuration is - used. An HDFS file or directory such as /parent/child - can be specified as hdfs://namenodehost/parent/child or - simply as /parent/child (given that your configuration - is set to point to hdfs://namenodehost). -

-

- Most of the commands in FS shell behave like corresponding Unix - commands. Differences are described with each of the - commands. Error information is sent to stderr and the - output is sent to stdout. -

- - - -
- cat -

- Usage: hdfs dfs -cat URI [URI …] -

-

- Copies source paths to stdout. -

-

Example:

-
    -
  • - hdfs dfs -cat hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2 - -
  • -
  • - hdfs dfs -cat file:///file3 /user/hadoop/file4 -
  • -
-

Exit Code:
- Returns 0 on success and -1 on error.

-
- - - -
- chgrp -

- Usage: hdfs dfs -chgrp [-R] GROUP URI [URI …] -

-

- Change group association of files. With -R, make the change recursively through the directory structure. - The user must be the owner of files, or else a super-user. - Additional information is in the HDFS Permissions Guide. -

-
-
- chmod -

- Usage: hdfs dfs -chmod [-R] <MODE[,MODE]... | OCTALMODE> URI [URI …] -

-

- Change the permissions of files. With -R, make the change recursively through the directory structure. - The user must be the owner of the file, or else a super-user. - Additional information is in the HDFS Permissions Guide. -

-
- - - -
- chown -

- Usage: hdfs dfs -chown [-R] [OWNER][:[GROUP]] URI [URI ] -

-

- Change the owner of files. With -R, make the change recursively through the directory structure. - The user must be a super-user. - Additional information is in the HDFS Permissions Guide. -

-
- - - -
- copyFromLocal -

- Usage: hdfs dfs -copyFromLocal <localsrc> URI -

-

Similar to put command, except that the source is restricted to a local file reference.

-
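Example (the file names are hypothetical):

hdfs dfs -copyFromLocal notes.txt /user/hadoop/notes.txt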
- - - -
- copyToLocal -

- Usage: hdfs dfs -copyToLocal [-ignorecrc] [-crc] URI <localdst> -

-

Similar to get command, except that the destination is restricted to a local file reference.

-
- - -
- count -

- Usage: hdfs dfs -count [-q] <paths> -

-

- Count the number of directories, files and bytes under the paths that match the specified file pattern.

- The output columns with -count are:

- DIR_COUNT, FILE_COUNT, CONTENT_SIZE, FILE_NAME

- The output columns with -count -q are:

- QUOTA, REMAINING_QUOTA, SPACE_QUOTA, REMAINING_SPACE_QUOTA, - DIR_COUNT, FILE_COUNT, CONTENT_SIZE, FILE_NAME -

-

Example:

-
    -
  • - hdfs dfs -count hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2 - -
  • -
  • - hdfs dfs -count -q hdfs://nn1.example.com/file1 - -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- cp -

- Usage: hdfs dfs -cp URI [URI …] <dest> -

-

- Copy files from source to destination. This command allows multiple sources as well in which case the destination must be a directory. -
- Example:

-
    -
  • - hdfs dfs -cp /user/hadoop/file1 /user/hadoop/file2 -
  • -
  • - hdfs dfs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - -
- du -

- Usage: hdfs dfs -du [-s] [-h] URI [URI …] -

-

- Displays the sizes of files and directories contained in the given directory, or the length of a file in case it is just a file.

-

Options:

-
    -
  • The -s option will result in an aggregate summary of file lengths being displayed, rather than the individual files.
  • -
  • The -h option will format file sizes in a "human-readable" fashion (e.g 64.0m instead of 67108864)
  • -
-

- Example:
hdfs dfs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://nn.example.com/user/hadoop/dir1
- Exit Code:
Returns 0 on success and -1 on error.

-
- - -
- dus -

- Usage: hdfs dfs -dus <args> -

-

- Displays a summary of file lengths. This is an alternate form of hdfs dfs -du -s. -

-
- - - -
- expunge -

- Usage: hdfs dfs -expunge -

-

Empty the Trash. Refer to the HDFS Architecture Guide - for more information on the Trash feature.

-
- - - -
- get -

- Usage: hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst> -
-

-

- Copy files to the local file system. Files that fail the CRC check may be copied with the - -ignorecrc option. Files and CRCs may be copied using the - -crc option. -

-

Example:

-
    -
  • - hdfs dfs -get /user/hadoop/file localfile -
  • -
  • - hdfs dfs -get hdfs://nn.example.com/user/hadoop/file localfile -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- getmerge -

- Usage: hdfs dfs -getmerge [-nl] <src> <localdst> -

-

- Takes a source directory and a destination file as input and concatenates the files in src into the destination local file. - Optionally the -nl flag can be set to add a newline character at the end of each file during the merge. -

-
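For instance, the following sketch merges the files under an HDFS directory into one local file, adding a newline after each part; the paths are hypothetical.

hdfs dfs -getmerge -nl /user/hadoop/output merged-output.txt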
- - - -
- ls -

- Usage: hdfs dfs -ls [-d] [-h] [-R] <args> -

-

For a file returns stat on the file with the following format:

-

- permissions number_of_replicas userid groupid filesize modification_date modification_time filename -

-

For a directory it returns list of its direct children as in unix.A directory is listed as:

-

- permissions userid groupid modification_date modification_time dirname -

-

Options:

-
    -
  • -d Directories are listed as plain files
  • -
  • -h Format file sizes in a "human-readable" fashion (e.g 64.0m instead of 67108864)
  • -
  • -R Recursively list subdirectories encountered
  • -
-

Example:

-

- hdfs dfs -ls /user/hadoop/file1 -

-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- lsr -

Usage: hdfs dfs -lsr <args>
- Recursive version of ls. Similar to Unix ls -R. -

-
- - - -
- mkdir -

- Usage: hdfs dfs -mkdir <paths> -
-

-

- Takes path URIs as arguments and creates directories. The behavior is much like Unix mkdir -p, creating parent directories along the path. -

-

Example:

-
    -
  • - hdfs dfs -mkdir /user/hadoop/dir1 /user/hadoop/dir2 -
  • -
  • - hdfs dfs -mkdir hdfs://nn1.example.com/user/hadoop/dir hdfs://nn2.example.com/user/hadoop/dir - -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- moveFromLocal -

- Usage: dfs -moveFromLocal <localsrc> <dst> -

-

Similar to put command, except that the source localsrc is deleted after it's copied.

-
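Example (the file names are hypothetical):

hdfs dfs -moveFromLocal notes.txt /user/hadoop/notes.txt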
- - - -
- moveToLocal -

- Usage: hdfs dfs -moveToLocal [-crc] <src> <dst> -

-

Displays a "Not implemented yet" message.

-
- - - -
- mv -

- Usage: hdfs dfs -mv URI [URI …] <dest> -

-

- Moves files from source to destination. This command allows multiple sources as well in which case the destination needs to be a directory. - Moving files across file systems is not permitted. -
- Example: -

-
    -
  • - hdfs dfs -mv /user/hadoop/file1 /user/hadoop/file2 -
  • -
  • - hdfs dfs -mv hdfs://nn.example.com/file1 hdfs://nn.example.com/file2 hdfs://nn.example.com/file3 hdfs://nn.example.com/dir1 -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- put -

- Usage: hdfs dfs -put <localsrc> ... <dst> -

-

Copy single src, or multiple srcs from local file system to the destination file system. - Also reads input from stdin and writes to destination file system.
-

-
    -
  • - hdfs dfs -put localfile /user/hadoop/hadoopfile -
  • -
  • - hdfs dfs -put localfile1 localfile2 /user/hadoop/hadoopdir -
  • -
  • - hdfs dfs -put localfile hdfs://nn.example.com/hadoop/hadoopfile -
  • -
  • hdfs dfs -put - hdfs://nn.example.com/hadoop/hadoopfile
    Reads the input from stdin.
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- rm -

- Usage: hdfs dfs -rm [-skipTrash] URI [URI …] -

-

- Delete files specified as args. Only deletes files. If the -skipTrash option - is specified, the trash, if enabled, will be bypassed and the specified file(s) deleted immediately. This can be - useful when it is necessary to delete files from an over-quota directory. - Use -rm -r or rmr for recursive deletes.
- Example: -

-
    -
  • - hdfs dfs -rm hdfs://nn.example.com/file -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- rmr -

- Usage: hdfs dfs -rmr [-skipTrash] URI [URI …] -

-

Recursive version of delete. The rmr command recursively deletes the directory and any content under it. If the -skipTrash option - is specified, the trash, if enabled, will be bypassed and the specified file(s) deleted immediately. This can be - useful when it is necessary to delete files from an over-quota directory.
- Example: -

-
    -
  • - hdfs dfs -rmr /user/hadoop/dir -
  • -
  • - hdfs dfs -rmr hdfs://nn.example.com/user/hadoop/dir -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- setrep -

- Usage: hdfs dfs -setrep [-R] [-w] <rep> <path> -

-

- Changes the replication factor of a file. The -R option recursively changes the replication factor of files within a directory. -

-

Example:

-
    -
  • - hdfs dfs -setrep -w 3 -R /user/hadoop/dir1 -
  • -
-

Exit Code:

-

- Returns 0 on success and -1 on error. -

-
- - - -
- stat -

- Usage: hdfs dfs -stat [format] URI [URI …] -

-

Print statistics about the file/directory matching the given URI pattern in the specified format.

-

Format accepts:

-
    -
  • filesize in bytes (%b)
  • -
  • filename (%n)
  • -
  • block size (%o)
  • -
  • replication (%r)
  • -
  • modification date, formatted as Y-M-D H:M:S (%y)
  • -
  • modification date, in epoch seconds (%Y)
  • -
-

Example:

-
    -
  • - hdfs dfs -stat path -
  • -
  • - hdfs dfs -stat %y path -
  • -
  • - hdfs dfs -stat '%b %r' path -
  • -
-

Exit Code:
- Returns 0 on success and -1 on error.

-
- - - -
- tail -

- Usage: hdfs dfs -tail [-f] URI -

-

- Displays the last kilobyte of the file to stdout. The -f option can be used as in Unix. -

-

Example:

-
    -
  • - hdfs dfs -tail pathname -
  • -
-

Exit Code:
- Returns 0 on success and -1 on error.

-
- - - -
- test -

- Usage: hdfs dfs -test -[ezd] URI -

-

- Options:
- -e check to see if the file exists. Return 0 if true.
- -z check to see if the file is zero length. Return 0 if true.
- -d check to see if the path is directory. Return 0 if true.

-

Example:

-
    -
  • - hdfs dfs -test -e filename -
  • -
-
- - - -
- text -

- Usage: hdfs dfs -text <src> -
-

-

- Takes a source file and outputs the file in text format. The allowed formats are zip and TextRecordInputStream. -

-
- - - -
- touchz -

- Usage: hdfs dfs -touchz URI [URI …] -
-

-

- Create a file of zero length. -

-

Example:

-
    -
  • - hdfs dfs -touchz pathname -
  • -
-

Exit Code:
- Returns 0 on success and -1 on error.

-
-
- -
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index 6adbeab60a0..92e3e6a4636 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -85,14 +85,20 @@ public abstract class AbstractFileSystem { } /** - * Prohibits names which contain a ".", "..", ":" or "/" + * Returns true if the specified string is considered valid in the path part + * of a URI by this file system. The default implementation enforces the rules + * of HDFS, but subclasses may override this method to implement specific + * validation rules for specific file systems. + * + * @param src String source filename to check, path part of the URI + * @return boolean true if the specified string is considered valid */ - private static boolean isValidName(String src) { - // Check for ".." "." ":" "/" + public boolean isValidName(String src) { + // Prohibit ".." "." and anything containing ":" StringTokenizer tokens = new StringTokenizer(src, Path.SEPARATOR); while(tokens.hasMoreTokens()) { String element = tokens.nextToken(); - if (element.equals("target/generated-sources") || + if (element.equals("..") || element.equals(".") || (element.indexOf(":") >= 0)) { return false; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java index 2c96b0abaf0..9a9f1e3efc7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DU.java @@ -136,7 +136,7 @@ public class DU extends Shell { } } - return used.longValue(); + return Math.max(used.longValue(), 0L); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java index b60507afa34..6e45a1ea6ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs; +import com.google.common.annotations.VisibleForTesting; + import java.io.IOException; import java.lang.ref.WeakReference; import java.util.concurrent.DelayQueue; @@ -147,6 +149,12 @@ public class DelegationTokenRenewer /** Queue to maintain the RenewActions to be processed by the {@link #run()} */ private volatile DelayQueue> queue = new DelayQueue>(); + /** For testing purposes */ + @VisibleForTesting + protected int getRenewQueueLength() { + return queue.size(); + } + /** * Create the singleton instance. 
However, the thread can be started lazily in * {@link #addRenewAction(FileSystem)} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java index 5445f6eb155..ea2f1dc6169 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java @@ -349,9 +349,15 @@ public class FileStatus implements Writable, Comparable { sb.append("; replication=" + block_replication); sb.append("; blocksize=" + blocksize); } + sb.append("; modification_time=" + modification_time); + sb.append("; access_time=" + access_time); sb.append("; owner=" + owner); sb.append("; group=" + group); sb.append("; permission=" + permission); + sb.append("; isSymlink=" + isSymlink()); + if(isSymlink()) { + sb.append("; symlink=" + symlink); + } sb.append("}"); return sb.toString(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java index 6e1e099cb0e..35aa4dc7f68 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -166,6 +166,18 @@ public class FilterFileSystem extends FileSystem { return fs.create(f, permission, overwrite, bufferSize, replication, blockSize, progress); } + + + + @Override + @Deprecated + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + + return fs.createNonRecursive(f, permission, flags, bufferSize, replication, blockSize, + progress); + } /** * Set replication for an existing file. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java index 9637b6b913a..cdc0d1fdefa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFs.java @@ -278,4 +278,9 @@ public abstract class FilterFs extends AbstractFileSystem { public List> getDelegationTokens(String renewer) throws IOException { return myFs.getDelegationTokens(renewer); } + + @Override + public boolean isValidName(String src) { + return myFs.isValidName(src); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java index 2a9208ea5bd..9c212a4a59d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java @@ -56,6 +56,12 @@ public class FsUrlStreamHandlerFactory implements public FsUrlStreamHandlerFactory(Configuration conf) { this.conf = new Configuration(conf); + // force init of FileSystem code to avoid HADOOP-9041 + try { + FileSystem.getFileSystemClass("file", conf); + } catch (IOException io) { + throw new RuntimeException(io); + } this.handler = new FsUrlStreamHandler(this.conf); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java index 4c089f1a299..88b877d146f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -30,6 +30,7 @@ import java.io.FileDescriptor; import java.net.URI; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.EnumSet; import java.util.StringTokenizer; import org.apache.hadoop.classification.InterfaceAudience; @@ -281,6 +282,18 @@ public class RawLocalFileSystem extends FileSystem { return new FSDataOutputStream(new BufferedOutputStream( new LocalFSFileOutputStream(f, false), bufferSize), statistics); } + + @Override + @Deprecated + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + if (exists(f) && !flags.contains(CreateFlag.OVERWRITE)) { + throw new IOException("File already exists: "+f); + } + return new FSDataOutputStream(new BufferedOutputStream( + new LocalFSFileOutputStream(f, false), bufferSize), statistics); + } @Override public FSDataOutputStream create(Path f, FsPermission permission, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java index b9a9277ade1..9ce0a97ab13 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java @@ -159,6 +159,14 @@ public class RawLocalFs extends DelegateToFileSystem { } } + @Override + public boolean isValidName(String 
src) { + // Different local file systems have different validation rules. Skip + // validation here and just let the OS handle it. This is consistent with + // RawLocalFileSystem. + return true; + } + @Override public Path getLinkTarget(Path f) throws IOException { /* We should never get here. Valid local links are resolved transparently diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java index f229d1f2230..8c5e88058e9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java @@ -311,6 +311,7 @@ abstract public class Command extends Configured { if (recursive && item.stat.isDirectory()) { recursePath(item); } + postProcessPath(item); } catch (IOException e) { displayError(e); } @@ -329,6 +330,15 @@ abstract public class Command extends Configured { throw new RuntimeException("processPath() is not implemented"); } + /** + * Hook for commands to implement an operation to be applied on each + * path for the command after being processed successfully + * @param item a {@link PathData} object + * @throws IOException if anything goes wrong... + */ + protected void postProcessPath(PathData item) throws IOException { + } + /** * Gets the directory listing for a path and invokes * {@link #processPaths(PathData, PathData...)} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java index e1ad1a2ef6d..ffb3483ef21 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java @@ -24,6 +24,7 @@ import java.util.LinkedList; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal; /** Various commands for moving files */ @@ -49,7 +50,21 @@ class MoveCommands { @Override protected void processPath(PathData src, PathData target) throws IOException { - target.fs.moveFromLocalFile(src.path, target.path); + // unlike copy, don't merge existing dirs during move + if (target.exists && target.stat.isDirectory()) { + throw new PathExistsException(target.toString()); + } + super.processPath(src, target); + } + + @Override + protected void postProcessPath(PathData src) throws IOException { + if (!src.fs.delete(src.path, false)) { + // we have no way to know the actual error... 
+ PathIOException e = new PathIOException(src.toString()); + e.setOperation("remove"); + throw e; + } } } @@ -95,4 +110,4 @@ class MoveCommands { } } } -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java index e4988efeaff..b73d3c65195 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java @@ -19,11 +19,14 @@ package org.apache.hadoop.fs.viewfs; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; +import java.util.EnumSet; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileChecksum; @@ -171,6 +174,16 @@ class ChRootedFileSystem extends FilterFileSystem { return super.create(fullPath(f), permission, overwrite, bufferSize, replication, blockSize, progress); } + + @Override + @Deprecated + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + + return super.createNonRecursive(fullPath(f), permission, flags, bufferSize, replication, blockSize, + progress); + } @Override public boolean delete(final Path f, final boolean recursive) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java index c99ce3be13b..2c184f6bb05 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java @@ -83,7 +83,12 @@ class ChRootedFs extends AbstractFileSystem { return new Path((chRootPathPart.isRoot() ? "" : chRootPathPartString) + path.toUri().getPath()); } - + + @Override + public boolean isValidName(String src) { + return myFs.isValidName(fullPath(new Path(src)).toUri().toString()); + } + public ChRootedFs(final AbstractFileSystem fs, final Path theRoot) throws URISyntaxException { super(fs.getUri(), fs.getUri().getScheme(), @@ -103,7 +108,7 @@ class ChRootedFs extends AbstractFileSystem { // scheme:/// and scheme://authority/ myUri = new URI(myFs.getUri().toString() + (myFs.getUri().getAuthority() == null ? 
"" : Path.SEPARATOR) + - chRootPathPart.toString().substring(1)); + chRootPathPart.toUri().getPath().substring(1)); super.checkPath(theRoot); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index f4fbc66b530..aec87a34c04 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.Arrays; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -35,6 +36,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -62,6 +64,9 @@ import org.apache.hadoop.util.Time; @InterfaceAudience.Public @InterfaceStability.Evolving /*Evolving for a release,to be changed to Stable */ public class ViewFileSystem extends FileSystem { + + private static final Path ROOT_PATH = new Path(Path.SEPARATOR); + static AccessControlException readOnlyMountTable(final String operation, final String p) { return new AccessControlException( @@ -96,23 +101,6 @@ public class ViewFileSystem extends FileSystem { InodeTree fsState; // the fs state; ie the mount table Path homeDir = null; - /** - * Prohibits names which contain a ".", "..", ":" or "/" - */ - private static boolean isValidName(final String src) { - // Check for ".." "." ":" "/" - final StringTokenizer tokens = new StringTokenizer(src, Path.SEPARATOR); - while(tokens.hasMoreTokens()) { - String element = tokens.nextToken(); - if (element.equals("..") || - element.equals(".") || - (element.indexOf(":") >= 0)) { - return false; - } - } - return true; - } - /** * Make the path Absolute and get the path-part of a pathname. 
* Checks that URI matches this file system @@ -124,10 +112,6 @@ public class ViewFileSystem extends FileSystem { private String getUriPath(final Path p) { checkPath(p); String s = makeAbsolute(p).toUri().getPath(); - if (!isValidName(s)) { - throw new InvalidPathException("Path part " + s + " from URI" + p - + " is not a valid filename."); - } return s; } @@ -282,6 +266,21 @@ public class ViewFileSystem extends FileSystem { return res.targetFileSystem.append(res.remainingPath, bufferSize, progress); } + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + InodeTree.ResolveResult res; + try { + res = fsState.resolve(getUriPath(f), false); + } catch (FileNotFoundException e) { + throw readOnlyMountTable("create", f); + } + assert(res.remainingPath != null); + return res.targetFileSystem.createNonRecursive(res.remainingPath, permission, + flags, bufferSize, replication, blockSize, progress); + } + @Override public FSDataOutputStream create(final Path f, final FsPermission permission, final boolean overwrite, final int bufferSize, final short replication, @@ -672,7 +671,7 @@ public class ViewFileSystem extends FileSystem { PERMISSION_RRR, ugi.getUserName(), ugi.getGroupNames()[0], new Path(theInternalDir.fullPath).makeQualified( - myUri, null)); + myUri, ROOT_PATH)); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java index dcfe5f32031..2bbdc164f87 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java @@ -597,6 +597,12 @@ public class ViewFs extends AbstractFileSystem { return result; } + @Override + public boolean isValidName(String src) { + // Prefix validated at mount time and rest of path validated by mount target. 
+ return true; + } + /* diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 7a2a5f1ebab..bb4d4b5809c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -21,6 +21,8 @@ package org.apache.hadoop.ha; import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -45,6 +47,7 @@ import org.apache.zookeeper.KeeperException.Code; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; /** * @@ -205,7 +208,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { int zookeeperSessionTimeout, String parentZnodeName, List acl, List authInfo, ActiveStandbyElectorCallback app) throws IOException, - HadoopIllegalArgumentException { + HadoopIllegalArgumentException, KeeperException { if (app == null || acl == null || parentZnodeName == null || zookeeperHostPorts == null || zookeeperSessionTimeout <= 0) { throw new HadoopIllegalArgumentException("Invalid argument"); @@ -602,10 +605,24 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { * * @return new zookeeper client instance * @throws IOException + * @throws KeeperException zookeeper connectionloss exception */ - protected synchronized ZooKeeper getNewZooKeeper() throws IOException { - ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, null); - zk.register(new WatcherWithClientRef(zk)); + protected synchronized ZooKeeper getNewZooKeeper() throws IOException, + KeeperException { + + // Unfortunately, the ZooKeeper constructor connects to ZooKeeper and + // may trigger the Connected event immediately. So, if we register the + // watcher after constructing ZooKeeper, we may miss that event. Instead, + // we construct the watcher first, and have it queue any events it receives + // before we can set its ZooKeeper reference. + WatcherWithClientRef watcher = new WatcherWithClientRef(); + ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher); + watcher.setZooKeeperRef(zk); + + // Wait for the asynchronous success/failure. This may throw an exception + // if we don't connect within the session timeout. + watcher.waitForZKConnectionEvent(zkSessionTimeout); + for (ZKAuthInfo auth : zkAuthInfo) { zk.addAuthInfo(auth.getScheme(), auth.getAuth()); } @@ -710,13 +727,16 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { } catch(IOException e) { LOG.warn(e); sleepFor(5000); + } catch(KeeperException e) { + LOG.warn(e); + sleepFor(5000); } ++connectionRetryCount; } return success; } - private void createConnection() throws IOException { + private void createConnection() throws IOException, KeeperException { if (zkClient != null) { try { zkClient.close(); @@ -973,14 +993,76 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { * events. */ private final class WatcherWithClientRef implements Watcher { - private final ZooKeeper zk; + private ZooKeeper zk; + + /** + * Latch fired whenever any event arrives. 
This is used in order + * to wait for the Connected event when the client is first created. + */ + private CountDownLatch hasReceivedEvent = new CountDownLatch(1); + + /** + * If any events arrive before the reference to ZooKeeper is set, + * they get queued up and later forwarded when the reference is + * available. + */ + private final List queuedEvents = Lists.newLinkedList(); + + private WatcherWithClientRef() { + } private WatcherWithClientRef(ZooKeeper zk) { this.zk = zk; } + + /** + * Waits for the next event from ZooKeeper to arrive. + * + * @param connectionTimeoutMs zookeeper connection timeout in milliseconds + * @throws KeeperException if the connection attempt times out. This will + * be a ZooKeeper ConnectionLoss exception code. + * @throws IOException if interrupted while connecting to ZooKeeper + */ + private void waitForZKConnectionEvent(int connectionTimeoutMs) + throws KeeperException, IOException { + try { + if (!hasReceivedEvent.await(connectionTimeoutMs, TimeUnit.MILLISECONDS)) { + LOG.error("Connection timed out: couldn't connect to ZooKeeper in " + + connectionTimeoutMs + " milliseconds"); + synchronized (this) { + zk.close(); + } + throw KeeperException.create(Code.CONNECTIONLOSS); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException( + "Interrupted when connecting to zookeeper server", e); + } + } + + private synchronized void setZooKeeperRef(ZooKeeper zk) { + Preconditions.checkState(this.zk == null, + "zk already set -- must be set exactly once"); + this.zk = zk; + + for (WatchedEvent e : queuedEvents) { + forwardEvent(e); + } + queuedEvents.clear(); + } @Override - public void process(WatchedEvent event) { + public synchronized void process(WatchedEvent event) { + if (zk != null) { + forwardEvent(event); + } else { + queuedEvents.add(event); + } + } + + private void forwardEvent(WatchedEvent event) { + hasReceivedEvent.countDown(); try { ActiveStandbyElector.this.processWatchEvent( zk, event); @@ -1024,5 +1106,4 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { ((appData == null) ? "null" : StringUtils.byteToHexString(appData)) + " cb=" + appClient; } - } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java index 35d75b72ae5..712c1d0f182 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java @@ -180,7 +180,15 @@ public abstract class ZKFailoverController { private int doRun(String[] args) throws HadoopIllegalArgumentException, IOException, InterruptedException { - initZK(); + try { + initZK(); + } catch (KeeperException ke) { + LOG.fatal("Unable to start failover controller. Unable to connect " + + "to ZooKeeper quorum at " + zkQuorum + ". Please check the " + + "configured value for " + ZK_QUORUM_KEY + " and ensure that " + + "ZooKeeper is running."); + return ERR_CODE_NO_ZK; + } if (args.length > 0) { if ("-formatZK".equals(args[0])) { boolean force = false; @@ -199,24 +207,12 @@ public abstract class ZKFailoverController { badArg(args[0]); } } - - try { - if (!elector.parentZNodeExists()) { - LOG.fatal("Unable to start failover controller. 
" + - "Parent znode does not exist.\n" + - "Run with -formatZK flag to initialize ZooKeeper."); - return ERR_CODE_NO_PARENT_ZNODE; - } - } catch (IOException ioe) { - if (ioe.getCause() instanceof KeeperException.ConnectionLossException) { - LOG.fatal("Unable to start failover controller. Unable to connect " + - "to ZooKeeper quorum at " + zkQuorum + ". Please check the " + - "configured value for " + ZK_QUORUM_KEY + " and ensure that " + - "ZooKeeper is running."); - return ERR_CODE_NO_ZK; - } else { - throw ioe; - } + + if (!elector.parentZNodeExists()) { + LOG.fatal("Unable to start failover controller. " + + "Parent znode does not exist.\n" + + "Run with -formatZK flag to initialize ZooKeeper."); + return ERR_CODE_NO_PARENT_ZNODE; } try { @@ -310,7 +306,8 @@ public abstract class ZKFailoverController { } - private void initZK() throws HadoopIllegalArgumentException, IOException { + private void initZK() throws HadoopIllegalArgumentException, IOException, + KeeperException { zkQuorum = conf.get(ZK_QUORUM_KEY); int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY, ZK_SESSION_TIMEOUT_DEFAULT); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 9bea3db4ef1..f5376a33962 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -38,6 +38,11 @@ import java.util.Iterator; import java.util.Map.Entry; import java.util.Random; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -58,11 +63,10 @@ import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; -import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto; -import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadHeaderProto; -import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadOperationProto; -import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcResponseHeaderProto; -import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcStatusProto; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto.OperationProto; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.KerberosInfo; import org.apache.hadoop.security.SaslRpcClient; @@ -78,6 +82,8 @@ import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Time; +import com.google.common.util.concurrent.ThreadFactoryBuilder; + /** A client for an IPC service. IPC calls take a single {@link Writable} as a * parameter, and return a {@link Writable} as their value. A service runs on * a port and is defined by a parameter class and a value class. 
@@ -103,6 +109,19 @@ public class Client { final static int PING_CALL_ID = -1; + /** + * Executor on which IPC calls' parameters are sent. Deferring + * the sending of parameters to a separate thread isolates them + * from thread interruptions in the calling code. + */ + private static final ExecutorService SEND_PARAMS_EXECUTOR = + Executors.newCachedThreadPool( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("IPC Parameter Sending Thread #%d") + .build()); + + /** * set the ping interval value in configuration * @@ -171,7 +190,7 @@ public class Client { */ private class Call { final int id; // call id - final Writable rpcRequest; // the serialized rpc request - RpcPayload + final Writable rpcRequest; // the serialized rpc request Writable rpcResponse; // null if rpc has error IOException error; // exception, null if success final RPC.RpcKind rpcKind; // Rpc EngineKind @@ -245,6 +264,8 @@ public class Client { private AtomicLong lastActivity = new AtomicLong();// last I/O activity time private AtomicBoolean shouldCloseConnection = new AtomicBoolean(); // indicate if the connection is closed private IOException closeException; // close reason + + private final Object sendRpcRequestLock = new Object(); public Connection(ConnectionId remoteId) throws IOException { this.remoteId = remoteId; @@ -746,7 +767,7 @@ public class Client { remoteId.getTicket(), authMethod).writeTo(buf); - // Write out the payload length + // Write out the packet length int bufLen = buf.getLength(); out.writeInt(bufLen); @@ -810,7 +831,7 @@ public class Client { try { while (waitForWork()) {//wait here for work - read or close connection - receiveResponse(); + receiveRpcResponse(); } } catch (Throwable t) { // This truly is unexpected, since we catch IOException in receiveResponse @@ -827,52 +848,86 @@ public class Client { + connections.size()); } - /** Initiates a call by sending the parameter to the remote server. + /** Initiates a rpc call by sending the rpc request to the remote server. * Note: this is not called from the Connection thread, but by other * threads. + * @param call - the rpc request */ - public void sendParam(Call call) { + public void sendRpcRequest(final Call call) + throws InterruptedException, IOException { if (shouldCloseConnection.get()) { return; } - DataOutputBuffer d=null; - try { - synchronized (this.out) { - if (LOG.isDebugEnabled()) - LOG.debug(getName() + " sending #" + call.id); + // Serialize the call to be sent. This is done from the actual + // caller thread, rather than the SEND_PARAMS_EXECUTOR thread, + // so that if the serialization throws an error, it is reported + // properly. This also parallelizes the serialization. + // + // Format of a call on the wire: + // 0) Length of rest below (1 + 2) + // 1) RpcRequestHeader - is serialized Delimited hence contains length + // 2) RpcRequest + // + // Items '1' and '2' are prepared here. 
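
The comments above describe both the wire framing and why serialization stays on the caller thread. Below is a minimal, self-contained sketch of that idea, assuming the types this patch introduces or renames (RpcHeaderProtos, ProtoUtil.makeRpcRequestHeader) plus standard Hadoop I/O classes; RequestSender and its method name are illustrative, not part of the patch. The framed request is built on the caller's thread, the socket write is handed to a shared daemon executor, and the caller waits on the Future, so an interrupt of the calling thread cannot leave half a call on the wire.

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto.OperationProto;
import org.apache.hadoop.util.ProtoUtil;

import com.google.common.util.concurrent.ThreadFactoryBuilder;

class RequestSender {
  private static final ExecutorService SENDER = Executors.newCachedThreadPool(
      new ThreadFactoryBuilder().setDaemon(true)
          .setNameFormat("Example Sender #%d").build());

  static void send(final DataOutputStream out, RPC.RpcKind rpcKind,
      int callId, Writable rpcRequest) throws IOException {
    // Items 1 and 2: length-delimited header followed by the request body,
    // serialized here on the caller thread so serialization errors surface
    // directly to the caller.
    final DataOutputBuffer buf = new DataOutputBuffer();
    RpcRequestHeaderProto header = ProtoUtil.makeRpcRequestHeader(
        rpcKind, OperationProto.RPC_FINAL_PACKET, callId);
    header.writeDelimitedTo(buf);
    rpcRequest.write(buf);

    // Item 0: total length, then the bytes -- written on the executor thread.
    Future<?> f = SENDER.submit(new Runnable() {
      @Override
      public void run() {
        try {
          synchronized (out) {
            out.writeInt(buf.getLength());
            out.write(buf.getData(), 0, buf.getLength());
            out.flush();
          }
        } catch (IOException e) {
          // real code would mark the connection closed here
          throw new RuntimeException(e);
        } finally {
          IOUtils.closeStream(buf);
        }
      }
    });

    try {
      // The wait can be interrupted, but the write itself keeps running on the
      // executor thread, so the stream is never left with a partial request.
      f.get();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IOException("interrupted waiting for rpc request to be sent", e);
    } catch (ExecutionException e) {
      throw new IOException("failed to send rpc request", e.getCause());
    }
  }
}
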
+ final DataOutputBuffer d = new DataOutputBuffer(); + RpcRequestHeaderProto header = ProtoUtil.makeRpcRequestHeader( + call.rpcKind, OperationProto.RPC_FINAL_PACKET, call.id); + header.writeDelimitedTo(d); + call.rpcRequest.write(d); + + synchronized (sendRpcRequestLock) { + Future senderFuture = SEND_PARAMS_EXECUTOR.submit(new Runnable() { + @Override + public void run() { + try { + synchronized (Connection.this.out) { + if (shouldCloseConnection.get()) { + return; + } + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + " sending #" + call.id); + + byte[] data = d.getData(); + int totalLength = d.getLength(); + out.writeInt(totalLength); // Total Length + out.write(data, 0, totalLength);// RpcRequestHeader + RpcRequest + out.flush(); + } + } catch (IOException e) { + // exception at this point would leave the connection in an + // unrecoverable state (eg half a call left on the wire). + // So, close the connection, killing any outstanding calls + markClosed(e); + } finally { + //the buffer is just an in-memory buffer, but it is still polite to + // close early + IOUtils.closeStream(d); + } + } + }); + + try { + senderFuture.get(); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); - // Serializing the data to be written. - // Format: - // 0) Length of rest below (1 + 2) - // 1) PayloadHeader - is serialized Delimited hence contains length - // 2) the Payload - the RpcRequest - // - d = new DataOutputBuffer(); - RpcPayloadHeaderProto header = ProtoUtil.makeRpcPayloadHeader( - call.rpcKind, RpcPayloadOperationProto.RPC_FINAL_PAYLOAD, call.id); - header.writeDelimitedTo(d); - call.rpcRequest.write(d); - byte[] data = d.getData(); - - int totalLength = d.getLength(); - out.writeInt(totalLength); // Total Length - out.write(data, 0, totalLength);//PayloadHeader + RpcRequest - out.flush(); + // cause should only be a RuntimeException as the Runnable above + // catches IOException + if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else { + throw new RuntimeException("unexpected checked exception", cause); + } } - } catch(IOException e) { - markClosed(e); - } finally { - //the buffer is just an in-memory buffer, but it is still polite to - // close early - IOUtils.closeStream(d); } - } + } /* Receive a response. * Because only one receiver, so no synchronization on in. 
*/ - private void receiveResponse() { + private void receiveRpcResponse() { if (shouldCloseConnection.get()) { return; } @@ -1138,7 +1193,16 @@ public class Client { ConnectionId remoteId) throws InterruptedException, IOException { Call call = new Call(rpcKind, rpcRequest); Connection connection = getConnection(remoteId, call); - connection.sendParam(call); // send the parameter + try { + connection.sendRpcRequest(call); // send the rpc request + } catch (RejectedExecutionException e) { + throw new IOException("connection has been closed", e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("interrupted waiting to send rpc request to server", e); + throw new IOException(e); + } + boolean interrupted = false; synchronized (call) { while (!call.done) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index 3a33abe6a65..198a618947e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -39,7 +39,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.RPC.RpcInvoker; -import org.apache.hadoop.ipc.protobuf.HadoopRpcProtos.HadoopRpcRequestProto; +import org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos.RequestProto; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.TokenIdentifier; @@ -128,10 +128,10 @@ public class ProtobufRpcEngine implements RpcEngine { .getProtocolVersion(protocol); } - private HadoopRpcRequestProto constructRpcRequest(Method method, + private RequestProto constructRpcRequest(Method method, Object[] params) throws ServiceException { - HadoopRpcRequestProto rpcRequest; - HadoopRpcRequestProto.Builder builder = HadoopRpcRequestProto + RequestProto rpcRequest; + RequestProto.Builder builder = RequestProto .newBuilder(); builder.setMethodName(method.getName()); @@ -190,7 +190,7 @@ public class ProtobufRpcEngine implements RpcEngine { startTime = Time.now(); } - HadoopRpcRequestProto rpcRequest = constructRpcRequest(method, args); + RequestProto rpcRequest = constructRpcRequest(method, args); RpcResponseWritable val = null; if (LOG.isTraceEnabled()) { @@ -271,13 +271,13 @@ public class ProtobufRpcEngine implements RpcEngine { * Writable Wrapper for Protocol Buffer Requests */ private static class RpcRequestWritable implements Writable { - HadoopRpcRequestProto message; + RequestProto message; @SuppressWarnings("unused") public RpcRequestWritable() { } - RpcRequestWritable(HadoopRpcRequestProto message) { + RpcRequestWritable(RequestProto message) { this.message = message; } @@ -292,7 +292,7 @@ public class ProtobufRpcEngine implements RpcEngine { int length = ProtoUtil.readRawVarint32(in); byte[] bytes = new byte[length]; in.readFully(bytes); - message = HadoopRpcRequestProto.parseFrom(bytes); + message = RequestProto.parseFrom(bytes); } @Override @@ -426,7 +426,7 @@ public class ProtobufRpcEngine implements RpcEngine { public Writable call(RPC.Server server, String connectionProtocolName, Writable writableRequest, long receiveTime) throws Exception { RpcRequestWritable request = (RpcRequestWritable) writableRequest; - 
HadoopRpcRequestProto rpcRequest = request.message; + RequestProto rpcRequest = request.message; String methodName = rpcRequest.getMethodName(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 093aadaa091..ef8687f35e4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -80,7 +80,8 @@ import org.apache.hadoop.ipc.RPC.VersionMismatch; import org.apache.hadoop.ipc.metrics.RpcDetailedMetrics; import org.apache.hadoop.ipc.metrics.RpcMetrics; import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto; -import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.*; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.SaslRpcServer; @@ -160,7 +161,7 @@ public abstract class Server { public static final ByteBuffer HEADER = ByteBuffer.wrap("hrpc".getBytes()); /** - * Serialization type for ConnectionContext and RpcPayloadHeader + * Serialization type for ConnectionContext and RpcRequestHeader */ public enum IpcSerializationType { // Add new serialization type to the end without affecting the enum order @@ -197,7 +198,7 @@ public abstract class Server { // 4 : Introduced SASL security layer // 5 : Introduced use of {@link ArrayPrimitiveWritable$Internal} // in ObjectWritable to efficiently transmit arrays of primitives - // 6 : Made RPC payload header explicit + // 6 : Made RPC Request header explicit // 7 : Changed Ipc Connection Header to use Protocol buffers // 8 : SASL server always sends a final response public static final byte CURRENT_VERSION = 8; @@ -1637,14 +1638,15 @@ public abstract class Server { private void processData(byte[] buf) throws IOException, InterruptedException { DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf)); - RpcPayloadHeaderProto header = RpcPayloadHeaderProto.parseDelimitedFrom(dis); + RpcRequestHeaderProto header = RpcRequestHeaderProto.parseDelimitedFrom(dis); if (LOG.isDebugEnabled()) LOG.debug(" got #" + header.getCallId()); if (!header.hasRpcOp()) { - throw new IOException(" IPC Server: No rpc op in rpcPayloadHeader"); + throw new IOException(" IPC Server: No rpc op in rpcRequestHeader"); } - if (header.getRpcOp() != RpcPayloadOperationProto.RPC_FINAL_PAYLOAD) { + if (header.getRpcOp() != + RpcRequestHeaderProto.OperationProto.RPC_FINAL_PACKET) { throw new IOException("IPC Server does not implement operation" + header.getRpcOp()); } @@ -1652,7 +1654,7 @@ public abstract class Server { // (Note it would make more sense to have the handler deserialize but // we continue with this original design. 
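
On the receiving side, Server.processData (shown here) undoes the same framing with parseDelimitedFrom. The following compact sketch covers only the header-recovery step, under the same assumptions as the client sketch above; the helper name readHeader is illustrative, and the caller would deserialize the remaining bytes according to header.getRpcKind().

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcRequestHeaderProto;

class RpcUnframing {
  static RpcRequestHeaderProto readHeader(byte[] buf) throws IOException {
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
    // The header was written with writeDelimitedTo, so it carries its own length.
    RpcRequestHeaderProto header = RpcRequestHeaderProto.parseDelimitedFrom(dis);
    if (header == null || !header.hasRpcOp() || !header.hasRpcKind()) {
      throw new IOException("Incomplete rpcRequestHeader");
    }
    if (header.getRpcOp() !=
        RpcRequestHeaderProto.OperationProto.RPC_FINAL_PACKET) {
      throw new IOException("Unsupported rpc operation " + header.getRpcOp());
    }
    return header;  // the rest of the stream is the rpc request itself
  }
}
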
if (!header.hasRpcKind()) { - throw new IOException(" IPC Server: No rpc kind in rpcPayloadHeader"); + throw new IOException(" IPC Server: No rpc kind in rpcRequestHeader"); } Class rpcRequestClass = getRpcRequestWrapper(header.getRpcKind()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/JniBasedUnixGroupsMappingWithFallback.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/JniBasedUnixGroupsMappingWithFallback.java index 5b6d538f8a9..908ca1468d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/JniBasedUnixGroupsMappingWithFallback.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/JniBasedUnixGroupsMappingWithFallback.java @@ -37,7 +37,7 @@ public class JniBasedUnixGroupsMappingWithFallback implements if (NativeCodeLoader.isNativeCodeLoaded()) { this.impl = new JniBasedUnixGroupsMapping(); } else { - LOG.info("Falling back to shell based"); + LOG.debug("Falling back to shell based"); this.impl = new ShellBasedUnixGroupsMapping(); } if (LOG.isDebugEnabled()){ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index 9260fbe9f53..b5cb5b518a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -81,6 +81,7 @@ public class UserGroupInformation { */ private static final float TICKET_RENEW_WINDOW = 0.80f; static final String HADOOP_USER_NAME = "HADOOP_USER_NAME"; + static final String HADOOP_PROXY_USER = "HADOOP_PROXY_USER"; /** * UgiMetrics maintains UGI activity statistics @@ -641,10 +642,18 @@ public class UserGroupInformation { newLoginContext(authenticationMethod.getLoginAppName(), subject, new HadoopConfiguration()); login.login(); - loginUser = new UserGroupInformation(subject); - loginUser.setLogin(login); - loginUser.setAuthenticationMethod(authenticationMethod); - loginUser = new UserGroupInformation(login.getSubject()); + UserGroupInformation realUser = new UserGroupInformation(subject); + realUser.setLogin(login); + realUser.setAuthenticationMethod(authenticationMethod); + realUser = new UserGroupInformation(login.getSubject()); + // If the HADOOP_PROXY_USER environment variable or property + // is specified, create a proxy user as the logged in user. + String proxyUser = System.getenv(HADOOP_PROXY_USER); + if (proxyUser == null) { + proxyUser = System.getProperty(HADOOP_PROXY_USER); + } + loginUser = proxyUser == null ? realUser : createProxyUser(proxyUser, realUser); + String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION); if (fileLocation != null) { // load the token storage file and put all of the tokens into the diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java new file mode 100644 index 00000000000..f4cf2f53314 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.SnappyCodec; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class NativeLibraryChecker { + /** + * A tool to test native library availability, + */ + public static void main(String[] args) { + String usage = "NativeLibraryChecker [-a|-h]\n" + + " -a use -a to check all libraries are available\n" + + " by default just check hadoop library is available\n" + + " exit with error code if check failed\n" + + " -h print this message\n"; + if (args.length > 1 || + (args.length == 1 && + !(args[0].equals("-a") || args[0].equals("-h")))) { + System.err.println(usage); + ExitUtil.terminate(1); + } + boolean checkAll = false; + if (args.length == 1) { + if (args[0].equals("-h")) { + System.out.println(usage); + return; + } + checkAll = true; + } + boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded(); + boolean zlibLoaded = false; + boolean snappyLoaded = false; + // lz4 is linked within libhadoop + boolean lz4Loaded = nativeHadoopLoaded; + if (nativeHadoopLoaded) { + zlibLoaded = ZlibFactory.isNativeZlibLoaded(new Configuration()); + snappyLoaded = NativeCodeLoader.buildSupportsSnappy() && + SnappyCodec.isNativeCodeLoaded(); + } + System.out.println("Native library checking:"); + System.out.printf("hadoop: %b\n", nativeHadoopLoaded); + System.out.printf("zlib: %b\n", zlibLoaded); + System.out.printf("snappy: %b\n", snappyLoaded); + System.out.printf("lz4: %b\n", lz4Loaded); + if ((!nativeHadoopLoaded) || + (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded))) { + // return 1 to indicated check failed + ExitUtil.terminate(1); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java index 0618f0631c8..bec2e85af85 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java @@ -24,7 +24,7 @@ import java.io.IOException; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto; import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.UserInformationProto; -import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.*; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import 
org.apache.hadoop.security.UserGroupInformation; @@ -157,9 +157,9 @@ public abstract class ProtoUtil { return null; } - public static RpcPayloadHeaderProto makeRpcPayloadHeader(RPC.RpcKind rpcKind, - RpcPayloadOperationProto operation, int callId) { - RpcPayloadHeaderProto.Builder result = RpcPayloadHeaderProto.newBuilder(); + public static RpcRequestHeaderProto makeRpcRequestHeader(RPC.RpcKind rpcKind, + RpcRequestHeaderProto.OperationProto operation, int callId) { + RpcRequestHeaderProto.Builder result = RpcRequestHeaderProto.newBuilder(); result.setRpcKind(convert(rpcKind)).setRpcOp(operation).setCallId(callId); return result.build(); } diff --git a/hadoop-common-project/hadoop-common/src/main/proto/hadoop_rpc.proto b/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto similarity index 82% rename from hadoop-common-project/hadoop-common/src/main/proto/hadoop_rpc.proto rename to hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto index cea0eaa432f..c0bb23587a2 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/hadoop_rpc.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/ProtobufRpcEngine.proto @@ -17,11 +17,13 @@ */ /** - * These are the messages used by Hadoop RPC to marshal the - * request and response in the RPC layer. + * These are the messages used by Hadoop RPC for the Rpc Engine Protocol Buffer + * to marshal the request and response in the RPC layer. + * The messages are sent in addition to the normal RPC header as + * defined in RpcHeader.proto */ option java_package = "org.apache.hadoop.ipc.protobuf"; -option java_outer_classname = "HadoopRpcProtos"; +option java_outer_classname = "ProtobufRpcEngineProtos"; option java_generate_equals_and_hash = true; package hadoop.common; @@ -29,10 +31,11 @@ package hadoop.common; * This message is used for Protobuf Rpc Engine. * The message is used to marshal a Rpc-request * from RPC client to the RPC server. - * The Response to the Rpc call (including errors) are handled - * as part of the standard Rpc response. + * + * No special header is needed for the Rpc Response for Protobuf Rpc Engine. + * The normal RPC response header (see RpcHeader.proto) are sufficient. */ -message HadoopRpcRequestProto { +message RequestProto { /** Name of the RPC method */ required string methodName = 1; diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto new file mode 100644 index 00000000000..723434bf2fe --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +option java_package = "org.apache.hadoop.ipc.protobuf"; +option java_outer_classname = "RpcHeaderProtos"; +option java_generate_equals_and_hash = true; +package hadoop.common; + +/** + * This is the rpc request header. It is sent with every rpc call. + * + * The format of RPC call is as follows: + * +--------------------------------------------------------------+ + * | Rpc length in bytes (4 bytes int) sum of next two parts | + * +--------------------------------------------------------------+ + * | RpcRequestHeaderProto - serialized delimited ie has len | + * +--------------------------------------------------------------+ + * | RpcRequest The actual rpc request | + * | This request is serialized based on RpcKindProto | + * +--------------------------------------------------------------+ + * + */ + +/** + * RpcKind determine the rpcEngine and the serialization of the rpc request + */ +enum RpcKindProto { + RPC_BUILTIN = 0; // Used for built in calls by tests + RPC_WRITABLE = 1; // Use WritableRpcEngine + RPC_PROTOCOL_BUFFER = 2; // Use ProtobufRpcEngine +} + + + +message RpcRequestHeaderProto { // the header for the RpcRequest + enum OperationProto { + RPC_FINAL_PACKET = 0; // The final RPC Packet + RPC_CONTINUATION_PACKET = 1; // not implemented yet + RPC_CLOSE_CONNECTION = 2; // close the rpc connection + } + + optional RpcKindProto rpcKind = 1; + optional OperationProto rpcOp = 2; + required uint32 callId = 3; // each rpc has a callId that is also used in response +} + + + +/** + * Rpc Response Header + * ** If request is successfull response is returned as below ******** + * +------------------------------------------------------------------+ + * | Rpc reponse length in bytes (4 bytes int) | + * | (sum of next two parts) | + * +------------------------------------------------------------------+ + * | RpcResponseHeaderProto - serialized delimited ie has len | + * +------------------------------------------------------------------+ + * | if request is successful: | + * | - RpcResponse - The actual rpc response bytes | + * | This response is serialized based on RpcKindProto | + * | if request fails : | + * | - length (4 byte int) + Class name of exception - UTF-8 string | + * | - length (4 byte int) + Stacktrace - UTF-8 string | + * | if the strings are null then the length is -1 | + * +------------------------------------------------------------------+ + * + */ +message RpcResponseHeaderProto { + enum RpcStatusProto { + SUCCESS = 0; // RPC succeeded + ERROR = 1; // RPC Failed + FATAL = 2; // Fatal error - connection is closed + } + + required uint32 callId = 1; // callId used in Request + required RpcStatusProto status = 2; + optional uint32 serverIpcVersionNum = 3; // in case of an fatal IPC error +} diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RpcPayloadHeader.proto b/hadoop-common-project/hadoop-common/src/main/proto/RpcPayloadHeader.proto deleted file mode 100644 index feb90124412..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/proto/RpcPayloadHeader.proto +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -option java_package = "org.apache.hadoop.ipc.protobuf"; -option java_outer_classname = "RpcPayloadHeaderProtos"; -option java_generate_equals_and_hash = true; -package hadoop.common; - -/** - * This is the rpc payload header. It is sent with every rpc call. - * - * The format of RPC call is as follows: - * +-----------------------------------------------------+ - * | Rpc length in bytes | - * +-----------------------------------------------------+ - * | RpcPayloadHeader - serialized delimited ie has len | - * +-----------------------------------------------------+ - * | RpcRequest Payload | - * +-----------------------------------------------------+ - * - */ - -/** - * RpcKind determine the rpcEngine and the serialization of the rpc payload - */ -enum RpcKindProto { - RPC_BUILTIN = 0; // Used for built in calls by tests - RPC_WRITABLE = 1; // Use WritableRpcEngine - RPC_PROTOCOL_BUFFER = 2; // Use ProtobufRpcEngine -} - -enum RpcPayloadOperationProto { - RPC_FINAL_PAYLOAD = 0; // The final payload - RPC_CONTINUATION_PAYLOAD = 1; // not implemented yet - RPC_CLOSE_CONNECTION = 2; // close the rpc connection -} - -message RpcPayloadHeaderProto { // the header for the RpcRequest - optional RpcKindProto rpcKind = 1; - optional RpcPayloadOperationProto rpcOp = 2; - required uint32 callId = 3; // each rpc has a callId that is also used in response -} - -enum RpcStatusProto { - SUCCESS = 0; // RPC succeeded - ERROR = 1; // RPC Failed - FATAL = 2; // Fatal error - connection is closed -} - -/** - * Rpc Response Header - * - If successfull then the Respose follows after this header - * - length (4 byte int), followed by the response - * - If error or fatal - the exception info follow - * - length (4 byte int) Class name of exception - UTF-8 string - * - length (4 byte int) Stacktrace - UTF-8 string - * - if the strings are null then the length is -1 - * In case of Fatal error then the respose contains the Serverside's IPC version - */ -message RpcResponseHeaderProto { - required uint32 callId = 1; // callId used in Request - required RpcStatusProto status = 2; - optional uint32 serverIpcVersionNum = 3; // in case of an fatal IPC error -} diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index bd6e9420305..b020610ba79 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1090,4 +1090,70 @@ + + + + ha.health-monitor.connect-retry-interval.ms + 1000 + + How often to retry connecting to the service. + + + + + ha.health-monitor.check-interval.ms + 1000 + + How often to check the service. + + + + + ha.health-monitor.sleep-after-disconnect.ms + 1000 + + How long to sleep after an unexpected RPC error. + + + + + ha.health-monitor.rpc-timeout.ms + 45000 + + Timeout for the actual monitorHealth() calls. 
+ + + + + ha.failover-controller.new-active.rpc-timeout.ms + 60000 + + Timeout that the FC waits for the new active to become active + + + + + ha.failover-controller.graceful-fence.rpc-timeout.ms + 5000 + + Timeout that the FC waits for the old active to go to standby + + + + + ha.failover-controller.graceful-fence.connection.retries + 1 + + FC connection retries for graceful fencing + + + + + ha.failover-controller.cli-check.rpc-timeout.ms + 20000 + + Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState + + + diff --git a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm index 44fcfc74e67..5f28d7c3072 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm @@ -16,82 +16,82 @@ --- ${maven.build.timestamp} -Hadoop MapReduce Next Generation - Cluster Setup - - \[ {{{./index.html}Go Back}} \] + \[ {{{../index.html}Go Back}} \] %{toc|section=1|fromDepth=0} +Hadoop MapReduce Next Generation - Cluster Setup + * {Purpose} - This document describes how to install, configure and manage non-trivial - Hadoop clusters ranging from a few nodes to extremely large clusters + This document describes how to install, configure and manage non-trivial + Hadoop clusters ranging from a few nodes to extremely large clusters with thousands of nodes. - To play with Hadoop, you may first want to install it on a single + To play with Hadoop, you may first want to install it on a single machine (see {{{SingleCluster}Single Node Setup}}). - + * {Prerequisites} Download a stable version of Hadoop from Apache mirrors. - + * {Installation} - Installing a Hadoop cluster typically involves unpacking the software on all + Installing a Hadoop cluster typically involves unpacking the software on all the machines in the cluster or installing RPMs. - Typically one machine in the cluster is designated as the NameNode and - another machine the as ResourceManager, exclusively. These are the masters. - - The rest of the machines in the cluster act as both DataNode and NodeManager. + Typically one machine in the cluster is designated as the NameNode and + another machine the as ResourceManager, exclusively. These are the masters. + + The rest of the machines in the cluster act as both DataNode and NodeManager. These are the slaves. * {Running Hadoop in Non-Secure Mode} The following sections describe how to configure a Hadoop cluster. - * {Configuration Files} - + {Configuration Files} + Hadoop configuration is driven by two types of important configuration files: - * Read-only default configuration - <<>>, - <<>>, <<>> and + * Read-only default configuration - <<>>, + <<>>, <<>> and <<>>. - - * Site-specific configuration - <>, - <>, <> and + + * Site-specific configuration - <>, + <>, <> and <>. - Additionally, you can control the Hadoop scripts found in the bin/ - directory of the distribution, by setting site-specific values via the + Additionally, you can control the Hadoop scripts found in the bin/ + directory of the distribution, by setting site-specific values via the <> and <>. - * {Site Configuration} - - To configure the Hadoop cluster you will need to configure the - <<>> in which the Hadoop daemons execute as well as the + {Site Configuration} + + To configure the Hadoop cluster you will need to configure the + <<>> in which the Hadoop daemons execute as well as the <<>> for the Hadoop daemons. 
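
Returning briefly to the core-default.xml additions above: those entries only supply defaults, so daemons and tools read them through Configuration like any other key. A small sketch follows, with the key names and default values copied from the new entries; the class name HaMonitorSettings is illustrative and the selection of keys is just an example.

import org.apache.hadoop.conf.Configuration;

class HaMonitorSettings {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Defaults below match the values added to core-default.xml in this patch.
    int checkIntervalMs = conf.getInt("ha.health-monitor.check-interval.ms", 1000);
    int rpcTimeoutMs = conf.getInt("ha.health-monitor.rpc-timeout.ms", 45000);
    int fenceTimeoutMs = conf.getInt(
        "ha.failover-controller.graceful-fence.rpc-timeout.ms", 5000);
    System.out.println("check-interval=" + checkIntervalMs
        + "ms rpc-timeout=" + rpcTimeoutMs
        + "ms graceful-fence-timeout=" + fenceTimeoutMs + "ms");
  }
}
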
The Hadoop daemons are NameNode/DataNode and ResourceManager/NodeManager. - * {Configuring Environment of Hadoop Daemons} - - Administrators should use the <> and - <> script to do site-specific customization of the - Hadoop daemons' process environment. +** {Configuring Environment of Hadoop Daemons} - At the very least you should specify the <<>> so that it is - correctly defined on each remote node. + Administrators should use the <> and + <> script to do site-specific customization of the + Hadoop daemons' process environment. - In most cases you should also specify <<>> and - <<>> to point to directories that can only be - written to by the users that are going to run the hadoop daemons. - Otherwise there is the potential for a symlink attack. + At the very least you should specify the <<>> so that it is + correctly defined on each remote node. - Administrators can configure individual daemons using the configuration - options shown below in the table: + In most cases you should also specify <<>> and + <<>> to point to directories that can only be + written to by the users that are going to run the hadoop daemons. + Otherwise there is the potential for a symlink attack. + + Administrators can configure individual daemons using the configuration + options shown below in the table: *--------------------------------------+--------------------------------------+ || Daemon || Environment Variable | @@ -112,24 +112,25 @@ Hadoop MapReduce Next Generation - Cluster Setup *--------------------------------------+--------------------------------------+ - For example, To configure Namenode to use parallelGC, the following - statement should be added in hadoop-env.sh : - ----- - export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}" ----- - - Other useful configuration parameters that you can customize include: + For example, To configure Namenode to use parallelGC, the following + statement should be added in hadoop-env.sh : + +---- + export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}" +---- + + Other useful configuration parameters that you can customize include: + + * <<>> / <<>> - The directory where the + daemons' log files are stored. They are automatically created if they + don't exist. + + * <<>> / <<>> - The maximum amount of + heapsize to use, in MB e.g. if the varibale is set to 1000 the heap + will be set to 1000MB. This is used to configure the heap + size for the daemon. By default, the value is 1000. If you want to + configure the values separately for each deamon you can use. - * <<>> / <<>> - The directory where the - daemons' log files are stored. They are automatically created if they - don't exist. - - * <<>> / <<>> - The maximum amount of - heapsize to use, in MB e.g. if the varibale is set to 1000 the heap - will be set to 1000MB. This is used to configure the heap - size for the daemon. By default, the value is 1000. If you want to - configure the values separately for each deamon you can use. 
*--------------------------------------+--------------------------------------+ || Daemon || Environment Variable | *--------------------------------------+--------------------------------------+ @@ -141,14 +142,14 @@ Hadoop MapReduce Next Generation - Cluster Setup *--------------------------------------+--------------------------------------+ | Map Reduce Job History Server | HADOOP_JOB_HISTORYSERVER_HEAPSIZE | *--------------------------------------+--------------------------------------+ - - * {Configuring the Hadoop Daemons in Non-Secure Mode} - This section deals with important parameters to be specified in - the given configuration files: - - * <<>> - +** {Configuring the Hadoop Daemons in Non-Secure Mode} + + This section deals with important parameters to be specified in + the given configuration files: + + * <<>> + *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ @@ -158,16 +159,16 @@ Hadoop MapReduce Next Generation - Cluster Setup | | | Size of read/write buffer used in SequenceFiles. | *-------------------------+-------------------------+------------------------+ - * <<>> - - * Configurations for NameNode: + * <<>> + + * Configurations for NameNode: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | Path on the local filesystem where the NameNode stores the namespace | | -| | and transactions logs persistently. | | +| <<>> | | | +| | Path on the local filesystem where the NameNode stores the namespace | | +| | and transactions logs persistently. | | | | | If this is a comma-delimited list of directories then the name table is | | | | replicated in all of the directories, for redundancy. | *-------------------------+-------------------------+------------------------+ @@ -177,28 +178,28 @@ Hadoop MapReduce Next Generation - Cluster Setup | | | datanodes. | *-------------------------+-------------------------+------------------------+ | <<>> | 268435456 | | -| | | HDFS blocksize of 256MB for large file-systems. | +| | | HDFS blocksize of 256MB for large file-systems. | *-------------------------+-------------------------+------------------------+ | <<>> | 100 | | | | | More NameNode server threads to handle RPCs from large number of | | | | DataNodes. | *-------------------------+-------------------------+------------------------+ - - * Configurations for DataNode: - + + * Configurations for DataNode: + *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ | <<>> | | | -| | Comma separated list of paths on the local filesystem of a | | +| | Comma separated list of paths on the local filesystem of a | | | | <<>> where it should store its blocks. | | -| | | If this is a comma-delimited list of directories, then data will be | +| | | If this is a comma-delimited list of directories, then data will be | | | | stored in all named directories, typically on different devices. 
| *-------------------------+-------------------------+------------------------+ - - * <<>> - * Configurations for ResourceManager and NodeManager: + * <<>> + + * Configurations for ResourceManager and NodeManager: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -220,29 +221,29 @@ Hadoop MapReduce Next Generation - Cluster Setup *-------------------------+-------------------------+------------------------+ - * Configurations for ResourceManager: + * Configurations for ResourceManager: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ -| <<>> | | | +| <<>> | | | | | <<>> host:port for clients to submit jobs. | | | | | | *-------------------------+-------------------------+------------------------+ -| <<>> | | | +| <<>> | | | | | <<>> host:port for ApplicationMasters to talk to | | | | Scheduler to obtain resources. | | | | | | *-------------------------+-------------------------+------------------------+ -| <<>> | | | +| <<>> | | | | | <<>> host:port for NodeManagers. | | | | | | *-------------------------+-------------------------+------------------------+ -| <<>> | | | +| <<>> | | | | | <<>> host:port for administrative commands. | | | | | | *-------------------------+-------------------------+------------------------+ -| <<>> | | | +| <<>> | | | | | <<>> web-ui host:port. | | | | | | *-------------------------+-------------------------+------------------------+ @@ -258,14 +259,14 @@ Hadoop MapReduce Next Generation - Cluster Setup | | Maximum limit of memory to allocate to each container request at the <<>>. | | | | | In MBs | *-------------------------+-------------------------+------------------------+ -| <<>> / | | | -| <<>> | | | +| <<>> / | | | +| <<>> | | | | | List of permitted/excluded NodeManagers. | | -| | | If necessary, use these files to control the list of allowable | +| | | If necessary, use these files to control the list of allowable | | | | NodeManagers. | *-------------------------+-------------------------+------------------------+ - - * Configurations for NodeManager: + + * Configurations for NodeManager: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -314,7 +315,7 @@ Hadoop MapReduce Next Generation - Cluster Setup | | | Shuffle service that needs to be set for Map Reduce applications. | *-------------------------+-------------------------+------------------------+ - * Configurations for History Server (Needs to be moved elsewhere): + * Configurations for History Server (Needs to be moved elsewhere): *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -327,9 +328,9 @@ Hadoop MapReduce Next Generation - Cluster Setup - * <<>> + * <<>> - * Configurations for MapReduce Applications: + * Configurations for MapReduce Applications: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -361,7 +362,7 @@ Hadoop MapReduce Next Generation - Cluster Setup | | | from very large number of maps. 
| *-------------------------+-------------------------+------------------------+ - * Configurations for MapReduce JobHistory Server: + * Configurations for MapReduce JobHistory Server: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -373,51 +374,51 @@ Hadoop MapReduce Next Generation - Cluster Setup | | MapReduce JobHistory Server Web UI | Default port is 19888. | *-------------------------+-------------------------+------------------------+ | <<>> | /mr-history/tmp | | -| | | Directory where history files are written by MapReduce jobs. | +| | | Directory where history files are written by MapReduce jobs. | *-------------------------+-------------------------+------------------------+ | <<>> | /mr-history/done| | -| | | Directory where history files are managed by the MR JobHistory Server. | +| | | Directory where history files are managed by the MR JobHistory Server. | *-------------------------+-------------------------+------------------------+ - * Hadoop Rack Awareness - - The HDFS and the YARN components are rack-aware. +* {Hadoop Rack Awareness} - The NameNode and the ResourceManager obtains the rack information of the - slaves in the cluster by invoking an API in an administrator - configured module. - - The API resolves the DNS name (also IP address) to a rack id. - - The site-specific module to use can be configured using the configuration - item <<>>. The default implementation - of the same runs a script/command configured using - <<>>. If <<>> is - not set, the rack id is returned for any passed IP address. + The HDFS and the YARN components are rack-aware. - * Monitoring Health of NodeManagers - - Hadoop provides a mechanism by which administrators can configure the - NodeManager to run an administrator supplied script periodically to - determine if a node is healthy or not. - - Administrators can determine if the node is in a healthy state by - performing any checks of their choice in the script. If the script - detects the node to be in an unhealthy state, it must print a line to - standard output beginning with the string ERROR. The NodeManager spawns - the script periodically and checks its output. If the script's output - contains the string ERROR, as described above, the node's status is - reported as <<>> and the node is black-listed by the - ResourceManager. No further tasks will be assigned to this node. - However, the NodeManager continues to run the script, so that if the - node becomes healthy again, it will be removed from the blacklisted nodes - on the ResourceManager automatically. The node's health along with the - output of the script, if it is unhealthy, is available to the - administrator in the ResourceManager web interface. The time since the - node was healthy is also displayed on the web interface. + The NameNode and the ResourceManager obtains the rack information of the + slaves in the cluster by invoking an API in an administrator + configured module. - The following parameters can be used to control the node health - monitoring script in <<>>. + The API resolves the DNS name (also IP address) to a rack id. + + The site-specific module to use can be configured using the configuration + item <<>>. The default implementation + of the same runs a script/command configured using + <<>>. If <<>> is + not set, the rack id is returned for any passed IP address. 
+ +* {Monitoring Health of NodeManagers} + + Hadoop provides a mechanism by which administrators can configure the + NodeManager to run an administrator supplied script periodically to + determine if a node is healthy or not. + + Administrators can determine if the node is in a healthy state by + performing any checks of their choice in the script. If the script + detects the node to be in an unhealthy state, it must print a line to + standard output beginning with the string ERROR. The NodeManager spawns + the script periodically and checks its output. If the script's output + contains the string ERROR, as described above, the node's status is + reported as <<>> and the node is black-listed by the + ResourceManager. No further tasks will be assigned to this node. + However, the NodeManager continues to run the script, so that if the + node becomes healthy again, it will be removed from the blacklisted nodes + on the ResourceManager automatically. The node's health along with the + output of the script, if it is unhealthy, is available to the + administrator in the ResourceManager web interface. The time since the + node was healthy is also displayed on the web interface. + + The following parameters can be used to control the node health + monitoring script in <<>>. *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -439,164 +440,163 @@ Hadoop MapReduce Next Generation - Cluster Setup | | | Timeout for health script execution. | *-------------------------+-------------------------+------------------------+ - The health checker script is not supposed to give ERROR if only some of the - local disks become bad. NodeManager has the ability to periodically check - the health of the local disks (specifically checks nodemanager-local-dirs - and nodemanager-log-dirs) and after reaching the threshold of number of - bad directories based on the value set for the config property - yarn.nodemanager.disk-health-checker.min-healthy-disks, the whole node is - marked unhealthy and this info is sent to resource manager also. The boot - disk is either raided or a failure in the boot disk is identified by the - health checker script. + The health checker script is not supposed to give ERROR if only some of the + local disks become bad. NodeManager has the ability to periodically check + the health of the local disks (specifically checks nodemanager-local-dirs + and nodemanager-log-dirs) and after reaching the threshold of number of + bad directories based on the value set for the config property + yarn.nodemanager.disk-health-checker.min-healthy-disks, the whole node is + marked unhealthy and this info is sent to resource manager also. The boot + disk is either raided or a failure in the boot disk is identified by the + health checker script. - * {Slaves file} - - Typically you choose one machine in the cluster to act as the NameNode and - one machine as to act as the ResourceManager, exclusively. The rest of the - machines act as both a DataNode and NodeManager and are referred to as - . +* {Slaves file} - List all slave hostnames or IP addresses in your <<>> file, - one per line. + Typically you choose one machine in the cluster to act as the NameNode and + one machine as to act as the ResourceManager, exclusively. The rest of the + machines act as both a DataNode and NodeManager and are referred to as + . - * {Logging} - - Hadoop uses the Apache log4j via the Apache Commons Logging framework for - logging. 
Edit the <<>> file to customize the - Hadoop daemons' logging configuration (log-formats and so on). - - * {Operating the Hadoop Cluster} + List all slave hostnames or IP addresses in your <<>> file, + one per line. - Once all the necessary configuration is complete, distribute the files to the +* {Logging} + + Hadoop uses the Apache log4j via the Apache Commons Logging framework for + logging. Edit the <<>> file to customize the + Hadoop daemons' logging configuration (log-formats and so on). + +* {Operating the Hadoop Cluster} + + Once all the necessary configuration is complete, distribute the files to the <<>> directory on all the machines. - * Hadoop Startup - - To start a Hadoop cluster you will need to start both the HDFS and YARN - cluster. +** Hadoop Startup - Format a new distributed filesystem: - ----- - $ $HADOOP_PREFIX/bin/hdfs namenode -format ----- + To start a Hadoop cluster you will need to start both the HDFS and YARN + cluster. - Start the HDFS with the following command, run on the designated NameNode: - ----- - $ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode ----- - - Run a script to start DataNodes on all slaves: + Format a new distributed filesystem: ---- - $ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode ----- - - Start the YARN with the following command, run on the designated - ResourceManager: - ----- - $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager ----- - - Run a script to start NodeManagers on all slaves: - ----- - $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager ----- - - Start a standalone WebAppProxy server. If multiple servers - are used with load balancing it should be run on each of them: - ----- - $ $HADOOP_YARN_HOME/bin/yarn start proxyserver --config $HADOOP_CONF_DIR +$ $HADOOP_PREFIX/bin/hdfs namenode -format ---- - Start the MapReduce JobHistory Server with the following command, run on the - designated server: - ----- - $ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR ----- - - * Hadoop Shutdown - - Stop the NameNode with the following command, run on the designated - NameNode: - ----- - $ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode ----- - - Run a script to stop DataNodes on all slaves: + Start the HDFS with the following command, run on the designated NameNode: ---- - $ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode ----- - - Stop the ResourceManager with the following command, run on the designated - ResourceManager: - ----- - $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager ----- +$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode +---- - Run a script to stop NodeManagers on all slaves: + Run a script to start DataNodes on all slaves: ---- - $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager ----- +$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode +---- - Stop the WebAppProxy server. 
If multiple servers are used with load - balancing it should be run on each of them: + Start the YARN with the following command, run on the designated + ResourceManager: ---- - $ $HADOOP_YARN_HOME/bin/yarn stop proxyserver --config $HADOOP_CONF_DIR +$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager +---- + + Run a script to start NodeManagers on all slaves: + +---- +$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager +---- + + Start a standalone WebAppProxy server. If multiple servers + are used with load balancing it should be run on each of them: + +---- +$ $HADOOP_YARN_HOME/bin/yarn start proxyserver --config $HADOOP_CONF_DIR +---- + + Start the MapReduce JobHistory Server with the following command, run on the + designated server: + +---- +$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR +---- + +** Hadoop Shutdown + + Stop the NameNode with the following command, run on the designated + NameNode: + +---- +$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode +---- + + Run a script to stop DataNodes on all slaves: + +---- +$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode +---- + + Stop the ResourceManager with the following command, run on the designated + ResourceManager: + +---- +$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager +---- + + Run a script to stop NodeManagers on all slaves: + +---- +$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager +---- + + Stop the WebAppProxy server. If multiple servers are used with load + balancing it should be run on each of them: + +---- +$ $HADOOP_YARN_HOME/bin/yarn stop proxyserver --config $HADOOP_CONF_DIR ---- - Stop the MapReduce JobHistory Server with the following command, run on the - designated server: - ----- - $ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR ----- + Stop the MapReduce JobHistory Server with the following command, run on the + designated server: + +---- +$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR +---- - * {Running Hadoop in Secure Mode} - This section deals with important parameters to be specified in + This section deals with important parameters to be specified in to run Hadoop in <> with strong, Kerberos-based authentication. - + * <<>> - + Ensure that HDFS and YARN daemons run as different Unix users, for e.g. <<>> and <<>>. Also, ensure that the MapReduce JobHistory - server runs as user <<>>. - + server runs as user <<>>. + It's recommended to have them share a Unix group, for e.g. <<>>. 
- -*--------------------------------------+----------------------------------------------------------------------+ -|| User:Group || Daemons | -*--------------------------------------+----------------------------------------------------------------------+ -| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode | -*--------------------------------------+----------------------------------------------------------------------+ -| yarn:hadoop | ResourceManager, NodeManager | -*--------------------------------------+----------------------------------------------------------------------+ -| mapred:hadoop | MapReduce JobHistory Server | -*--------------------------------------+----------------------------------------------------------------------+ - + +*---------------+----------------------------------------------------------------------+ +|| User:Group || Daemons | +*---------------+----------------------------------------------------------------------+ +| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode | +*---------------+----------------------------------------------------------------------+ +| yarn:hadoop | ResourceManager, NodeManager | +*---------------+----------------------------------------------------------------------+ +| mapred:hadoop | MapReduce JobHistory Server | +*---------------+----------------------------------------------------------------------+ + * <<>> - + The following table lists various paths on HDFS and local filesystems (on all nodes) and recommended permissions: - + *-------------------+-------------------+------------------+------------------+ || Filesystem || Path || User:Group || Permissions | *-------------------+-------------------+------------------+------------------+ -| local | <<>> | hdfs:hadoop | drwx------ | +| local | <<>> | hdfs:hadoop | drwx------ | *-------------------+-------------------+------------------+------------------+ | local | <<>> | hdfs:hadoop | drwx------ | *-------------------+-------------------+------------------+------------------+ @@ -621,123 +621,111 @@ Hadoop MapReduce Next Generation - Cluster Setup | hdfs | <<>> | yarn:hadoop | drwxrwxrwxt | *-------------------+-------------------+------------------+------------------+ | hdfs | <<>> | mapred:hadoop | | -| | | | drwxrwxrwxt | +| | | | drwxrwxrwxt | *-------------------+-------------------+------------------+------------------+ | hdfs | <<>> | mapred:hadoop | | -| | | | drwxr-x--- | +| | | | drwxr-x--- | *-------------------+-------------------+------------------+------------------+ * Kerberos Keytab files - - * HDFS - - The NameNode keytab file, on the NameNode host, should look like the - following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab + * HDFS + + The NameNode keytab file, on the NameNode host, should look like the + following: + +---- +$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab Keytab name: FILE:/etc/security/keytab/nn.service.keytab KVNO Timestamp Principal - 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 
96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - + 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ---- - The Secondary NameNode keytab file, on that host, should look like the + The Secondary NameNode keytab file, on that host, should look like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab +---- +$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab Keytab name: FILE:/etc/security/keytab/sn.service.keytab KVNO Timestamp Principal - 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - + 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ---- The DataNode keytab file, on each host, should look like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab +---- +$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab Keytab name: FILE:/etc/security/keytab/dn.service.keytab KVNO Timestamp Principal - 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - + 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 
host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ---- - + * YARN - - The ResourceManager keytab file, on the ResourceManager host, should look - like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab + The ResourceManager keytab file, on the ResourceManager host, should look + like the following: + +---- +$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab Keytab name: FILE:/etc/security/keytab/rm.service.keytab KVNO Timestamp Principal - 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - + 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ---- The NodeManager keytab file, on each host, should look like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab +---- +$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab Keytab name: FILE:/etc/security/keytab/nm.service.keytab KVNO Timestamp Principal - 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - + 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ---- - + * MapReduce JobHistory Server - The MapReduce JobHistory Server keytab file, on that host, should 
look + The MapReduce JobHistory Server keytab file, on that host, should look like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab +---- +$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab Keytab name: FILE:/etc/security/keytab/jhs.service.keytab KVNO Timestamp Principal - 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - + 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ---- - - * Configuration in Secure Mode - - * <<>> + +** Configuration in Secure Mode + + * <<>> *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -748,10 +736,10 @@ KVNO Timestamp Principal | | | Enable RPC service-level authorization. | *-------------------------+-------------------------+------------------------+ - * <<>> - - * Configurations for NameNode: - + * <<>> + + * Configurations for NameNode: + *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ @@ -774,8 +762,8 @@ KVNO Timestamp Principal | | | HTTPS Kerberos principal name for the NameNode. | *-------------------------+-------------------------+------------------------+ - * Configurations for Secondary NameNode: - + * Configurations for Secondary NameNode: + *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ @@ -783,7 +771,7 @@ KVNO Timestamp Principal *-------------------------+-------------------------+------------------------+ | <<>> | <50470> | | *-------------------------+-------------------------+------------------------+ -| <<>> | | | +| <<>> | | | | | | | | | | Kerberos keytab file for the NameNode. | *-------------------------+-------------------------+------------------------+ @@ -795,7 +783,7 @@ KVNO Timestamp Principal | | | HTTPS Kerberos principal name for the Secondary NameNode. | *-------------------------+-------------------------+------------------------+ - * Configurations for DataNode: + * Configurations for DataNode: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -817,15 +805,15 @@ KVNO Timestamp Principal | | | HTTPS Kerberos principal name for the DataNode. 
| *-------------------------+-------------------------+------------------------+ - * <<>> - - * WebAppProxy + * <<>> - The <<>> provides a proxy between the web applications - exported by an application and an end user. If security is enabled - it will warn users before accessing a potentially unsafe web application. - Authentication and authorization using the proxy is handled just like - any other privileged web application. + * WebAppProxy + + The <<>> provides a proxy between the web applications + exported by an application and an end user. If security is enabled + it will warn users before accessing a potentially unsafe web application. + Authentication and authorization using the proxy is handled just like + any other privileged web application. *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -844,12 +832,12 @@ KVNO Timestamp Principal | | | Kerberos principal name for the WebAppProxy. | *-------------------------+-------------------------+------------------------+ - * LinuxContainerExecutor - - A <<>> used by YARN framework which define how any - launched and controlled. - - The following are the available in Hadoop YARN: + * LinuxContainerExecutor + + A <<>> used by YARN framework which define how any + launched and controlled. + + The following are the available in Hadoop YARN: *--------------------------------------+--------------------------------------+ || ContainerExecutor || Description | @@ -859,7 +847,7 @@ KVNO Timestamp Principal | | The container process has the same Unix user as the NodeManager. | *--------------------------------------+--------------------------------------+ | <<>> | | -| | Supported only on GNU/Linux, this executor runs the containers as the | +| | Supported only on GNU/Linux, this executor runs the containers as the | | | user who submitted the application. It requires all user accounts to be | | | created on the cluster nodes where the containers are launched. It uses | | | a executable that is included in the Hadoop distribution. | @@ -874,53 +862,53 @@ KVNO Timestamp Principal | | localized as part of the distributed cache. | *--------------------------------------+--------------------------------------+ - To build the LinuxContainerExecutor executable run: - + To build the LinuxContainerExecutor executable run: + ---- $ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/ ---- - - The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the - path on the cluster nodes where a configuration file for the setuid - executable should be located. The executable should be installed in - $HADOOP_YARN_HOME/bin. - The executable must have specific permissions: 6050 or --Sr-s--- - permissions user-owned by (super-user) and group-owned by a - special group (e.g. <<>>) of which the NodeManager Unix user is - the group member and no ordinary application user is. If any application - user belongs to this special group, security will be compromised. This - special group name should be specified for the configuration property - <<>> in both - <<>> and <<>>. - - For example, let's say that the NodeManager is run as user who is - part of the groups users and , any of them being the primary group. - Let also be that has both and another user - (application submitter) as its members, and does not - belong to . 
Going by the above description, the setuid/setgid - executable should be set 6050 or --Sr-s--- with user-owner as and - group-owner as which has as its member (and not - which has also as its member besides ). + The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the + path on the cluster nodes where a configuration file for the setuid + executable should be located. The executable should be installed in + $HADOOP_YARN_HOME/bin. - The LinuxTaskController requires that paths including and leading up to - the directories specified in <<>> and - <<>> to be set 755 permissions as described - above in the table on permissions on directories. + The executable must have specific permissions: 6050 or --Sr-s--- + permissions user-owned by (super-user) and group-owned by a + special group (e.g. <<>>) of which the NodeManager Unix user is + the group member and no ordinary application user is. If any application + user belongs to this special group, security will be compromised. This + special group name should be specified for the configuration property + <<>> in both + <<>> and <<>>. - * <<>> - - The executable requires a configuration file called - <<>> to be present in the configuration - directory passed to the mvn target mentioned above. + For example, let's say that the NodeManager is run as user who is + part of the groups users and , any of them being the primary group. + Let also be that has both and another user + (application submitter) as its members, and does not + belong to . Going by the above description, the setuid/setgid + executable should be set 6050 or --Sr-s--- with user-owner as and + group-owner as which has as its member (and not + which has also as its member besides ). - The configuration file must be owned by the user running NodeManager - (user <<>> in the above example), group-owned by anyone and - should have the permissions 0400 or r--------. + The LinuxTaskController requires that paths including and leading up to + the directories specified in <<>> and + <<>> to be set 755 permissions as described + above in the table on permissions on directories. - The executable requires following configuration items to be present - in the <<>> file. The items should be - mentioned as simple key=value pairs, one per-line: + * <<>> + + The executable requires a configuration file called + <<>> to be present in the configuration + directory passed to the mvn target mentioned above. + + The configuration file must be owned by the user running NodeManager + (user <<>> in the above example), group-owned by anyone and + should have the permissions 0400 or r--------. + + The executable requires following configuration items to be present + in the <<>> file. The items should be + mentioned as simple key=value pairs, one per-line: *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | @@ -930,16 +918,16 @@ KVNO Timestamp Principal | | | binary should be this group. Should be same as the | | | | value with which the NodeManager is configured. This configuration is | | | | required for validating the secure access of the | -| | | binary. | +| | | binary. | *-------------------------+-------------------------+------------------------+ | <<>> | hfds,yarn,mapred,bin | Banned users. | *-------------------------+-------------------------+------------------------+ -| <<>> | 1000 | Prevent other super-users. | +| <<>> | 1000 | Prevent other super-users. 
| *-------------------------+-------------------------+------------------------+ - To re-cap, here are the local file-ssytem permissions required for the + To re-cap, here are the local file-sysytem permissions required for the various paths related to the <<>>: - + *-------------------+-------------------+------------------+------------------+ || Filesystem || Path || User:Group || Permissions | *-------------------+-------------------+------------------+------------------+ @@ -951,9 +939,9 @@ KVNO Timestamp Principal *-------------------+-------------------+------------------+------------------+ | local | <<>> | yarn:hadoop | drwxr-xr-x | *-------------------+-------------------+------------------+------------------+ - + * Configurations for ResourceManager: - + *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ @@ -964,9 +952,9 @@ KVNO Timestamp Principal | <<>> | rm/_HOST@REALM.TLD | | | | | Kerberos principal name for the ResourceManager. | *-------------------------+-------------------------+------------------------+ - + * Configurations for NodeManager: - + *-------------------------+-------------------------+------------------------+ || Parameter || Value || Notes | *-------------------------+-------------------------+------------------------+ @@ -977,15 +965,15 @@ KVNO Timestamp Principal | | | Kerberos principal name for the NodeManager. | *-------------------------+-------------------------+------------------------+ | <<>> | | | -| | <<>> | -| | | Use LinuxContainerExecutor. | +| | <<>> | +| | | Use LinuxContainerExecutor. | *-------------------------+-------------------------+------------------------+ | <<>> | | | | | | Unix group of the NodeManager. | *-------------------------+-------------------------+------------------------+ * <<>> - + * Configurations for MapReduce JobHistory Server: *-------------------------+-------------------------+------------------------+ @@ -1002,116 +990,116 @@ KVNO Timestamp Principal | | | Kerberos principal name for the MapReduce JobHistory Server. | *-------------------------+-------------------------+------------------------+ - - * {Operating the Hadoop Cluster} - Once all the necessary configuration is complete, distribute the files to the +* {Operating the Hadoop Cluster} + + Once all the necessary configuration is complete, distribute the files to the <<>> directory on all the machines. This section also describes the various Unix users who should be starting the various components and uses the same Unix accounts and groups used previously: - - * Hadoop Startup - - To start a Hadoop cluster you will need to start both the HDFS and YARN + +** Hadoop Startup + + To start a Hadoop cluster you will need to start both the HDFS and YARN cluster. 
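+
+  Before starting any daemon in secure mode, it can be worth confirming that
+  each service account is able to authenticate with its keytab. A minimal
+  check, reusing the example keytab path and principal from the Kerberos
+  section above (substitute the values for your deployment), could be:
+
+----
+[hdfs]$ kinit -k -t /etc/security/keytab/nn.service.keytab nn/full.qualified.domain.name@REALM.TLD
+[hdfs]$ klist
+----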
Format a new distributed filesystem as : - + ---- [hdfs]$ $HADOOP_PREFIX/bin/hdfs namenode -format ---- Start the HDFS with the following command, run on the designated NameNode as : - + ---- [hdfs]$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode ----- +---- Run a script to start DataNodes on all slaves as with a special environment variable <<>> set to : ---- [root]$ HADOOP_SECURE_DN_USER=hdfs $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start datanode ----- - - Start the YARN with the following command, run on the designated +---- + + Start the YARN with the following command, run on the designated ResourceManager as : - + ---- -[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager ----- +[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager +---- Run a script to start NodeManagers on all slaves as : ---- -[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager ----- +[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager +---- - Start a standalone WebAppProxy server. Run on the WebAppProxy + Start a standalone WebAppProxy server. Run on the WebAppProxy server as . If multiple servers are used with load balancing it should be run on each of them: ---- -[yarn]$ $HADOOP_YARN_HOME/bin/yarn start proxyserver --config $HADOOP_CONF_DIR ----- +[yarn]$ $HADOOP_YARN_HOME/bin/yarn start proxyserver --config $HADOOP_CONF_DIR +---- - Start the MapReduce JobHistory Server with the following command, run on the + Start the MapReduce JobHistory Server with the following command, run on the designated server as : - + ---- -[mapred]$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR ----- +[mapred]$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver --config $HADOOP_CONF_DIR +---- - * Hadoop Shutdown +** Hadoop Shutdown + + Stop the NameNode with the following command, run on the designated NameNode + as : - Stop the NameNode with the following command, run on the designated NameNode - as : - ---- [hdfs]$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode ----- +---- - Run a script to stop DataNodes on all slaves as : + Run a script to stop DataNodes on all slaves as : ---- [root]$ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode ----- - - Stop the ResourceManager with the following command, run on the designated - ResourceManager as : - ----- -[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager ----- +---- - Run a script to stop NodeManagers on all slaves as : + Stop the ResourceManager with the following command, run on the designated + ResourceManager as : ---- -[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager ----- +[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager +---- - Stop the WebAppProxy server. Run on the WebAppProxy server as - . 
If multiple servers are used with load balancing it - should be run on each of them: + Run a script to stop NodeManagers on all slaves as : ---- -[yarn]$ $HADOOP_YARN_HOME/bin/yarn stop proxyserver --config $HADOOP_CONF_DIR ----- +[yarn]$ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop nodemanager +---- - Stop the MapReduce JobHistory Server with the following command, run on the - designated server as : + Stop the WebAppProxy server. Run on the WebAppProxy server as + . If multiple servers are used with load balancing it + should be run on each of them: ---- -[mapred]$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR ----- - -* {Web Interfaces} +[yarn]$ $HADOOP_YARN_HOME/bin/yarn stop proxyserver --config $HADOOP_CONF_DIR +---- + + Stop the MapReduce JobHistory Server with the following command, run on the + designated server as : + +---- +[mapred]$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR +---- + +* {Web Interfaces} + + Once the Hadoop cluster is up and running check the web-ui of the + components as described below: - Once the Hadoop cluster is up and running check the web-ui of the - components as described below: - *-------------------------+-------------------------+------------------------+ || Daemon || Web Interface || Notes | *-------------------------+-------------------------+------------------------+ @@ -1122,5 +1110,5 @@ KVNO Timestamp Principal | MapReduce JobHistory Server | http:/// | | | | | Default HTTP port is 19888. | *-------------------------+-------------------------+------------------------+ - - + + diff --git a/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm new file mode 100644 index 00000000000..a198bb66404 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm @@ -0,0 +1,490 @@ +~~ Licensed to the Apache Software Foundation (ASF) under one or more +~~ contributor license agreements. See the NOTICE file distributed with +~~ this work for additional information regarding copyright ownership. +~~ The ASF licenses this file to You under the Apache License, Version 2.0 +~~ (the "License"); you may not use this file except in compliance with +~~ the License. You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. + + --- + Hadoop Commands Guide + --- + --- + ${maven.build.timestamp} + +%{toc} + +Overview + + All hadoop commands are invoked by the <<>> script. Running the + hadoop script without any arguments prints the description for all + commands. + + Usage: <<>> + + Hadoop has an option parsing framework that employs parsing generic + options as well as running classes. + +*-----------------------+---------------+ +|| COMMAND_OPTION || Description +*-----------------------+---------------+ +| <<<--config confdir>>>| Overwrites the default Configuration directory. Default is <<<${HADOOP_HOME}/conf>>>. +*-----------------------+---------------+ +| GENERIC_OPTIONS | The common set of options supported by multiple commands. 
+| COMMAND_OPTIONS | Various commands with their options are described in the following sections. The commands have been grouped into User Commands and Administration Commands. +*-----------------------+---------------+ + +Generic Options + + The following options are supported by {{dfsadmin}}, {{fs}}, {{fsck}}, + {{job}} and {{fetchdt}}. Applications should implement {{{some_useful_url}Tool}} to support + {{{another_useful_url}GenericOptions}}. + +*------------------------------------------------+-----------------------------+ +|| GENERIC_OPTION || Description +*------------------------------------------------+-----------------------------+ +|<<<-conf \ >>> | Specify an application + | configuration file. +*------------------------------------------------+-----------------------------+ +|<<<-D \=\ >>> | Use value for given property. +*------------------------------------------------+-----------------------------+ +|<<<-jt \ or \ >>> | Specify a job tracker. + | Applies only to job. +*------------------------------------------------+-----------------------------+ +|<<<-files \ >>> | Specify comma separated files + | to be copied to the map + | reduce cluster. Applies only + | to job. +*------------------------------------------------+-----------------------------+ +|<<<-libjars \ >>>| Specify comma separated jar + | files to include in the + | classpath. Applies only to + | job. +*------------------------------------------------+-----------------------------+ +|<<<-archives \ >>> | Specify comma separated + | archives to be unarchived on + | the compute machines. Applies + | only to job. +*------------------------------------------------+-----------------------------+ + +User Commands + + Commands useful for users of a hadoop cluster. + +* <<>> + + Creates a hadoop archive. More information can be found at Hadoop + Archives. + + Usage: <<* >>> + +*-------------------+-------------------------------------------------------+ +||COMMAND_OPTION || Description +*-------------------+-------------------------------------------------------+ +| -archiveName NAME | Name of the archive to be created. +*-------------------+-------------------------------------------------------+ +| src | Filesystem pathnames which work as usual with regular + | expressions. +*-------------------+-------------------------------------------------------+ +| dest | Destination directory which would contain the archive. +*-------------------+-------------------------------------------------------+ + +* <<>> + + Copy file or directories recursively. More information can be found at + Hadoop DistCp Guide. + + Usage: << >>> + +*-------------------+--------------------------------------------+ +||COMMAND_OPTION || Description +*-------------------+--------------------------------------------+ +| srcurl | Source Url +*-------------------+--------------------------------------------+ +| desturl | Destination Url +*-------------------+--------------------------------------------+ + +* <<>> + + Usage: <<>> + + Deprecated, use <<>> instead. + + Runs a generic filesystem user client. + + The various COMMAND_OPTIONS can be found at File System Shell Guide. + +* <<>> + + Runs a HDFS filesystem checking utility. See {{Fsck}} for more info. + + Usage: << [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>> + +*------------------+---------------------------------------------+ +|| COMMAND_OPTION || Description +*------------------+---------------------------------------------+ +| | Start checking from this path. 
+*------------------+---------------------------------------------+ +| -move | Move corrupted files to /lost+found +*------------------+---------------------------------------------+ +| -delete | Delete corrupted files. +*------------------+---------------------------------------------+ +| -openforwrite | Print out files opened for write. +*------------------+---------------------------------------------+ +| -files | Print out files being checked. +*------------------+---------------------------------------------+ +| -blocks | Print out block report. +*------------------+---------------------------------------------+ +| -locations | Print out locations for every block. +*------------------+---------------------------------------------+ +| -racks | Print out network topology for data-node locations. +*------------------+---------------------------------------------+ + +* <<>> + + Gets Delegation Token from a NameNode. See {{fetchdt}} for more info. + + Usage: <<] >>> + +*------------------------------+---------------------------------------------+ +|| COMMAND_OPTION || Description +*------------------------------+---------------------------------------------+ +| | File name to store the token into. +*------------------------------+---------------------------------------------+ +| --webservice | use http protocol instead of RPC +*------------------------------+---------------------------------------------+ + +* <<>> + + Runs a jar file. Users can bundle their Map Reduce code in a jar file and + execute it using this command. + + Usage: << [mainClass] args...>>> + + The streaming jobs are run via this command. Examples can be referred from + Streaming examples + + Word count example is also run using jar command. It can be referred from + Wordcount example + +* <<>> + + Command to interact with Map Reduce Jobs. + + Usage: <<] | [-status ] | [-counter ] | [-kill ] | [-events <#-of-events>] | [-history [all] ] | [-list [all]] | [-kill-task ] | [-fail-task ] | [-set-priority ]>>> + +*------------------------------+---------------------------------------------+ +|| COMMAND_OPTION || Description +*------------------------------+---------------------------------------------+ +| -submit | Submits the job. +*------------------------------+---------------------------------------------+ +| -status | Prints the map and reduce completion + | percentage and all job counters. +*------------------------------+---------------------------------------------+ +| -counter | Prints the counter value. +*------------------------------+---------------------------------------------+ +| -kill | Kills the job. +*------------------------------+---------------------------------------------+ +| -events <#-of-events> | Prints the events' details + | received by jobtracker for the given range. +*------------------------------+---------------------------------------------+ +| -history [all] | Prints job details, failed and killed tip + | details. More details about the job such as + | successful tasks and task attempts made for + | each task can be viewed by specifying the [all] + | option. +*------------------------------+---------------------------------------------+ +| -list [all] | Displays jobs which are yet to complete. + | <<<-list all>>> displays all jobs. +*------------------------------+---------------------------------------------+ +| -kill-task | Kills the task. Killed tasks are NOT counted + | against failed attempts. 
+*------------------------------+---------------------------------------------+ +| -fail-task | Fails the task. Failed tasks are counted + | against failed attempts. +*------------------------------+---------------------------------------------+ +| -set-priority | Changes the priority of the job. Allowed + | priority values are VERY_HIGH, HIGH, NORMAL, + | LOW, VERY_LOW +*------------------------------+---------------------------------------------+ + +* <<>> + + Runs a pipes job. + + Usage: <<] [-jobconf , , + ...] [-input ] [-output ] [-jar ] [-inputformat + ] [-map ] [-partitioner ] [-reduce ] [-writer + ] [-program ] [-reduces ]>>> + +*----------------------------------------+------------------------------------+ +|| COMMAND_OPTION || Description +*----------------------------------------+------------------------------------+ +| -conf | Configuration for job +*----------------------------------------+------------------------------------+ +| -jobconf , , ... | Add/override configuration for job +*----------------------------------------+------------------------------------+ +| -input | Input directory +*----------------------------------------+------------------------------------+ +| -output | Output directory +*----------------------------------------+------------------------------------+ +| -jar | Jar filename +*----------------------------------------+------------------------------------+ +| -inputformat | InputFormat class +*----------------------------------------+------------------------------------+ +| -map | Java Map class +*----------------------------------------+------------------------------------+ +| -partitioner | Java Partitioner +*----------------------------------------+------------------------------------+ +| -reduce | Java Reduce class +*----------------------------------------+------------------------------------+ +| -writer | Java RecordWriter +*----------------------------------------+------------------------------------+ +| -program | Executable URI +*----------------------------------------+------------------------------------+ +| -reduces | Number of reduces +*----------------------------------------+------------------------------------+ + +* <<>> + + command to interact and view Job Queue information + + Usage: << [-showJobs]] | [-showacls]>>> + +*-----------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*-----------------+-----------------------------------------------------------+ +| -list | Gets list of Job Queues configured in the system. + | Along with scheduling information associated with the job queues. +*-----------------+-----------------------------------------------------------+ +| -info [-showJobs] | Displays the job queue information and + | associated scheduling information of particular job queue. + | If <<<-showJobs>>> options is present a list of jobs + | submitted to the particular job queue is displayed. +*-----------------+-----------------------------------------------------------+ +| -showacls | Displays the queue name and associated queue operations + | allowed for the current user. The list consists of only + | those queues to which the user has access. +*-----------------+-----------------------------------------------------------+ + +* <<>> + + Prints the version. + + Usage: <<>> + +* <<>> + + hadoop script can be used to invoke any class. + + Usage: <<>> + + Runs the class named <<>>. + +* <<>> + + Prints the class path needed to get the Hadoop jar and the required + libraries. 
+ + Usage: <<>> + +Administration Commands + + Commands useful for administrators of a hadoop cluster. + +* <<>> + + Runs a cluster balancing utility. An administrator can simply press Ctrl-C + to stop the rebalancing process. See Rebalancer for more details. + + Usage: <<]>>> + +*------------------------+-----------------------------------------------------------+ +|| COMMAND_OPTION | Description +*------------------------+-----------------------------------------------------------+ +| -threshold | Percentage of disk capacity. This overwrites the + | default threshold. +*------------------------+-----------------------------------------------------------+ + +* <<>> + + Get/Set the log level for each daemon. + + Usage: << >>> + Usage: << >>> + +*------------------------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*------------------------------+-----------------------------------------------------------+ +| -getlevel | Prints the log level of the daemon running at + | . This command internally connects + | to http:///logLevel?log= +*------------------------------+-----------------------------------------------------------+ +| -setlevel | Sets the log level of the daemon + | running at . This command internally + | connects to http:///logLevel?log= +*------------------------------+-----------------------------------------------------------+ + +* <<>> + + Runs a HDFS datanode. + + Usage: <<>> + +*-----------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*-----------------+-----------------------------------------------------------+ +| -rollback | Rollsback the datanode to the previous version. This should + | be used after stopping the datanode and distributing the old + | hadoop version. +*-----------------+-----------------------------------------------------------+ + +* <<>> + + Runs a HDFS dfsadmin client. + + Usage: << ...] [-clrQuota ...] [-help [cmd]]>>> + +*-----------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +| -report | Reports basic filesystem information and statistics. +*-----------------+-----------------------------------------------------------+ +| -safemode enter / leave / get / wait | Safe mode maintenance command. Safe + | mode is a Namenode state in which it \ + | 1. does not accept changes to the name space (read-only) \ + | 2. does not replicate or delete blocks. \ + | Safe mode is entered automatically at Namenode startup, and + | leaves safe mode automatically when the configured minimum + | percentage of blocks satisfies the minimum replication + | condition. Safe mode can also be entered manually, but then + | it can only be turned off manually as well. +*-----------------+-----------------------------------------------------------+ +| -refreshNodes | Re-read the hosts and exclude files to update the set of + | Datanodes that are allowed to connect to the Namenode and + | those that should be decommissioned or recommissioned. +*-----------------+-----------------------------------------------------------+ +| -finalizeUpgrade| Finalize upgrade of HDFS. Datanodes delete their previous + | version working directories, followed by Namenode doing the + | same. This completes the upgrade process. 
+*-----------------+-----------------------------------------------------------+ +| -upgradeProgress status / details / force | Request current distributed + | upgrade status, a detailed status or force the upgrade to + | proceed. +*-----------------+-----------------------------------------------------------+ +| -metasave filename | Save Namenode's primary data structures to in + | the directory specified by hadoop.log.dir property. + | will contain one line for each of the following\ + | 1. Datanodes heart beating with Namenode\ + | 2. Blocks waiting to be replicated\ + | 3. Blocks currrently being replicated\ + | 4. Blocks waiting to be deleted\ +*-----------------+-----------------------------------------------------------+ +| -setQuota ... | Set the quota for each + | directory . The directory quota is a long integer + | that puts a hard limit on the number of names in the + | directory tree. Best effort for the directory, with faults + | reported if \ + | 1. N is not a positive integer, or \ + | 2. user is not an administrator, or \ + | 3. the directory does not exist or is a file, or \ + | 4. the directory would immediately exceed the new quota. \ +*-----------------+-----------------------------------------------------------+ +| -clrQuota ... | Clear the quota for each directory + | . Best effort for the directory. with fault + | reported if \ + | 1. the directory does not exist or is a file, or \ + | 2. user is not an administrator. It does not fault if the + | directory has no quota. +*-----------------+-----------------------------------------------------------+ +| -help [cmd] | Displays help for the given command or all commands if none + | is specified. +*-----------------+-----------------------------------------------------------+ + +* <<>> + + Runs MR admin client + + Usage: <<>> + +*-------------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*-------------------+-----------------------------------------------------------+ +| -refreshQueueAcls | Refresh the queue acls used by hadoop, to check access + | during submissions and administration of the job by the + | user. The properties present in mapred-queue-acls.xml is + | reloaded by the queue manager. +*-------------------+-----------------------------------------------------------+ + +* <<>> + + Runs the MapReduce job Tracker node. + + Usage: <<>> + +*--------------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*--------------------+-----------------------------------------------------------+ +| -dumpConfiguration | Dumps the configuration used by the JobTracker alongwith + | queue configuration in JSON format into Standard output + | used by the jobtracker and exits. +*--------------------+-----------------------------------------------------------+ + +* <<>> + + Runs the namenode. More info about the upgrade, rollback and finalize is + at Upgrade Rollback + + Usage: <<>> + +*--------------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*--------------------+-----------------------------------------------------------+ +| -format | Formats the namenode. It starts the namenode, formats + | it and then shut it down. +*--------------------+-----------------------------------------------------------+ +| -upgrade | Namenode should be started with upgrade option after + | the distribution of new hadoop version. 
+*--------------------+-----------------------------------------------------------+ +| -rollback | Rollsback the namenode to the previous version. This + | should be used after stopping the cluster and + | distributing the old hadoop version. +*--------------------+-----------------------------------------------------------+ +| -finalize | Finalize will remove the previous state of the files + | system. Recent upgrade will become permanent. Rollback + | option will not be available anymore. After finalization + | it shuts the namenode down. +*--------------------+-----------------------------------------------------------+ +| -importCheckpoint | Loads image from a checkpoint directory and save it + | into the current one. Checkpoint dir is read from + | property fs.checkpoint.dir +*--------------------+-----------------------------------------------------------+ + +* <<>> + + Runs the HDFS secondary namenode. See Secondary Namenode for more + info. + + Usage: <<>> + +*----------------------+-----------------------------------------------------------+ +|| COMMAND_OPTION || Description +*----------------------+-----------------------------------------------------------+ +| -checkpoint [-force] | Checkpoints the Secondary namenode if EditLog size + | >= fs.checkpoint.size. If <<<-force>>> is used, + | checkpoint irrespective of EditLog size. +*----------------------+-----------------------------------------------------------+ +| -geteditsize | Prints the EditLog size. +*----------------------+-----------------------------------------------------------+ + +* <<>> + + Runs a MapReduce task Tracker node. + + Usage: <<>> diff --git a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm new file mode 100644 index 00000000000..bf0daf46bc0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm @@ -0,0 +1,418 @@ +~~ Licensed to the Apache Software Foundation (ASF) under one or more +~~ contributor license agreements. See the NOTICE file distributed with +~~ this work for additional information regarding copyright ownership. +~~ The ASF licenses this file to You under the Apache License, Version 2.0 +~~ (the "License"); you may not use this file except in compliance with +~~ the License. You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. + + --- + File System Shell Guide + --- + --- + ${maven.build.timestamp} + +%{toc} + +Overview + + The File System (FS) shell includes various shell-like commands that + directly interact with the Hadoop Distributed File System (HDFS) as well as + other file systems that Hadoop supports, such as Local FS, HFTP FS, S3 FS, + and others. The FS shell is invoked by: + ++--- +bin/hadoop fs ++--- + + All FS shell commands take path URIs as arguments. The URI format is + <<>>. For HDFS the scheme is <<>>, and for + the Local FS the scheme is <<>>. The scheme and authority are + optional. If not specified, the default scheme specified in the + configuration is used. An HDFS file or directory such as /parent/child can + be specified as <<>> or simply as + <<
>> (given that your configuration is set to point to + <<>>). + + Most of the commands in FS shell behave like corresponding Unix commands. + Differences are described with each of the commands. Error information is + sent to stderr and the output is sent to stdout. + +cat + + Usage: <<>> + + Copies source paths to stdout. + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +chgrp + + Usage: <<>> + + Change group association of files. With -R, make the change recursively + through the directory structure. The user must be the owner of files, or + else a super-user. Additional information is in the + {{{betterurl}Permissions Guide}}. + +chmod + + Usage: << URI [URI ...]>>> + + Change the permissions of files. With -R, make the change recursively + through the directory structure. The user must be the owner of the file, or + else a super-user. Additional information is in the + {{{betterurl}Permissions Guide}}. + +chown + + Usage: <<>> + + Change the owner of files. With -R, make the change recursively through the + directory structure. The user must be a super-user. Additional information + is in the {{{betterurl}Permissions Guide}}. + +copyFromLocal + + Usage: << URI>>> + + Similar to put command, except that the source is restricted to a local + file reference. + +copyToLocal + + Usage: << >>> + + Similar to get command, except that the destination is restricted to a + local file reference. + +count + + Usage: << >>> + + Count the number of directories, files and bytes under the paths that match + the specified file pattern. The output columns with -count are: DIR_COUNT, + FILE_COUNT, CONTENT_SIZE FILE_NAME + + The output columns with -count -q are: QUOTA, REMAINING_QUATA, SPACE_QUOTA, + REMAINING_SPACE_QUOTA, DIR_COUNT, FILE_COUNT, CONTENT_SIZE, FILE_NAME + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +cp + + Usage: << >>> + + Copy files from source to destination. This command allows multiple sources + as well in which case the destination must be a directory. + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +du + + Usage: <<>> + + Displays sizes of files and directories contained in the given directory or + the length of a file in case its just a file. + + Options: + + * The -s option will result in an aggregate summary of file lengths being + displayed, rather than the individual files. + + * The -h option will format file sizes in a "human-readable" fashion (e.g + 64.0m instead of 67108864) + + Example: + + * hdfs dfs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://nn.example.com/user/hadoop/dir1 + + Exit Code: + Returns 0 on success and -1 on error. + +dus + + Usage: << >>> + + Displays a summary of file lengths. This is an alternate form of hdfs dfs -du -s. + +expunge + + Usage: <<>> + + Empty the Trash. Refer to the {{{betterurl}HDFS Architecture Guide}} for + more information on the Trash feature. + +get + + Usage: << >>> + + Copy files to the local file system. Files that fail the CRC check may be + copied with the -ignorecrc option. Files and CRCs may be copied using the + -crc option. + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +getmerge + + Usage: << [addnl]>>> + + Takes a source directory and a destination file as input and concatenates + files in src into the destination local file. Optionally addnl can be set to + enable adding a newline character at the + end of each file. 
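+
+  For example (the paths are illustrative), the per-reducer output files of a
+  job could be concatenated into a single local file, with a newline appended
+  after each part:
+
++---
+hdfs dfs -getmerge /user/hadoop/output merged.txt addnl
++---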
+ +ls + + Usage: << >>> + + For a file returns stat on the file with the following format: + ++---+ +permissions number_of_replicas userid groupid filesize modification_date modification_time filename ++---+ + + For a directory it returns list of its direct children as in unix.A directory is listed as: + ++---+ +permissions userid groupid modification_date modification_time dirname ++---+ + + Example: + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +lsr + + Usage: << >>> + + Recursive version of ls. Similar to Unix ls -R. + +mkdir + + Usage: << >>> + + Takes path uri's as argument and creates directories. With -p the behavior + is much like unix mkdir -p creating parent directories along the path. + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +moveFromLocal + + Usage: << >>> + + Similar to put command, except that the source localsrc is deleted after + it's copied. + +moveToLocal + + Usage: << >>> + + Displays a "Not implemented yet" message. + +mv + + Usage: << >>> + + Moves files from source to destination. This command allows multiple sources + as well in which case the destination needs to be a directory. Moving files + across file systems is not permitted. + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +put + + Usage: << ... >>> + + Copy single src, or multiple srcs from local file system to the destination + file system. Also reads input from stdin and writes to destination file + system. + + * <<>> + + * <<>> + + * <<>> + + * <<>> + Reads the input from stdin. + + Exit Code: + + Returns 0 on success and -1 on error. + +rm + + Usage: <<>> + + Delete files specified as args. Only deletes non empty directory and files. + If the -skipTrash option is specified, the trash, if enabled, will be + bypassed and the specified file(s) deleted immediately. This can be useful + when it is necessary to delete files from an over-quota directory. Refer to + rmr for recursive deletes. + + Example: + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +rmr + + Usage: <<>> + + Recursive version of delete. If the -skipTrash option is specified, the + trash, if enabled, will be bypassed and the specified file(s) deleted + immediately. This can be useful when it is necessary to delete files from an + over-quota directory. + + Example: + + * <<>> + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +setrep + + Usage: << >>> + + Changes the replication factor of a file. -R option is for recursively + increasing the replication factor of files within a directory. + + Example: + + * <<>> + + Exit Code: + + Returns 0 on success and -1 on error. + +stat + + Usage: <<>> + + Returns the stat information on the path. + + Example: + + * <<>> + + Exit Code: + Returns 0 on success and -1 on error. + +tail + + Usage: <<>> + + Displays last kilobyte of the file to stdout. -f option can be used as in + Unix. + + Example: + + * <<>> + + Exit Code: + Returns 0 on success and -1 on error. + +test + + Usage: <<>> + + Options: + +*----+------------+ +| -e | check to see if the file exists. Return 0 if true. +*----+------------+ +| -z | check to see if the file is zero length. Return 0 if true. +*----+------------+ +| -d | check to see if the path is directory. Return 0 if true. +*----+------------+ + + Example: + + * <<>> + +text + + Usage: << >>> + + Takes a source file and outputs the file in text format. The allowed formats + are zip and TextRecordInputStream. 
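+
+  For example (the path is illustrative), a SequenceFile that <<<-cat>>> would
+  print as raw binary can be dumped in readable form with:
+
++---
+hdfs dfs -text /user/hadoop/data/part-00000
++---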
+ +touchz + + Usage: <<>> + + Create a file of zero length. + + Example: + + * <<>> + + Exit Code: + Returns 0 on success and -1 on error. diff --git a/hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm new file mode 100644 index 00000000000..e1d88fab2e5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm @@ -0,0 +1,99 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Authentication for Hadoop HTTP web-consoles + --- + --- + ${maven.build.timestamp} + +Authentication for Hadoop HTTP web-consoles + +%{toc|section=1|fromDepth=0} + +* Introduction + + This document describes how to configure Hadoop HTTP web-consoles to + require user authentication. + + By default Hadoop HTTP web-consoles (JobTracker, NameNode, TaskTrackers + and DataNodes) allow access without any form of authentication. + + Similarly to Hadoop RPC, Hadoop HTTP web-consoles can be configured to + require Kerberos authentication using HTTP SPNEGO protocol (supported + by browsers like Firefox and Internet Explorer). + + In addition, Hadoop HTTP web-consoles support the equivalent of + Hadoop's Pseudo/Simple authentication. If this option is enabled, user + must specify their user name in the first browser interaction using the + user.name query string parameter. For example: + <<>>. + + If a custom authentication mechanism is required for the HTTP + web-consoles, it is possible to implement a plugin to support the + alternate authentication mechanism (refer to Hadoop hadoop-auth for details + on writing an <<>>). + + The next section describes how to configure Hadoop HTTP web-consoles to + require user authentication. + +* Configuration + + The following properties should be in the <<>> of all the + nodes in the cluster. + + <<>>: add to this property the + <<>> initializer + class. + + <<>>: Defines authentication used for the + HTTP web-consoles. The supported values are: <<>> | <<>> | + <<<#AUTHENTICATION_HANDLER_CLASSNAME#>>>. The dfeault value is <<>>. + + <<>>: Indicates how long (in + seconds) an authentication token is valid before it has to be renewed. + The default value is <<<36000>>>. + + <<>>: The signature secret + file for signing the authentication tokens. If not set a random secret is + generated at startup time. The same secret should be used for all nodes + in the cluster, JobTracker, NameNode, DataNode and TastTracker. The + default value is <<<${user.home}/hadoop-http-auth-signature-secret>>>. + IMPORTANT: This file should be readable only by the Unix user running the + daemons. + + <<>>: The domain to use for the + HTTP cookie that stores the authentication token. In order to + authentiation to work correctly across all nodes in the cluster the + domain must be correctly set. There is no default value, the HTTP + cookie will not have a domain working only with the hostname issuing + the HTTP cookie. 
+ + IMPORTANT: when using IP addresses, browsers ignore cookies with domain + settings. For this setting to work properly all nodes in the cluster + must be configured to generate URLs with <<>> names on it. + + <<>>: Indicates if + anonymous requests are allowed when using 'simple' authentication. The + default value is <<>> + + <<>>: Indicates the Kerberos + principal to be used for HTTP endpoint when using 'kerberos' + authentication. The principal short name must be <<>> per Kerberos HTTP + SPNEGO specification. The default value is <<>>, + where <<<_HOST>>> -if present- is replaced with bind address of the HTTP + server. + + <<>>: Location of the keytab file + with the credentials for the Kerberos principal used for the HTTP + endpoint. The default value is <<<${user.home}/hadoop.keytab>>>.i + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDU.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDU.java index de3d5566eb5..71024acd9e6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDU.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDU.java @@ -103,4 +103,12 @@ public class TestDU extends TestCase { duSize >= writtenSize && writtenSize <= (duSize + slack)); } + public void testDUGetUsedWillNotReturnNegative() throws IOException { + File file = new File(DU_DIR, "data"); + assertTrue(file.createNewFile()); + DU du = new DU(file, 10000); + du.decDfsUsed(Long.MAX_VALUE); + long duSize = du.getUsed(); + assertTrue(String.valueOf(duSize), duSize >= 0L); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDelegationTokenRenewer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDelegationTokenRenewer.java index 86a580c5258..3f1d34e99b7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDelegationTokenRenewer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDelegationTokenRenewer.java @@ -4,6 +4,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; @@ -133,6 +134,8 @@ public class TestDelegationTokenRenewer { InterruptedException { TestFileSystem tfs = new TestFileSystem(); renewer.addRenewAction(tfs); + assertEquals("FileSystem not added to DelegationTokenRenewer", 1, + renewer.getRenewQueueLength()); for (int i = 0; i < 60; i++) { Thread.sleep(RENEW_CYCLE); @@ -144,7 +147,8 @@ public class TestDelegationTokenRenewer { assertTrue("Token not renewed even after 1 minute", (tfs.testToken.renewCount > 0)); - assertTrue("Token not removed", (tfs.testToken.renewCount < MAX_RENEWALS)); + assertEquals("FileSystem not removed from DelegationTokenRenewer", 0, + renewer.getRenewQueueLength()); assertTrue("Token not cancelled", tfs.testToken.cancelled); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileStatus.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileStatus.java index c6622d25890..e5380484f97 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileStatus.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileStatus.java @@ -25,18 +25,34 @@ import java.io.DataInput; import 
java.io.DataInputStream; import java.io.DataOutput; import java.io.DataOutputStream; +import java.io.IOException; import org.junit.Test; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; public class TestFileStatus { private static final Log LOG = LogFactory.getLog(TestFileStatus.class); + + /** Values for creating {@link FileStatus} in some tests */ + static final int LENGTH = 1; + static final int REPLICATION = 2; + static final long BLKSIZE = 3; + static final long MTIME = 4; + static final long ATIME = 5; + static final String OWNER = "owner"; + static final String GROUP = "group"; + static final FsPermission PERMISSION = FsPermission.valueOf("-rw-rw-rw-"); + static final Path PATH = new Path("path"); + /** + * Check that the write and readField methods work correctly. + */ @Test public void testFileStatusWritable() throws Exception { FileStatus[] tests = { @@ -68,4 +84,181 @@ public class TestFileStatus { iterator++; } } + + /** + * Check that the full parameter constructor works correctly. + */ + @Test + public void constructorFull() throws IOException { + boolean isdir = false; + Path symlink = new Path("symlink"); + FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE, + MTIME, ATIME, PERMISSION, OWNER, GROUP, symlink, PATH); + + validateAccessors(fileStatus, LENGTH, isdir, REPLICATION, BLKSIZE, MTIME, + ATIME, PERMISSION, OWNER, GROUP, symlink, PATH); + } + + /** + * Check that the non-symlink constructor works correctly. + */ + @Test + public void constructorNoSymlink() throws IOException { + boolean isdir = true; + FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE, + MTIME, ATIME, PERMISSION, OWNER, GROUP, PATH); + validateAccessors(fileStatus, LENGTH, isdir, REPLICATION, BLKSIZE, MTIME, + ATIME, PERMISSION, OWNER, GROUP, null, PATH); + } + + /** + * Check that the constructor without owner, group and permissions works + * correctly. + */ + @Test + public void constructorNoOwner() throws IOException { + boolean isdir = true; + FileStatus fileStatus = new FileStatus(LENGTH, isdir, + REPLICATION, BLKSIZE, MTIME, PATH); + validateAccessors(fileStatus, LENGTH, isdir, REPLICATION, BLKSIZE, MTIME, + 0, FsPermission.getDefault(), "", "", null, PATH); + } + + /** + * Check that the no parameter constructor works correctly. + */ + @Test + public void constructorBlank() throws IOException { + FileStatus fileStatus = new FileStatus(); + validateAccessors(fileStatus, 0, false, 0, 0, 0, + 0, FsPermission.getDefault(), "", "", null, null); + } + + /** + * Check that FileStatus are equal if their paths are equal. + */ + @Test + public void testEquals() { + Path path = new Path("path"); + FileStatus fileStatus1 = new FileStatus(1, true, 1, 1, 1, 1, + FsPermission.valueOf("-rw-rw-rw-"), "one", "one", null, path); + FileStatus fileStatus2 = new FileStatus(2, true, 2, 2, 2, 2, + FsPermission.valueOf("---x--x--x"), "two", "two", null, path); + assertEquals(fileStatus1, fileStatus2); + } + + /** + * Check that FileStatus are not equal if their paths are not equal. 
+ */ + @Test + public void testNotEquals() { + Path path1 = new Path("path1"); + Path path2 = new Path("path2"); + FileStatus fileStatus1 = new FileStatus(1, true, 1, 1, 1, 1, + FsPermission.valueOf("-rw-rw-rw-"), "one", "one", null, path1); + FileStatus fileStatus2 = new FileStatus(1, true, 1, 1, 1, 1, + FsPermission.valueOf("-rw-rw-rw-"), "one", "one", null, path2); + assertFalse(fileStatus1.equals(fileStatus2)); + assertFalse(fileStatus2.equals(fileStatus1)); + } + + /** + * Check that toString produces the expected output for a file. + */ + @Test + public void toStringFile() throws IOException { + boolean isdir = false; + FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE, + MTIME, ATIME, PERMISSION, OWNER, GROUP, null, PATH); + validateToString(fileStatus); + } + + /** + * Check that toString produces the expected output for a directory. + */ + @Test + public void toStringDir() throws IOException { + FileStatus fileStatus = new FileStatus(LENGTH, true, REPLICATION, BLKSIZE, + MTIME, ATIME, PERMISSION, OWNER, GROUP, null, PATH); + validateToString(fileStatus); + } + + /** + * Check that toString produces the expected output for a symlink. + */ + @Test + public void toStringSymlink() throws IOException { + boolean isdir = false; + Path symlink = new Path("symlink"); + FileStatus fileStatus = new FileStatus(LENGTH, isdir, REPLICATION, BLKSIZE, + MTIME, ATIME, PERMISSION, OWNER, GROUP, symlink, PATH); + validateToString(fileStatus); + } + + /** + * Validate the accessors for FileStatus. + * @param fileStatus FileStatus to checked + * @param length expected length + * @param isdir expected isDirectory + * @param replication expected replication + * @param blocksize expected blocksize + * @param mtime expected modification time + * @param atime expected access time + * @param permission expected permission + * @param owner expected owner + * @param group expected group + * @param symlink expected symlink + * @param path expected path + */ + private void validateAccessors(FileStatus fileStatus, + long length, boolean isdir, int replication, long blocksize, long mtime, + long atime, FsPermission permission, String owner, String group, + Path symlink, Path path) throws IOException { + + assertEquals(length, fileStatus.getLen()); + assertEquals(isdir, fileStatus.isDirectory()); + assertEquals(replication, fileStatus.getReplication()); + assertEquals(blocksize, fileStatus.getBlockSize()); + assertEquals(mtime, fileStatus.getModificationTime()); + assertEquals(atime, fileStatus.getAccessTime()); + assertEquals(permission, fileStatus.getPermission()); + assertEquals(owner, fileStatus.getOwner()); + assertEquals(group, fileStatus.getGroup()); + if(symlink == null) { + assertFalse(fileStatus.isSymlink()); + } else { + assertTrue(fileStatus.isSymlink()); + assertEquals(symlink, fileStatus.getSymlink()); + } + assertEquals(path, fileStatus.getPath()); + } + + /** + * Validates the toString method for FileStatus. 
+ * @param fileStatus FileStatus to be validated + */ + private void validateToString(FileStatus fileStatus) throws IOException { + StringBuilder expected = new StringBuilder(); + expected.append("FileStatus{"); + expected.append("path=").append(fileStatus.getPath()).append("; "); + expected.append("isDirectory=").append(fileStatus.isDirectory()).append("; "); + if(!fileStatus.isDirectory()) { + expected.append("length=").append(fileStatus.getLen()).append("; "); + expected.append("replication=").append(fileStatus.getReplication()).append("; "); + expected.append("blocksize=").append(fileStatus.getBlockSize()).append("; "); + } + expected.append("modification_time=").append(fileStatus.getModificationTime()).append("; "); + expected.append("access_time=").append(fileStatus.getAccessTime()).append("; "); + expected.append("owner=").append(fileStatus.getOwner()).append("; "); + expected.append("group=").append(fileStatus.getGroup()).append("; "); + expected.append("permission=").append(fileStatus.getPermission()).append("; "); + if(fileStatus.isSymlink()) { + expected.append("isSymlink=").append(true).append("; "); + expected.append("symlink=").append(fileStatus.getSymlink()).append("}"); + } else { + expected.append("isSymlink=").append(false).append("}"); + } + + assertEquals(expected.toString(), fileStatus.toString()); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemInitialization.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemInitialization.java new file mode 100644 index 00000000000..d3fceecdfad --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemInitialization.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.net.URL; + +import org.junit.Test; +import static org.junit.Assert.*; + +public class TestFileSystemInitialization { + + /** + * Check if FileSystem can be properly initialized if URLStreamHandlerFactory + * is registered. 
+ */ + @Test + public void testInitializationWithRegisteredStreamFactory() { + Configuration conf = new Configuration(); + URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory(conf)); + try { + FileSystem.getFileSystemClass("file", conf); + } + catch (IOException ok) { + // we might get an exception but this not related to infinite loop problem + assertFalse(false); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java index 5b4f585c19d..72df11e6df6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java @@ -357,6 +357,66 @@ public class TestFsShellCopy { assertEquals(0, exit); assertEquals("f1\ndf1\ndf2\ndf3\nf2\n", readFile("out")); } + + + @Test + public void testMoveFileFromLocal() throws Exception { + Path testRoot = new Path(testRootDir, "testPutFile"); + lfs.delete(testRoot, true); + lfs.mkdirs(testRoot); + + Path target = new Path(testRoot, "target"); + Path srcFile = new Path(testRoot, new Path("srcFile")); + lfs.createNewFile(srcFile); + + int exit = shell.run(new String[]{ + "-moveFromLocal", srcFile.toString(), target.toString() }); + assertEquals(0, exit); + assertFalse(lfs.exists(srcFile)); + assertTrue(lfs.exists(target)); + assertTrue(lfs.isFile(target)); + } + + @Test + public void testMoveDirFromLocal() throws Exception { + Path testRoot = new Path(testRootDir, "testPutDir"); + lfs.delete(testRoot, true); + lfs.mkdirs(testRoot); + + Path srcDir = new Path(testRoot, "srcDir"); + lfs.mkdirs(srcDir); + Path targetDir = new Path(testRoot, "target"); + + int exit = shell.run(new String[]{ + "-moveFromLocal", srcDir.toString(), targetDir.toString() }); + assertEquals(0, exit); + assertFalse(lfs.exists(srcDir)); + assertTrue(lfs.exists(targetDir)); + } + + @Test + public void testMoveDirFromLocalDestExists() throws Exception { + Path testRoot = new Path(testRootDir, "testPutDir"); + lfs.delete(testRoot, true); + lfs.mkdirs(testRoot); + + Path srcDir = new Path(testRoot, "srcDir"); + lfs.mkdirs(srcDir); + Path targetDir = new Path(testRoot, "target"); + lfs.mkdirs(targetDir); + + int exit = shell.run(new String[]{ + "-moveFromLocal", srcDir.toString(), targetDir.toString() }); + assertEquals(0, exit); + assertFalse(lfs.exists(srcDir)); + assertTrue(lfs.exists(new Path(targetDir, srcDir.getName()))); + + lfs.mkdirs(srcDir); + exit = shell.run(new String[]{ + "-moveFromLocal", srcDir.toString(), targetDir.toString() }); + assertEquals(1, exit); + assertTrue(lfs.exists(srcDir)); + } private void createFile(Path ... 
paths) throws IOException { for (Path path : paths) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java index eb3d33df377..cf1d2df340c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java @@ -19,12 +19,14 @@ package org.apache.hadoop.fs; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem.Statistics; +import org.apache.hadoop.util.Shell; import static org.apache.hadoop.fs.FileSystemTestHelper.*; import java.io.*; import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; import org.junit.Before; import org.junit.Test; @@ -262,6 +264,7 @@ public class TestLocalFileSystem { @Test public void testListStatusWithColons() throws IOException { + assumeTrue(!Shell.WINDOWS); Configuration conf = new Configuration(); LocalFileSystem fs = FileSystem.getLocal(conf); File colonFile = new File(TEST_ROOT_DIR, "foo:bar"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestChRootedFs.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestChRootedFs.java index c52280154d6..f458ec14998 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestChRootedFs.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestChRootedFs.java @@ -25,6 +25,7 @@ import java.util.EnumSet; import static org.apache.hadoop.fs.FileContextTestHelper.*; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.AbstractFileSystem; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContextTestHelper; @@ -36,6 +37,7 @@ import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; public class TestChRootedFs { FileContext fc; // The ChRoootedFs @@ -307,4 +309,21 @@ public class TestChRootedFs { fc.getDefaultFileSystem().resolvePath(new Path("/nonExisting")); } + @Test + public void testIsValidNameValidInBaseFs() throws Exception { + AbstractFileSystem baseFs = Mockito.spy(fc.getDefaultFileSystem()); + ChRootedFs chRootedFs = new ChRootedFs(baseFs, new Path("/chroot")); + Mockito.doReturn(true).when(baseFs).isValidName(Mockito.anyString()); + Assert.assertTrue(chRootedFs.isValidName("/test")); + Mockito.verify(baseFs).isValidName("/chroot/test"); + } + + @Test + public void testIsValidNameInvalidInBaseFs() throws Exception { + AbstractFileSystem baseFs = Mockito.spy(fc.getDefaultFileSystem()); + ChRootedFs chRootedFs = new ChRootedFs(baseFs, new Path("/chroot")); + Mockito.doReturn(false).when(baseFs).isValidName(Mockito.anyString()); + Assert.assertFalse(chRootedFs.isValidName("/test")); + Mockito.verify(baseFs).isValidName("/chroot/test"); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java index dd1fe6c5d13..f59e3ab4526 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java @@ -662,4 +662,15 @@ public class ViewFileSystemBaseTest { public void testInternalSetOwner() throws IOException { fsView.setOwner(new Path("/internalDir"), "foo", "bar"); } + + @Test + public void testCreateNonRecursive() throws IOException { + Path path = FileSystemTestHelper.getTestRootPath(fsView, "/user/foo"); + fsView.createNonRecursive(path, false, 1024, (short)1, 1024L, null); + FileStatus status = fsView.getFileStatus(new Path("/user/foo")); + Assert.assertTrue("Created file should be type file", + fsView.isFile(new Path("/user/foo"))); + Assert.assertTrue("Target of created file should be type file", + fsTarget.isFile(new Path(targetTestRoot,"user/foo"))); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java index 2eba9671a34..c2dc23abccd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java @@ -42,6 +42,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback; import org.apache.hadoop.ha.ActiveStandbyElector.ActiveNotFoundException; import org.apache.hadoop.ha.HAZKUtil.ZKAuthInfo; +import org.apache.hadoop.test.GenericTestUtils; public class TestActiveStandbyElector { @@ -56,7 +57,8 @@ public class TestActiveStandbyElector { private int sleptFor = 0; ActiveStandbyElectorTester(String hostPort, int timeout, String parent, - List acl, ActiveStandbyElectorCallback app) throws IOException { + List acl, ActiveStandbyElectorCallback app) throws IOException, + KeeperException { super(hostPort, timeout, parent, acl, Collections.emptyList(), app); } @@ -83,7 +85,7 @@ public class TestActiveStandbyElector { ActiveStandbyElector.BREADCRUMB_FILENAME; @Before - public void init() throws IOException { + public void init() throws IOException, KeeperException { count = 0; mockZK = Mockito.mock(ZooKeeper.class); mockApp = Mockito.mock(ActiveStandbyElectorCallback.class); @@ -705,4 +707,18 @@ public class TestActiveStandbyElector { Mockito.eq(ZK_PARENT_NAME), Mockito.any(), Mockito.eq(Ids.OPEN_ACL_UNSAFE), Mockito.eq(CreateMode.PERSISTENT)); } + + /** + * verify the zookeeper connection establishment + */ + @Test + public void testWithoutZKServer() throws Exception { + try { + new ActiveStandbyElector("127.0.0.1", 2000, ZK_PARENT_NAME, + Ids.OPEN_ACL_UNSAFE, Collections. emptyList(), mockApp); + Assert.fail("Did not throw zookeeper connection loss exceptions!"); + } catch (KeeperException ke) { + GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index a0d6de0e9a8..acbf32b021a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -68,6 +68,7 @@ public class TestIPC { * of the various writables. 
**/ static boolean WRITABLE_FAULTS_ENABLED = true; + static int WRITABLE_FAULTS_SLEEP = 0; static { Client.setPingInterval(conf, PING_INTERVAL); @@ -206,16 +207,27 @@ public class TestIPC { static void maybeThrowIOE() throws IOException { if (WRITABLE_FAULTS_ENABLED) { + maybeSleep(); throw new IOException("Injected fault"); } } static void maybeThrowRTE() { if (WRITABLE_FAULTS_ENABLED) { + maybeSleep(); throw new RuntimeException("Injected fault"); } } + private static void maybeSleep() { + if (WRITABLE_FAULTS_SLEEP > 0) { + try { + Thread.sleep(WRITABLE_FAULTS_SLEEP); + } catch (InterruptedException ie) { + } + } + } + @SuppressWarnings("unused") private static class IOEOnReadWritable extends LongWritable { public IOEOnReadWritable() {} @@ -370,6 +382,27 @@ public class TestIPC { RTEOnReadWritable.class); } + /** + * Test case that fails a write, but only after taking enough time + * that a ping should have been sent. This is a reproducer for a + * deadlock seen in one iteration of HADOOP-6762. + */ + @Test + public void testIOEOnWriteAfterPingClient() throws Exception { + // start server + Client.setPingInterval(conf, 100); + + try { + WRITABLE_FAULTS_SLEEP = 1000; + doErrorTest(IOEOnWriteWritable.class, + LongWritable.class, + LongWritable.class, + LongWritable.class); + } finally { + WRITABLE_FAULTS_SLEEP = 0; + } + } + private static void assertExceptionContains( Throwable t, String substring) { String msg = StringUtils.stringifyException(t); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index 745eb792842..a4e915a30a3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -38,6 +38,10 @@ import java.net.ConnectException; import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.Arrays; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import javax.net.SocketFactory; @@ -823,6 +827,96 @@ public class TestRPC { } } + @Test(timeout=90000) + public void testRPCInterruptedSimple() throws Exception { + final Configuration conf = new Configuration(); + Server server = RPC.getServer( + TestProtocol.class, new TestImpl(), ADDRESS, 0, 5, true, conf, null + ); + server.start(); + InetSocketAddress addr = NetUtils.getConnectAddress(server); + + final TestProtocol proxy = (TestProtocol) RPC.getProxy( + TestProtocol.class, TestProtocol.versionID, addr, conf); + // Connect to the server + proxy.ping(); + // Interrupt self, try another call + Thread.currentThread().interrupt(); + try { + proxy.ping(); + fail("Interruption did not cause IPC to fail"); + } catch (IOException ioe) { + if (!ioe.toString().contains("InterruptedException")) { + throw ioe; + } + // clear interrupt status for future tests + Thread.interrupted(); + } + } + + @Test(timeout=30000) + public void testRPCInterrupted() throws IOException, InterruptedException { + final Configuration conf = new Configuration(); + Server server = RPC.getServer( + TestProtocol.class, new TestImpl(), ADDRESS, 0, 5, true, conf, null + ); + + server.start(); + + int numConcurrentRPC = 200; + InetSocketAddress addr = NetUtils.getConnectAddress(server); + final CyclicBarrier barrier = new CyclicBarrier(numConcurrentRPC); 
+ final CountDownLatch latch = new CountDownLatch(numConcurrentRPC); + final AtomicBoolean leaderRunning = new AtomicBoolean(true); + final AtomicReference error = new AtomicReference(); + Thread leaderThread = null; + + for (int i = 0; i < numConcurrentRPC; i++) { + final int num = i; + final TestProtocol proxy = (TestProtocol) RPC.getProxy( + TestProtocol.class, TestProtocol.versionID, addr, conf); + Thread rpcThread = new Thread(new Runnable() { + @Override + public void run() { + try { + barrier.await(); + while (num == 0 || leaderRunning.get()) { + proxy.slowPing(false); + } + + proxy.slowPing(false); + } catch (Exception e) { + if (num == 0) { + leaderRunning.set(false); + } else { + error.set(e); + } + + LOG.error(e); + } finally { + latch.countDown(); + } + } + }); + rpcThread.start(); + + if (leaderThread == null) { + leaderThread = rpcThread; + } + } + // let threads get past the barrier + Thread.sleep(1000); + // stop a single thread + while (leaderRunning.get()) { + leaderThread.interrupt(); + } + + latch.await(); + + // should not cause any other thread to get an error + assertTrue("rpc got exception " + error.get(), error.get() == null); + } + public static void main(String[] args) throws Exception { new TestRPC().testCallsInternal(conf); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestProxyUserFromEnv.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestProxyUserFromEnv.java new file mode 100644 index 00000000000..b83f91b3065 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestProxyUserFromEnv.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.apache.hadoop.security; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.junit.Test; + +public class TestProxyUserFromEnv { + /** Test HADOOP_PROXY_USER for impersonation */ + @Test + public void testProxyUserFromEnvironment() throws IOException { + String proxyUser = "foo.bar"; + System.setProperty(UserGroupInformation.HADOOP_PROXY_USER, proxyUser); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + assertEquals(proxyUser, ugi.getUserName()); + + UserGroupInformation realUgi = ugi.getRealUser(); + assertNotNull(realUgi); + // get the expected real user name + Process pp = Runtime.getRuntime().exec("whoami"); + BufferedReader br = new BufferedReader + (new InputStreamReader(pp.getInputStream())); + String realUser = br.readLine().trim(); + assertEquals(realUser, realUgi.getUserName()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeLibraryChecker.java new file mode 100644 index 00000000000..c41b56768b6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeLibraryChecker.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.util; + +import junit.framework.TestCase; + +import org.apache.hadoop.util.ExitUtil.ExitException; +import org.junit.Test; + +public class TestNativeLibraryChecker extends TestCase { + private void expectExit(String [] args) { + try { + // should throw exit exception + NativeLibraryChecker.main(args); + fail("should call exit"); + } catch (ExitException e) { + // pass + ExitUtil.resetFirstExitException(); + } + } + + @Test + public void testNativeLibraryChecker() { + ExitUtil.disableSystemExit(); + // help should return normally + NativeLibraryChecker.main(new String[] {"-h"}); + // illegal argmuments should exit + expectExit(new String[] {"-a", "-h"}); + expectExit(new String[] {"aaa"}); + if (NativeCodeLoader.isNativeCodeLoaded()) { + // no argument should return normally + NativeLibraryChecker.main(new String[0]); + } else { + // no argument should exit + expectExit(new String[0]); + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml index c5fd8f05f2f..ec02b61d1be 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml @@ -34,7 +34,7 @@ Apache Hadoop HttpFS - 6.0.32 + 6.0.36 REPO NOT AVAIL REPO NOT AVAIL REVISION NOT AVAIL @@ -531,7 +531,7 @@ + dest="downloads/apache-tomcat-${tomcat.version}.tar.gz" verbose="true" skipexisting="true"/> @@ -545,7 +545,7 @@ BUILD_DIR=`cygpath --unix '${project.build.directory}'` fi cd $BUILD_DIR/tomcat.exp - tar xzf ${basedir}/downloads/tomcat.tar.gz + tar xzf ${basedir}/downloads/apache-tomcat-${tomcat.version}.tar.gz diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c89eac3723a..c9bf83cc0c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -20,6 +20,8 @@ Trunk (Unreleased) HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup. (Junping Du via szetszwo) + HDFS-4296. Reserve layout version for release 1.2.0. (suresh) + IMPROVEMENTS HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants -> @@ -269,6 +271,21 @@ Trunk (Unreleased) HDFS-4260 Fix HDFS tests to set test dir to a valid HDFS path as opposed to the local build path (Chri Nauroth via Sanjay) + HDFS-4269. Datanode rejects all datanode registrations from localhost + in single-node developer setup on Windows. (Chris Nauroth via suresh) + + HADOOP-8957 HDFS tests for AbstractFileSystem#IsValidName should be overridden for + embedded file systems like ViewFs (Chris Nauroth via Sanjay Radia) + + HDFS-4310. fix test org.apache.hadoop.hdfs.server.datanode. + TestStartSecureDataNode (Ivan A. Veselovsky via atm) + + HDFS-4274. BlockPoolSliceScanner does not close verification log during + shutdown. (Chris Nauroth via suresh) + + HDFS-4275. MiniDFSCluster-based tests fail on Windows due to failure + to delete test namenode directory. (Chris Nauroth via suresh) + Release 2.0.3-alpha - Unreleased INCOMPATIBLE CHANGES @@ -410,6 +427,11 @@ Release 2.0.3-alpha - Unreleased HDFS-3680. Allow customized audit logging in HDFS FSNamesystem. (Marcelo Vanzin via atm) + HDFS-4130. BKJM: The reading for editlog at NN starting using bkjm is not efficient. + (Han Xiao via umamahesh) + + HDFS-4326. bump up Tomcat version for HttpFS to 6.0.36. (tucu via acmurthy) + OPTIMIZATIONS BUG FIXES @@ -581,6 +603,30 @@ Release 2.0.3-alpha - Unreleased HDFS-4279. 
NameNode does not initialize generic conf keys when started with -recover. (Colin Patrick McCabe via atm) + HDFS-4291. edit log unit tests leave stray test_edit_log_file around + (Colin Patrick McCabe via todd) + + HDFS-4292. Sanity check not correct in RemoteBlockReader2.newBlockReader + (Binglin Chang via todd) + + HDFS-4295. Using port 1023 should be valid when starting Secure DataNode + (Stephen Chu via todd) + + HDFS-4294. Backwards compatibility is not maintained for TestVolumeId. + (Ivan A. Veselovsky and Robert Parker via atm) + + HDFS-2264. NamenodeProtocol has the wrong value for clientPrincipal in + KerberosInfo annotation. (atm) + + HDFS-4307. SocketCache should use monotonic time. (Colin Patrick McCabe + via atm) + + HDFS-4315. DNs with multiple BPs can have BPOfferServices fail to start + due to unsynchronized map access. (atm) + + HDFS-4140. fuse-dfs handles open(O_TRUNC) poorly. (Colin Patrick McCabe + via atm) + BREAKDOWN OF HDFS-3077 SUBTASKS HDFS-3077. Quorum-based protocol for reading and writing edit logs. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java index 6d9a65d2821..5d1814233f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java @@ -500,16 +500,18 @@ public class BookKeeperJournalManager implements JournalManager { } } - EditLogInputStream getInputStream(long fromTxId, boolean inProgressOk) - throws IOException { - for (EditLogLedgerMetadata l : getLedgerList(inProgressOk)) { - long lastTxId = l.getLastTxId(); - if (l.isInProgress()) { - lastTxId = recoverLastTxId(l, false); - } - - if (fromTxId >= l.getFirstTxId() && fromTxId <= lastTxId) { - try { + @Override + public void selectInputStreams(Collection streams, + long fromTxId, boolean inProgressOk) throws IOException { + List currentLedgerList = getLedgerList(inProgressOk); + try { + BookKeeperEditLogInputStream elis = null; + for (EditLogLedgerMetadata l : currentLedgerList) { + long lastTxId = l.getLastTxId(); + if (l.isInProgress()) { + lastTxId = recoverLastTxId(l, false); + } + if (fromTxId >= l.getFirstTxId() && fromTxId <= lastTxId) { LedgerHandle h; if (l.isInProgress()) { // we don't want to fence the current journal h = bkc.openLedgerNoRecovery(l.getLedgerId(), @@ -518,42 +520,22 @@ public class BookKeeperJournalManager implements JournalManager { h = bkc.openLedger(l.getLedgerId(), BookKeeper.DigestType.MAC, digestpw.getBytes()); } - BookKeeperEditLogInputStream s = new BookKeeperEditLogInputStream(h, - l); - s.skipTo(fromTxId); - return s; - } catch (BKException e) { - throw new IOException("Could not open ledger for " + fromTxId, e); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - throw new IOException("Interrupted opening ledger for " - + fromTxId, ie); + elis = new BookKeeperEditLogInputStream(h, l); + elis.skipTo(fromTxId); + } else { + return; } + streams.add(elis); + if (elis.getLastTxId() == HdfsConstants.INVALID_TXID) { + return; + } + fromTxId = elis.getLastTxId() + 1; } - } - return null; - } - - @Override - public void selectInputStreams(Collection streams, - long fromTxId, boolean inProgressOk) { 
- // NOTE: could probably be rewritten more efficiently - while (true) { - EditLogInputStream elis; - try { - elis = getInputStream(fromTxId, inProgressOk); - } catch (IOException e) { - LOG.error(e); - return; - } - if (elis == null) { - return; - } - streams.add(elis); - if (elis.getLastTxId() == HdfsConstants.INVALID_TXID) { - return; - } - fromTxId = elis.getLastTxId() + 1; + } catch (BKException e) { + throw new IOException("Could not open ledger for " + fromTxId, e); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted opening ledger for " + fromTxId, ie); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java index 9da904007d3..e4c7e87f91d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java @@ -28,6 +28,7 @@ import org.mockito.Mockito; import java.io.IOException; import java.net.URI; +import java.util.ArrayList; import java.util.List; import java.util.ArrayList; import java.util.Random; @@ -315,13 +316,13 @@ public class TestBookKeeperJournalManager { out.close(); bkjm.finalizeLogSegment(1, numTransactions); - - EditLogInputStream in = bkjm.getInputStream(1, true); + List in = new ArrayList(); + bkjm.selectInputStreams(in, 1, true); try { assertEquals(numTransactions, - FSEditLogTestUtil.countTransactionsInStream(in)); + FSEditLogTestUtil.countTransactionsInStream(in.get(0))); } finally { - in.close(); + in.get(0).close(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_design.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_design.xml deleted file mode 100644 index 28a997ce61b..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_design.xml +++ /dev/null @@ -1,536 +0,0 @@ - - - - - - - - -
- HDFS Architecture Guide
- Introduction -

- The Hadoop Distributed File System (HDFS) is a distributed file system - designed to run on commodity hardware. It has many similarities with existing distributed file systems. However, the differences from - other distributed file systems are significant. HDFS is highly fault-tolerant and is designed to be deployed on low-cost hardware. - HDFS provides high throughput access to application data and is suitable for applications that have large data sets. HDFS relaxes - a few POSIX requirements to enable streaming access to file system data. HDFS was originally built as infrastructure for the - Apache Nutch web search engine project. HDFS is now an Apache Hadoop subproject. - The project URL is http://hadoop.apache.org/hdfs/. -

-
- -
- Assumptions and Goals - -
- Hardware Failure -

- Hardware failure is the norm rather than the exception. An HDFS instance may consist of hundreds or thousands of server machines, - each storing part of the file system’s data. The fact that there are a huge number of components and that each component has - a non-trivial probability of failure means that some component of HDFS is always non-functional. Therefore, detection of faults and quick, - automatic recovery from them is a core architectural goal of HDFS. -

-
- - -
- Streaming Data Access -

- Applications that run on HDFS need streaming access to their data sets. They are not general purpose applications that typically run - on general purpose file systems. HDFS is designed more for batch processing rather than interactive use by users. The emphasis is on - high throughput of data access rather than low latency of data access. POSIX imposes many hard requirements that are not needed for - applications that are targeted for HDFS. POSIX semantics in a few key areas has been traded to increase data throughput rates. -

-
- -
- Large Data Sets -

- Applications that run on HDFS have large data sets. A typical file in HDFS is gigabytes to terabytes in size. Thus, HDFS is tuned to - support large files. It should provide high aggregate data bandwidth and scale to thousands of nodes in a single cluster. It should support - tens of millions of files in a single instance. -

-
- - -
- Appending-Writes and File Syncs -

- Most HDFS applications need a write-once-read-many access model for files. HDFS provides two additional advanced features: hflush and - append. Hflush makes the last block of an unclosed file visible to readers while providing read consistency and data durability. Append - provides a mechanism for opening a closed file to add additional data. -

-

- For complete details of the hflush and append design, see the - Append/Hflush/Read Design document (PDF). -

-
- - -
- “Moving Computation is Cheaper than Moving Data” -

- A computation requested by an application is much more efficient if it is executed near the data it operates on. This is especially true - when the size of the data set is huge. This minimizes network congestion and increases the overall throughput of the system. The - assumption is that it is often better to migrate the computation closer to where the data is located rather than moving the data to where - the application is running. HDFS provides interfaces for applications to move themselves closer to where the data is located. -

-
- - -
- Portability Across Heterogeneous Hardware and Software Platforms -

- HDFS has been designed to be easily portable from one platform to another. This facilitates widespread adoption of HDFS as a - platform of choice for a large set of applications. -

-
-
- - -
- NameNode and DataNodes -

- HDFS has a master/slave architecture. An HDFS cluster consists of a single NameNode, a master server that manages the file - system namespace and regulates access to files by clients. In addition, there are a number of DataNodes, usually one per node - in the cluster, which manage storage attached to the nodes that they run on. HDFS exposes a file system namespace and allows - user data to be stored in files. Internally, a file is split into one or more blocks and these blocks are stored in a set of DataNodes. - The NameNode executes file system namespace operations like opening, closing, and renaming files and directories. It also - determines the mapping of blocks to DataNodes. The DataNodes are responsible for serving read and write requests from the file - system’s clients. The DataNodes also perform block creation, deletion, and replication upon instruction from the NameNode. -
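A minimal sketch of how a client can observe the block-to-DataNode mapping described above, assuming a reachable HDFS instance and a hypothetical path (illustrative only, not part of the original guide):

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationsExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    FileStatus status = fs.getFileStatus(new Path("/data/sample.txt"));
    // One BlockLocation per block, naming the DataNodes that hold its replicas.
    for (BlockLocation block : fs.getFileBlockLocations(status, 0, status.getLen())) {
      System.out.println("offset=" + block.getOffset()
          + " length=" + block.getLength()
          + " hosts=" + Arrays.toString(block.getHosts()));
    }
  }
}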

- -
- -

- The NameNode and DataNode are pieces of software designed to run on commodity machines. These machines typically run a - GNU/Linux operating system (OS). HDFS is built using the Java language; any - machine that supports Java can run the NameNode or the DataNode software. Usage of the highly portable Java language means - that HDFS can be deployed on a wide range of machines. A typical deployment has a dedicated machine that runs only the - NameNode software. Each of the other machines in the cluster runs one instance of the DataNode software. The architecture - does not preclude running multiple DataNodes on the same machine but in a real deployment that is rarely the case. -

-

- The existence of a single NameNode in a cluster greatly simplifies the architecture of the system. The NameNode is the arbitrator - and repository for all HDFS metadata. The system is designed in such a way that user data never flows through the NameNode. -

-
- - - -
- The File System Namespace -

- HDFS supports a traditional hierarchical file organization. A user or an application can create directories and store files inside - these directories. The file system namespace hierarchy is similar to most other existing file systems; one can create and - remove files, move a file from one directory to another, or rename a file. HDFS implements user quotas for number of names and - amount of data stored in a particular directory (See - HDFS Quota Admin Guide). In addition, HDFS - supports symbolic links. -
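A minimal sketch of the namespace operations described above through the Java FileSystem API, with hypothetical paths:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NamespaceExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.mkdirs(new Path("/user/demo/in"));                                // create a directory
    fs.rename(new Path("/user/demo/in"), new Path("/user/demo/input"));  // move/rename it
    for (FileStatus stat : fs.listStatus(new Path("/user/demo"))) {      // list its parent
      System.out.println(stat.getPath() + " isDirectory=" + stat.isDirectory());
    }
  }
}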

-

- The NameNode maintains the file system namespace. Any change to the file system namespace or its properties is - recorded by the NameNode. An application can specify the number of replicas of a file that should be maintained by - HDFS. The number of copies of a file is called the replication factor of that file. This information is stored by the NameNode. -

-
- - - -
- Data Replication -

- HDFS is designed to reliably store very large files across machines in a large cluster. It stores each file as a sequence - of blocks; all blocks in a file except the last block are the same size. The blocks of a file are replicated for fault tolerance. - The block size and replication factor are configurable per file. An application can specify the number of replicas of a file. - The replication factor can be specified at file creation time and can be changed later. Files in HDFS are strictly one writer at any - time. -
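A minimal sketch of setting the replication factor at creation time and changing it later, using hypothetical values and paths:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplicationExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path file = new Path("/user/demo/replicated.txt");

    // Create the file with a replication factor of 2.
    FSDataOutputStream out = fs.create(file, (short) 2);
    out.writeUTF("hello");
    out.close();

    // Ask the NameNode to raise the replication factor to 3 afterwards.
    boolean accepted = fs.setReplication(file, (short) 3);
    System.out.println("replication change accepted: " + accepted);
  }
}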

-

- The NameNode makes all decisions regarding replication of blocks. It periodically receives a Heartbeat and a Blockreport - from each of the DataNodes in the cluster. Receipt of a Heartbeat implies that the DataNode is functioning properly. A - Blockreport contains a list of all blocks on a DataNode. -

-
- -
- Replica Placement: The First Baby Steps -

- The placement of replicas is critical to HDFS reliability and performance. Optimizing replica placement distinguishes - HDFS from most other distributed file systems. This is a feature that needs lots of tuning and experience. The purpose - of a rack-aware replica placement policy is to improve data reliability, availability, and network bandwidth utilization. - The current implementation for the replica placement policy is a first effort in this direction. The short-term goals of - implementing this policy are to validate it on production systems, learn more about its behavior, and build a foundation - to test and research more sophisticated policies. -

-

- Large HDFS instances run on a cluster of computers that commonly spread across many racks. Communication - between two nodes in different racks has to go through switches. In most cases, network bandwidth between machines - in the same rack is greater than network bandwidth between machines in different racks. -

-

- The NameNode determines the rack id each DataNode belongs to via the process outlined in - Hadoop Rack Awareness. - A simple but non-optimal policy is to place replicas on unique racks. This prevents losing data when an entire rack - fails and allows use of bandwidth from multiple racks when reading data. This policy evenly distributes replicas in - the cluster which makes it easy to balance load on component failure. However, this policy increases the cost of - writes because a write needs to transfer blocks to multiple racks. -

-

- For the common case, when the replication factor is three, HDFS’s placement policy is to put one replica - on one node in the local rack, another on a node in a different (remote) rack, and the last on a different node in the - same remote rack. This policy cuts the inter-rack write traffic which generally improves write performance. The - chance of rack failure is far less than that of node failure; this policy does not impact data reliability and availability - guarantees. However, it does reduce the aggregate network bandwidth used when reading data since a block is - placed in only two unique racks rather than three. With this policy, the replicas of a file do not evenly distribute - across the racks. One third of replicas are on one node, two thirds of replicas are on one rack, and the other third - are evenly distributed across the remaining racks. This policy improves write performance without compromising - data reliability or read performance. -

-

- In addition to the default placement policy described above, HDFS also provides a pluggable interface for block placement. See - BlockPlacementPolicy. -

-
- -
- Replica Selection -

- To minimize global bandwidth consumption and read latency, HDFS tries to satisfy a read request from a replica - that is closest to the reader. If there exists a replica on the same rack as the reader node, then that replica is - preferred to satisfy the read request. If an HDFS cluster spans multiple data centers, then a replica that is - resident in the local data center is preferred over any remote replica. -

-
- -
- Safemode -

- On startup, the NameNode enters a special state called Safemode. Replication of data blocks does not occur - when the NameNode is in the Safemode state. The NameNode receives Heartbeat and Blockreport messages - from the DataNodes. A Blockreport contains the list of data blocks that a DataNode is hosting. Each block - has a specified minimum number of replicas. A block is considered safely replicated when the minimum number - of replicas of that data block has checked in with the NameNode. After a configurable percentage of safely - replicated data blocks checks in with the NameNode (plus an additional 30 seconds), the NameNode exits - the Safemode state. It then determines the list of data blocks (if any) that still have fewer than the specified - number of replicas. The NameNode then replicates these blocks to other DataNodes. -
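A minimal sketch of querying Safemode from a client, assuming the 2.x client API in which DistributedFileSystem exposes setSafeMode and SAFEMODE_GET only reports the current state:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;

public class SafemodeExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    if (fs instanceof DistributedFileSystem) {
      DistributedFileSystem dfs = (DistributedFileSystem) fs;
      // SAFEMODE_GET reports whether the NameNode is in Safemode without changing it.
      boolean inSafeMode = dfs.setSafeMode(SafeModeAction.SAFEMODE_GET);
      System.out.println("NameNode in Safemode: " + inSafeMode);
    }
  }
}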

-
- -
- -
- The Persistence of File System Metadata -

- The HDFS namespace is stored by the NameNode. The NameNode uses a transaction log called the EditLog - to persistently record every change that occurs to file system metadata. For example, creating a new file in - HDFS causes the NameNode to insert a record into the EditLog indicating this. Similarly, changing the - replication factor of a file causes a new record to be inserted into the EditLog. The NameNode uses a file - in its local host OS file system to store the EditLog. The entire file system namespace, including the mapping - of blocks to files and file system properties, is stored in a file called the FsImage. The FsImage is stored as - a file in the NameNode’s local file system too. -

-

- The NameNode keeps an image of the entire file system namespace and file Blockmap in memory. This key - metadata item is designed to be compact, such that a NameNode with 4 GB of RAM is plenty to support a - huge number of files and directories. When the NameNode starts up, it reads the FsImage and EditLog from - disk, applies all the transactions from the EditLog to the in-memory representation of the FsImage, and flushes - out this new version into a new FsImage on disk. It can then truncate the old EditLog because its transactions - have been applied to the persistent FsImage. This process is called a checkpoint. The - Checkpoint Node is a - separate daemon that can be configured to periodically build checkpoints from the FsImage and EditLog which are - uploaded to the NameNode. The - Backup Node builds - checkpoints like the Checkpoint Node and also maintains an up-to-date copy of the FsImage in memory. -

-

- The DataNode stores HDFS data in files in its local file system. The DataNode has no knowledge about HDFS files. - It stores each block of HDFS data in a separate file in its local file system. The DataNode does not create all files - in the same directory. Instead, it uses a heuristic to determine the optimal number of files per directory and creates - subdirectories appropriately. It is not optimal to create all local files in the same directory because the local file - system might not be able to efficiently support a huge number of files in a single directory. When a DataNode starts - up, it scans through its local file system, generates a list of all HDFS data blocks that correspond to each of these - local files and sends this report to the NameNode: this is the Blockreport. -

-
- - -
- The Communication Protocols -

- All HDFS communication protocols are layered on top of the TCP/IP protocol. A client establishes a connection to - a configurable TCP port on the NameNode machine. - It talks the ClientProtocol with the NameNode. The DataNodes talk to the NameNode using the DataNode Protocol. - A Remote Procedure Call (RPC) abstraction wraps both the - Client Protocol and the DataNode Protocol. By design, the NameNode never initiates any RPCs. Instead, it only - responds to RPC requests issued by DataNodes or clients. -

-
- - -
- Robustness -

- The primary objective of HDFS is to store data reliably even in the presence of failures. The three common types - of failures are NameNode failures, DataNode failures and network partitions. -

- -
- Data Disk Failure, Heartbeats and Re-Replication -

- Each DataNode sends a Heartbeat message to the NameNode periodically. A network partition can cause a - subset of DataNodes to lose connectivity with the NameNode. The NameNode detects this condition by the - absence of a Heartbeat message. The NameNode marks DataNodes without recent Heartbeats as dead and - does not forward any new IO requests to them. Any data that was - registered to a dead DataNode is not available to HDFS any more. DataNode death may cause the replication - factor of some blocks to fall below their specified value. The NameNode constantly tracks which blocks need - to be replicated and initiates replication whenever necessary. The necessity for re-replication may arise due - to many reasons: a DataNode may become unavailable, a replica may become corrupted, a hard disk on a - DataNode may fail, or the replication factor of a file may be increased. -

-
- -
- Cluster Rebalancing -

- The HDFS architecture is compatible with data rebalancing schemes. A scheme might automatically move - data from one DataNode to another if the free space on a DataNode falls below a certain threshold. In the - event of a sudden high demand for a particular file, a scheme might dynamically create additional replicas - and rebalance other data in the cluster. These types of data rebalancing schemes are not yet implemented. -

-
- -
- Data Integrity -

- - It is possible that a block of data fetched from a DataNode arrives corrupted. This corruption can occur - because of faults in a storage device, network faults, or buggy software. The HDFS client software - implements checksum checking on the contents of HDFS files. When a client creates an HDFS file, - it computes a checksum of each block of the file and stores these checksums in a separate hidden - file in the same HDFS namespace. When a client retrieves file contents it verifies that the data it - received from each DataNode matches the checksum stored in the associated checksum file. If not, - then the client can opt to retrieve that block from another DataNode that has a replica of that block. -
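A minimal sketch of the checksum-related calls available to clients, with a hypothetical path; getFileChecksum may return null on file systems that do not expose a checksum:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path file = new Path("/user/demo/data.bin");

    // End-to-end checksum of the file as exposed by the file system, if supported.
    FileChecksum checksum = fs.getFileChecksum(file);
    if (checksum != null) {
      System.out.println("algorithm=" + checksum.getAlgorithmName());
    }

    // Reads verify checksums by default; this disables verification for this instance.
    fs.setVerifyChecksum(false);
  }
}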

-
- - -
- Metadata Disk Failure -

- The FsImage and the EditLog are central data structures of HDFS. A corruption of these files can - cause the HDFS instance to be non-functional. For this reason, the NameNode can be configured - to support maintaining multiple copies of the FsImage and EditLog. Any update to either the FsImage - or EditLog causes each of the FsImages and EditLogs to get updated synchronously. This - synchronous updating of multiple copies of the FsImage and EditLog may degrade the rate of - namespace transactions per second that a NameNode can support. However, this degradation is - acceptable because even though HDFS applications are very data intensive in nature, they are not - metadata intensive. When a NameNode restarts, it selects the latest consistent FsImage and EditLog to use. -

-

- The NameNode machine is a single point of failure for an HDFS cluster. If the NameNode machine fails, - manual intervention is necessary. Currently, automatic restart and failover of the NameNode software to - another machine is not supported. -

-
- -
- Snapshots -

- Snapshots support storing a copy of data at a particular instant of time. One usage of the snapshot - feature may be to roll back a corrupted HDFS instance to a previously known good point in time. - HDFS does not currently support snapshots but will in a future release. -

-
- -
- - -
- - Data Organization - -
- Data Blocks -

- HDFS is designed to support very large files. Applications that are compatible with HDFS are those - that deal with large data sets. These applications write their data only once but they read it one or - more times and require these reads to be satisfied at streaming speeds. HDFS supports - write-once-read-many semantics on files. A typical block size used by HDFS is 64 MB. Thus, - an HDFS file is chopped up into 64 MB chunks, and if possible, each chunk will reside on a different DataNode. -
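The block size and replication factor can also be chosen per file at creation time. The following is a small sketch against the public FileSystem API; the path and buffer size are arbitrary example values.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class BlockSizeSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        long blockSize = 64L * 1024 * 1024;  // 64 MB, the typical HDFS block size
        // create(path, overwrite, bufferSize, replication, blockSize)
        FSDataOutputStream out =
            fs.create(new Path("/user/example/data.bin"), true, 4096, (short) 3, blockSize);
        out.writeBytes("written once, read many times");
        out.close();
      }
    }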

-
- -
- Replication Pipelining -

- When a client is writing data to an HDFS file with a replication factor of 3, the NameNode retrieves a list of DataNodes using a replication target choosing algorithm.
- This list contains the DataNodes that will host a replica of that block. The client then writes to the first DataNode. The first DataNode starts receiving the data in small portions (64 KB, configurable),
- writes each portion to its local repository and transfers that portion to the second DataNode in the list.
- The second DataNode, in turn, starts receiving each portion of the data block, writes that portion to its
- repository and then flushes that portion to the third DataNode. Finally, the third DataNode writes the
- data to its local repository. Thus, a DataNode can be receiving data from the previous one in the pipeline
- and at the same time forwarding data to the next one in the pipeline. In this way, the data is pipelined from
- one DataNode to the next.
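The following is only a conceptual sketch of the receive-and-forward behaviour described above, not the actual DataNode transfer protocol: each portion read from the upstream node is written locally and immediately forwarded downstream.

    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    /** Conceptual sketch of pipelined replication; not the real DataNode wire protocol. */
    public class PipelineSketch {
      static void receiveAndForward(InputStream fromUpstream, OutputStream localStore,
          OutputStream toDownstream) throws IOException {
        byte[] portion = new byte[64 * 1024];  // small, fixed-size portions (64 KB here)
        int n;
        while ((n = fromUpstream.read(portion)) != -1) {
          localStore.write(portion, 0, n);    // persist locally ...
          toDownstream.write(portion, 0, n);  // ... while forwarding to the next DataNode
        }
        toDownstream.flush();
      }
    }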

-
- -
- -
- - Accessibility - -

- HDFS can be accessed from applications in many different ways. Natively, HDFS provides a - Java API for applications to - use. A C language wrapper for this Java API is also available. In addition, an HTTP browser - can also be used to browse the files of an HDFS instance. Work is in progress to expose - HDFS through the WebDAV protocol. -

- -
- FS Shell -

- HDFS allows user data to be organized in the form of files and directories. It provides a commandline - interface called FS shell that lets a user interact with the data in HDFS. The syntax of this command - set is similar to other shells (e.g. bash, csh) that users are already familiar with. Here are some sample - action/command pairs: -

- 
-        Action                                                   Command
-        Create a directory named /foodir                         bin/hadoop dfs -mkdir /foodir
-        Remove a directory named /foodir                         bin/hadoop dfs -rmr /foodir
-        View the contents of a file named /foodir/myfile.txt     bin/hadoop dfs -cat /foodir/myfile.txt
- 

- FS shell is targeted for applications that need a scripting language to interact with the stored data. -
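For applications that prefer an API over shell scripting, the same actions map onto the FileSystem Java API. A minimal sketch, with example paths and the assumption that /foodir/myfile.txt already exists:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;

    public class FsShellEquivalentsSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        fs.mkdirs(new Path("/foodir"));                    // bin/hadoop dfs -mkdir /foodir
        FSDataInputStream in = fs.open(new Path("/foodir/myfile.txt"));
        IOUtils.copyBytes(in, System.out, 4096, false);    // bin/hadoop dfs -cat /foodir/myfile.txt
        in.close();
        fs.delete(new Path("/foodir"), true);              // bin/hadoop dfs -rmr /foodir
      }
    }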

-
- -
- DFSAdmin -

- The DFSAdmin command set is used for administering an HDFS cluster. These are commands that are - used only by an HDFS administrator. Here are some sample action/command pairs: -

- 
-        Action                                        Command
-        Put the cluster in Safemode                   bin/hadoop dfsadmin -safemode enter
-        Generate a list of DataNodes                  bin/hadoop dfsadmin -report
-        Recommission or decommission DataNode(s)      bin/hadoop dfsadmin -refreshNodes
- 
- -
- Browser Interface -

- A typical HDFS install configures a web server to expose the HDFS namespace through - a configurable TCP port. This allows a user to navigate the HDFS namespace and view - the contents of its files using a web browser. -

-
- -
- -
- Space Reclamation - -
- File Deletes and Undeletes -

- When a file is deleted by a user or an application, it is not immediately removed from HDFS. Instead, - HDFS first renames it to a file in the /trash directory. The file can be restored quickly - as long as it remains in /trash. A file remains in /trash for a configurable - amount of time. After the expiry of its life in /trash, the NameNode deletes the file from - the HDFS namespace. The deletion of a file causes the blocks associated with the file to be freed. - Note that there could be an appreciable time delay between the time a file is deleted by a user and - the time of the corresponding increase in free space in HDFS. -

-

- A user can undelete a file after deleting it as long as it remains in the /trash directory.
- To undelete a file, the user can navigate to the /trash directory and retrieve it.
- The /trash directory contains only the latest copy of the file that was deleted.
- The /trash directory is just like any other directory, with one special
- feature: HDFS applies specified policies to automatically delete files from this directory.
- By default, the trash feature is disabled. It can be enabled by setting the fs.trash.interval property in core-site.xml to a non-zero value (the retention period, in minutes). The property must be set in both the client-side and server-side configurations.
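The only setting involved is that retention period. A minimal sketch of the key (which would normally live in core-site.xml on both clients and servers); the 60-minute value is just an example:

    import org.apache.hadoop.conf.Configuration;

    public class TrashIntervalSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Keep deleted files in /trash for 60 minutes before they are removed for good.
        conf.setLong("fs.trash.interval", 60);
        System.out.println("fs.trash.interval = " + conf.getLong("fs.trash.interval", 0));
      }
    }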

-
- -
- Decrease Replication Factor -

- When the replication factor of a file is reduced, the NameNode selects excess replicas that can be deleted. - The next Heartbeat transfers this information to the DataNode. The DataNode then removes the corresponding - blocks and the corresponding free space appears in the cluster. Once again, there might be a time delay - between the completion of the setReplication API call and the appearance of free space in the cluster. -
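The setReplication call mentioned above is part of the public FileSystem API; a small sketch, with the path and the new factor as example values:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class SetReplicationSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Lower the replication factor of one file; the NameNode will schedule the
        // excess replicas for deletion via subsequent Heartbeats.
        boolean accepted = fs.setReplication(new Path("/user/example/data.bin"), (short) 2);
        System.out.println("replication change accepted: " + accepted);
      }
    }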

-
-
- - -
- References -

- HDFS Java API: - - http://hadoop.apache.org/core/docs/current/api/ - -

-

- HDFS source code: - - http://hadoop.apache.org/hdfs/version_control.html - -

-
- - -
- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java index 7a95626afd2..2bcd96e7644 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java @@ -404,7 +404,7 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { long firstChunkOffset = checksumInfo.getChunkOffset(); if ( firstChunkOffset < 0 || firstChunkOffset > startOffset || - firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) { + firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) { throw new IOException("BlockReader: error in first chunk offset (" + firstChunkOffset + ") startOffset is " + startOffset + " for file " + file); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java index c24a59b87dd..b9a5c76ec31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java @@ -413,7 +413,7 @@ public class RemoteBlockReader2 implements BlockReader { long firstChunkOffset = checksumInfo.getChunkOffset(); if ( firstChunkOffset < 0 || firstChunkOffset > startOffset || - firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) { + firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) { throw new IOException("BlockReader: error in first chunk offset (" + firstChunkOffset + ") startOffset is " + startOffset + " for file " + file); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java index 06d2a2baeb5..596b0176c40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Time; /** * A cache of input stream sockets to Data Node. 
@@ -53,7 +54,7 @@ class SocketCache { public SocketAndStreams(Socket s, IOStreamPair ioStreams) { this.sock = s; this.ioStreams = ioStreams; - this.createTime = System.currentTimeMillis(); + this.createTime = Time.monotonicNow(); } @Override @@ -205,7 +206,7 @@ class SocketCache { Entry entry = iter.next(); // if oldest socket expired, remove it if (entry == null || - System.currentTimeMillis() - entry.getValue().getCreateTime() < + Time.monotonicNow() - entry.getValue().getCreateTime() < expiryPeriod) { break; } @@ -236,13 +237,13 @@ class SocketCache { * older than expiryPeriod minutes */ private void run() throws InterruptedException { - for(long lastExpiryTime = System.currentTimeMillis(); + for(long lastExpiryTime = Time.monotonicNow(); !Thread.interrupted(); Thread.sleep(expiryPeriod)) { - final long elapsed = System.currentTimeMillis() - lastExpiryTime; + final long elapsed = Time.monotonicNow() - lastExpiryTime; if (elapsed >= expiryPeriod) { evictExpired(expiryPeriod); - lastExpiryTime = System.currentTimeMillis(); + lastExpiryTime = Time.monotonicNow(); } } clear(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index 06f6cd41c32..7e77fe160df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -82,10 +82,11 @@ public class LayoutVersion { EDITS_CHESKUM(-28, "Support checksum for editlog"), UNUSED(-29, "Skipped version"), FSIMAGE_NAME_OPTIMIZATION(-30, "Store only last part of path in fsimage"), - RESERVED_REL20_203(-31, -19, "Reserved for release 0.20.203"), - RESERVED_REL20_204(-32, "Reserved for release 0.20.204"), - RESERVED_REL22(-33, -27, "Reserved for release 0.22"), - RESERVED_REL23(-34, -30, "Reserved for release 0.23"), + RESERVED_REL20_203(-31, -19, "Reserved for release 0.20.203", true, + DELEGATION_TOKEN), + RESERVED_REL20_204(-32, -31, "Reserved for release 0.20.204", true), + RESERVED_REL22(-33, -27, "Reserved for release 0.22", true), + RESERVED_REL23(-34, -30, "Reserved for release 0.23", true), FEDERATION(-35, "Support for namenode federation"), LEASE_REASSIGNMENT(-36, "Support for persisting lease holder reassignment"), STORED_TXIDS(-37, "Transaction IDs are stored in edits log and image files"), @@ -95,33 +96,40 @@ public class LayoutVersion { OPTIMIZE_PERSIST_BLOCKS(-40, "Serialize block lists with delta-encoded variable length ints, " + "add OP_UPDATE_BLOCKS"), - SNAPSHOT(-41, "Support for snapshot feature"); + RESERVED_REL1_2_0(-41, -32, "Reserved for release 1.2.0", true, CONCAT), + SNAPSHOT(-41, -39, "Support for snapshot feature", false); final int lv; final int ancestorLV; final String description; + final boolean reserved; + final Feature[] specialFeatures; /** - * Feature that is added at {@code currentLV}. + * Feature that is added at layout version {@code lv} - 1. * @param lv new layout version with the addition of this feature * @param description description of the feature */ Feature(final int lv, final String description) { - this(lv, lv + 1, description); + this(lv, lv + 1, description, false); } /** - * Feature that is added at {@code currentLV}. + * Feature that is added at layout version {@code ancestoryLV}. 
* @param lv new layout version with the addition of this feature - * @param ancestorLV layout version from which the new lv is derived - * from. + * @param ancestorLV layout version from which the new lv is derived from. * @param description description of the feature + * @param reserved true when this is a layout version reserved for previous + * verions + * @param features set of features that are to be enabled for this version */ - Feature(final int lv, final int ancestorLV, - final String description) { + Feature(final int lv, final int ancestorLV, final String description, + boolean reserved, Feature... features) { this.lv = lv; this.ancestorLV = ancestorLV; this.description = description; + this.reserved = reserved; + specialFeatures = features; } /** @@ -147,6 +155,10 @@ public class LayoutVersion { public String getDescription() { return description; } + + public boolean isReservedForOldRelease() { + return reserved; + } } // Build layout version and corresponding feature matrix @@ -172,19 +184,14 @@ public class LayoutVersion { map.put(f.ancestorLV, ancestorSet); } EnumSet featureSet = EnumSet.copyOf(ancestorSet); + if (f.specialFeatures != null) { + for (Feature specialFeature : f.specialFeatures) { + featureSet.add(specialFeature); + } + } featureSet.add(f); map.put(f.lv, featureSet); } - - // Special initialization for 0.20.203 and 0.20.204 - // to add Feature#DELEGATION_TOKEN - specialInit(Feature.RESERVED_REL20_203.lv, Feature.DELEGATION_TOKEN); - specialInit(Feature.RESERVED_REL20_204.lv, Feature.DELEGATION_TOKEN); - } - - private static void specialInit(int lv, Feature f) { - EnumSet set = map.get(lv); - set.add(f); } /** @@ -223,6 +230,11 @@ public class LayoutVersion { */ public static int getCurrentLayoutVersion() { Feature[] values = Feature.values(); - return values[values.length - 1].lv; + for (int i = values.length -1; i >= 0; i--) { + if (!values[i].isReservedForOldRelease()) { + return values[i].lv; + } + } + throw new AssertionError("All layout versions are reserved."); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index c8544f7256f..93bf5ff0ab8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -633,7 +633,9 @@ public class DatanodeManager { // Mostly called inside an RPC, update ip and peer hostname String hostname = dnAddress.getHostName(); String ip = dnAddress.getHostAddress(); - if (hostname.equals(ip)) { + if (!isNameResolved(dnAddress)) { + // Reject registration of unresolved datanode to prevent performance + // impact of repetitive DNS lookups later. LOG.warn("Unresolved datanode registration from " + ip); throw new DisallowedDatanodeException(nodeReg); } @@ -1061,6 +1063,22 @@ public class DatanodeManager { } return names; } + + /** + * Checks if name resolution was successful for the given address. If IP + * address and host name are the same, then it means name resolution has + * failed. As a special case, the loopback address is also considered + * acceptable. This is particularly important on Windows, where 127.0.0.1 does + * not resolve to "localhost". 
+ * + * @param address InetAddress to check + * @return boolean true if name resolution successful or address is loopback + */ + private static boolean isNameResolved(InetAddress address) { + String hostname = address.getHostName(); + String ip = address.getHostAddress(); + return !hostname.equals(ip) || address.isLoopbackAddress(); + } private void setDatanodeDead(DatanodeDescriptor node) { node.setLastUpdate(0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java index 2d618910f7d..ad23998ecb9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java @@ -602,6 +602,15 @@ class BlockPoolSliceScanner { lastScanTime.set(Time.now()); } } + + /** + * Shuts down this BlockPoolSliceScanner and releases any internal resources. + */ + void shutdown() { + if (verificationLog != null) { + verificationLog.close(); + } + } private void scan() { if (LOG.isDebugEnabled()) { @@ -610,7 +619,8 @@ class BlockPoolSliceScanner { try { adjustThrottler(); - while (datanode.shouldRun && !Thread.interrupted() + while (datanode.shouldRun + && !datanode.blockScanner.blockScannerThread.isInterrupted() && datanode.isBPServiceAlive(blockPoolId)) { long now = Time.now(); synchronized (this) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java index 95a883a87d3..cf80c99a58a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataBlockScanner.java @@ -100,6 +100,11 @@ public class DataBlockScanner implements Runnable { } bpScanner.scanBlockPoolSlice(); } + + // Call shutdown for each allocated BlockPoolSliceScanner. + for (BlockPoolSliceScanner bpss: blockPoolScannerMap.values()) { + bpss.shutdown(); + } } // Wait for at least one block pool to be up @@ -232,9 +237,21 @@ public class DataBlockScanner implements Runnable { } } - public synchronized void shutdown() { + public void shutdown() { + synchronized (this) { + if (blockScannerThread != null) { + blockScannerThread.interrupt(); + } + } + + // We cannot join within the synchronized block, because it would create a + // deadlock situation. blockScannerThread calls other synchronized methods. 
if (blockScannerThread != null) { - blockScannerThread.interrupt(); + try { + blockScannerThread.join(); + } catch (InterruptedException e) { + // shutting down anyway + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java index 870d71f1592..cd6b7ccd910 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java @@ -26,6 +26,7 @@ import java.io.RandomAccessFile; import java.nio.channels.FileLock; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -78,7 +79,7 @@ public class DataStorage extends Storage { // BlockPoolStorage is map of private Map bpStorageMap - = new HashMap(); + = Collections.synchronizedMap(new HashMap()); DataStorage() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java index c5e9c9ca851..0fda3060858 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java @@ -140,7 +140,7 @@ public class SecureDataNodeStarter implements Daemon { System.err.println("Successfully obtained privileged resources (streaming port = " + ss + " ) (http listener port = " + listener.getConnection() +")"); - if ((ss.getLocalPort() >= 1023 || listener.getPort() >= 1023) && + if ((ss.getLocalPort() > 1023 || listener.getPort() > 1023) && UserGroupInformation.isSecurityEnabled()) { throw new RuntimeException("Cannot start secure datanode with unprivileged ports"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 122b2b85c8a..35f7b01a49a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -339,11 +339,13 @@ class NameNodeRpcServer implements NamenodeProtocols { "Unexpected not positive size: "+size); } namesystem.checkOperation(OperationCategory.READ); + namesystem.checkSuperuserPrivilege(); return namesystem.getBlockManager().getBlocks(datanode, size); } @Override // NamenodeProtocol public ExportedBlockKeys getBlockKeys() throws IOException { + namesystem.checkSuperuserPrivilege(); return namesystem.getBlockManager().getBlockKeys(); } @@ -352,6 +354,7 @@ class NameNodeRpcServer implements NamenodeProtocols { int errorCode, String msg) throws IOException { namesystem.checkOperation(OperationCategory.UNCHECKED); + namesystem.checkSuperuserPrivilege(); verifyRequest(registration); LOG.info("Error report from " + registration + ": " + msg); if (errorCode == FATAL) { @@ -362,6 +365,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public NamenodeRegistration register(NamenodeRegistration 
registration) throws IOException { + namesystem.checkSuperuserPrivilege(); verifyLayoutVersion(registration.getVersion()); NamenodeRegistration myRegistration = nn.setRegistration(); namesystem.registerBackupNode(registration, myRegistration); @@ -371,6 +375,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public NamenodeCommand startCheckpoint(NamenodeRegistration registration) throws IOException { + namesystem.checkSuperuserPrivilege(); verifyRequest(registration); if(!nn.isRole(NamenodeRole.NAMENODE)) throw new IOException("Only an ACTIVE node can invoke startCheckpoint."); @@ -380,6 +385,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public void endCheckpoint(NamenodeRegistration registration, CheckpointSignature sig) throws IOException { + namesystem.checkSuperuserPrivilege(); namesystem.endCheckpoint(registration, sig); } @@ -756,17 +762,20 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public long getTransactionID() throws IOException { namesystem.checkOperation(OperationCategory.UNCHECKED); + namesystem.checkSuperuserPrivilege(); return namesystem.getFSImage().getLastAppliedOrWrittenTxId(); } @Override // NamenodeProtocol public long getMostRecentCheckpointTxId() throws IOException { namesystem.checkOperation(OperationCategory.UNCHECKED); + namesystem.checkSuperuserPrivilege(); return namesystem.getFSImage().getMostRecentCheckpointTxId(); } @Override // NamenodeProtocol public CheckpointSignature rollEditLog() throws IOException { + namesystem.checkSuperuserPrivilege(); return namesystem.rollEditLog(); } @@ -774,6 +783,7 @@ class NameNodeRpcServer implements NamenodeProtocols { public RemoteEditLogManifest getEditLogManifest(long sinceTxId) throws IOException { namesystem.checkOperation(OperationCategory.READ); + namesystem.checkSuperuserPrivilege(); return namesystem.getEditLog().getEditLogManifest(sinceTxId); } @@ -950,6 +960,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // DatanodeProtocol, NamenodeProtocol public NamespaceInfo versionRequest() throws IOException { + namesystem.checkSuperuserPrivilege(); return namesystem.getNamespaceInfo(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java index 96b502b5aa5..44574697cdc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java @@ -32,8 +32,7 @@ import org.apache.hadoop.security.KerberosInfo; * It's used to get part of the name node state *****************************************************************************/ @KerberosInfo( - serverPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY, - clientPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY) + serverPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY) @InterfaceAudience.Private public interface NamenodeProtocol { /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c index 2a39d85263d..218c5c9ab2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_connect.c @@ 
-131,7 +131,6 @@ static enum authConf discoverAuthConf(void) int fuseConnectInit(const char *nnUri, int port) { - const char *timerPeriod; int ret; gTimerPeriod = FUSE_CONN_DEFAULT_TIMER_PERIOD; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c index ecd772f63f7..9ca2650ddd5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_impls_open.c @@ -24,12 +24,77 @@ #include #include +static int get_hdfs_open_flags_from_info(hdfsFS fs, const char *path, + int flags, int *outflags, const hdfsFileInfo *info); + +/** + * Given a set of FUSE flags, determine the libhdfs flags we need. + * + * This is complicated by two things: + * 1. libhdfs doesn't support O_RDWR at all; + * 2. when given O_WRONLY, libhdfs will truncate the file unless O_APPEND is + * also given. In other words, there is an implicit O_TRUNC. + * + * Probably the next iteration of the libhdfs interface should not use the POSIX + * flags at all, since, as you can see, they don't really match up very closely + * to the POSIX meaning. However, for the time being, this is the API. + * + * @param fs The libhdfs object + * @param path The path we're opening + * @param flags The FUSE flags + * + * @return negative error code on failure; flags otherwise. + */ +static int64_t get_hdfs_open_flags(hdfsFS fs, const char *path, int flags) +{ + int hasContent; + int64_t ret; + hdfsFileInfo *info; + + if ((flags & O_ACCMODE) == O_RDONLY) { + return O_RDONLY; + } + if (flags & O_TRUNC) { + /* If we're opening for write or read/write, O_TRUNC means we should blow + * away the file which is there and create our own file. + * */ + return O_WRONLY; + } + info = hdfsGetPathInfo(fs, path); + if (info) { + if (info->mSize == 0) { + // If the file has zero length, we shouldn't feel bad about blowing it + // away. + ret = O_WRONLY; + } else if ((flags & O_ACCMODE) == O_RDWR) { + // HACK: translate O_RDWR requests into O_RDONLY if the file already + // exists and has non-zero length. + ret = O_RDONLY; + } else { // O_WRONLY + // HACK: translate O_WRONLY requests into append if the file already + // exists. + ret = O_WRONLY | O_APPEND; + } + } else { // !info + if (flags & O_CREAT) { + ret = O_WRONLY; + } else { + ret = -ENOENT; + } + } + if (info) { + hdfsFreeFileInfo(info, 1); + } + return ret; +} + int dfs_open(const char *path, struct fuse_file_info *fi) { hdfsFS fs = NULL; dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data; dfs_fh *fh = NULL; - int mutexInit = 0, ret; + int mutexInit = 0, ret, flags = 0; + int64_t flagRet; TRACE1("open", path) @@ -38,10 +103,6 @@ int dfs_open(const char *path, struct fuse_file_info *fi) assert('/' == *path); assert(dfs); - // 0x8000 is always passed in and hadoop doesn't like it, so killing it here - // bugbug figure out what this flag is and report problem to Hadoop JIRA - int flags = (fi->flags & 0x7FFF); - // retrieve dfs specific data fh = (dfs_fh*)calloc(1, sizeof (dfs_fh)); if (!fh) { @@ -57,22 +118,12 @@ int dfs_open(const char *path, struct fuse_file_info *fi) goto error; } fs = hdfsConnGetFs(fh->conn); - - if (flags & O_RDWR) { - hdfsFileInfo *info = hdfsGetPathInfo(fs, path); - if (info == NULL) { - // File does not exist (maybe?); interpret it as a O_WRONLY - // If the actual error was something else, we'll get it again when - // we try to open the file. 
- flags ^= O_RDWR; - flags |= O_WRONLY; - } else { - // File exists; open this as read only. - flags ^= O_RDWR; - flags |= O_RDONLY; - } + flagRet = get_hdfs_open_flags(fs, path, fi->flags); + if (flagRet < 0) { + ret = -flagRet; + goto error; } - + flags = flagRet; if ((fh->hdfsFH = hdfsOpenFile(fs, path, flags, 0, 0, 0)) == NULL) { ERROR("Could not open file %s (errno=%d)", path, errno); if (errno == 0 || errno == EINTERNAL) { @@ -91,7 +142,7 @@ int dfs_open(const char *path, struct fuse_file_info *fi) } mutexInit = 1; - if (fi->flags & O_WRONLY || fi->flags & O_CREAT) { + if ((flags & O_ACCMODE) == O_WRONLY) { fh->buf = NULL; } else { assert(dfs->rdbuffer_size > 0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c index 1ec11c1a0b5..4da6da0fa91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.c @@ -98,7 +98,7 @@ static void dfsPrintOptions(FILE *fp, const struct options *o) o->attribute_timeout, o->rdbuffer_size, o->direct_io); } -void *dfs_init(void) +void *dfs_init(struct fuse_conn_info *conn) { int ret; @@ -143,6 +143,45 @@ void *dfs_init(void) exit(EXIT_FAILURE); } } + +#ifdef FUSE_CAP_ATOMIC_O_TRUNC + // If FUSE_CAP_ATOMIC_O_TRUNC is set, open("foo", O_CREAT | O_TRUNC) will + // result in dfs_open being called with O_TRUNC. + // + // If this capability is not present, fuse will try to use multiple + // operation to "simulate" open(O_TRUNC). This doesn't work very well with + // HDFS. + // Unfortunately, this capability is only implemented on Linux 2.6.29 or so. + // See HDFS-4140 for details. + if (conn->capable & FUSE_CAP_ATOMIC_O_TRUNC) { + conn->want |= FUSE_CAP_ATOMIC_O_TRUNC; + } +#endif + +#ifdef FUSE_CAP_ASYNC_READ + // We're OK with doing reads at the same time as writes. + if (conn->capable & FUSE_CAP_ASYNC_READ) { + conn->want |= FUSE_CAP_ASYNC_READ; + } +#endif + +#ifdef FUSE_CAP_BIG_WRITES + // Yes, we can read more than 4kb at a time. In fact, please do! + if (conn->capable & FUSE_CAP_BIG_WRITES) { + conn->want |= FUSE_CAP_BIG_WRITES; + } +#endif + +#ifdef FUSE_CAP_DONT_MASK + if ((options.no_permissions) && (conn->capable & FUSE_CAP_DONT_MASK)) { + // If we're handing permissions ourselves, we don't want the kernel + // applying its own umask. HDFS already implements its own per-user + // umasks! Sadly, this only actually does something on kernels 2.6.31 and + // later. + conn->want |= FUSE_CAP_DONT_MASK; + } +#endif + return (void*)dfs; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h index 6f17af8af21..681ab912da7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/fuse_init.h @@ -19,13 +19,15 @@ #ifndef __FUSE_INIT_H__ #define __FUSE_INIT_H__ +struct fuse_conn_info; + /** * These are responsible for initializing connections to dfs and internal * data structures and then freeing them. * i.e., what happens on mount and unmount. 
* */ -void *dfs_init(); +void *dfs_init(struct fuse_conn_info *conn); void dfs_destroy (void *ptr); #endif diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c index 9252ead1bbf..78fdbc66f6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/fuse_workload.c @@ -16,6 +16,8 @@ * limitations under the License. */ +#define FUSE_USE_VERSION 26 + #include "fuse-dfs/test/fuse_workload.h" #include "libhdfs/expect.h" #include "util/posix_util.h" @@ -23,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -138,13 +141,89 @@ static int safeRead(int fd, void *buf, int c) return amt; } +/* Bug: HDFS-2551. + * When a program writes a file, closes it, and immediately re-opens it, + * it might not appear to have the correct length. This is because FUSE + * invokes the release() callback asynchronously. + * + * To work around this, we keep retrying until the file length is what we + * expect. + */ +static int closeWorkaroundHdfs2551(int fd, const char *path, off_t expectedSize) +{ + int ret, try; + struct stat stBuf; + + RETRY_ON_EINTR_GET_ERRNO(ret, close(fd)); + EXPECT_ZERO(ret); + for (try = 0; try < MAX_TRIES; try++) { + EXPECT_ZERO(stat(path, &stBuf)); + EXPECT_NONZERO(S_ISREG(stBuf.st_mode)); + if (stBuf.st_size == expectedSize) { + return 0; + } + sleepNoSig(1); + } + fprintf(stderr, "FUSE_WORKLOAD: error: expected file %s to have length " + "%lld; instead, it had length %lld\n", + path, (long long)expectedSize, (long long)stBuf.st_size); + return -EIO; +} + +#ifdef FUSE_CAP_ATOMIC_O_TRUNC + +/** + * Test that we can create a file, write some contents to it, close that file, + * and then successfully re-open with O_TRUNC. + */ +static int testOpenTrunc(const char *base) +{ + int fd, err; + char path[PATH_MAX]; + const char * const SAMPLE1 = "this is the first file that we wrote."; + const char * const SAMPLE2 = "this is the second file that we wrote. " + "It's #2!"; + + snprintf(path, sizeof(path), "%s/trunc.txt", base); + fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644); + if (fd < 0) { + err = errno; + fprintf(stderr, "TEST_ERROR: testOpenTrunc(%s): first open " + "failed with error %d\n", path, err); + return -err; + } + EXPECT_ZERO(safeWrite(fd, SAMPLE1, strlen(SAMPLE1))); + EXPECT_ZERO(closeWorkaroundHdfs2551(fd, path, strlen(SAMPLE1))); + fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644); + if (fd < 0) { + err = errno; + fprintf(stderr, "TEST_ERROR: testOpenTrunc(%s): second open " + "failed with error %d\n", path, err); + return -err; + } + EXPECT_ZERO(safeWrite(fd, SAMPLE2, strlen(SAMPLE2))); + EXPECT_ZERO(closeWorkaroundHdfs2551(fd, path, strlen(SAMPLE2))); + return 0; +} + +#else + +static int testOpenTrunc(const char *base) +{ + fprintf(stderr, "FUSE_WORKLOAD: We lack FUSE_CAP_ATOMIC_O_TRUNC support. 
" + "Not testing open(O_TRUNC).\n"); + return 0; +} + +#endif + int runFuseWorkloadImpl(const char *root, const char *pcomp, struct fileCtx *ctx) { char base[PATH_MAX], tmp[PATH_MAX], *tmpBuf; char src[PATH_MAX], dst[PATH_MAX]; struct stat stBuf; - int ret, i, try; + int ret, i; struct utimbuf tbuf; struct statvfs stvBuf; @@ -241,34 +320,9 @@ int runFuseWorkloadImpl(const char *root, const char *pcomp, EXPECT_ZERO(safeWrite(ctx[i].fd, ctx[i].str, ctx[i].strLen)); } for (i = 0; i < NUM_FILE_CTX; i++) { - RETRY_ON_EINTR_GET_ERRNO(ret, close(ctx[i].fd)); - EXPECT_ZERO(ret); + EXPECT_ZERO(closeWorkaroundHdfs2551(ctx[i].fd, ctx[i].path, ctx[i].strLen)); ctx[i].fd = -1; } - for (i = 0; i < NUM_FILE_CTX; i++) { - /* Bug: HDFS-2551. - * When a program writes a file, closes it, and immediately re-opens it, - * it might not appear to have the correct length. This is because FUSE - * invokes the release() callback asynchronously. - * - * To work around this, we keep retrying until the file length is what we - * expect. - */ - for (try = 0; try < MAX_TRIES; try++) { - EXPECT_ZERO(stat(ctx[i].path, &stBuf)); - EXPECT_NONZERO(S_ISREG(stBuf.st_mode)); - if (ctx[i].strLen == stBuf.st_size) { - break; - } - sleepNoSig(1); - } - if (try == MAX_TRIES) { - fprintf(stderr, "FUSE_WORKLOAD: error: expected file %s to have length " - "%d; instead, it had length %lld\n", - ctx[i].path, ctx[i].strLen, (long long)stBuf.st_size); - return -EIO; - } - } for (i = 0; i < NUM_FILE_CTX; i++) { ctx[i].fd = open(ctx[i].path, O_RDONLY); if (ctx[i].fd < 0) { @@ -308,6 +362,7 @@ int runFuseWorkloadImpl(const char *root, const char *pcomp, for (i = 0; i < NUM_FILE_CTX; i++) { free(ctx[i].path); } + EXPECT_ZERO(testOpenTrunc(base)); EXPECT_ZERO(recursiveDelete(base)); return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsDesign.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsDesign.apt.vm new file mode 100644 index 00000000000..21cb6450ddf --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsDesign.apt.vm @@ -0,0 +1,512 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + HDFS Architecture + --- + Dhruba Borthakur + --- + ${maven.build.timestamp} + +%{toc|section=1|fromDepth=0} + +HDFS Architecture + +Introduction + + The Hadoop Distributed File System (HDFS) is a distributed file system + designed to run on commodity hardware. It has many similarities with + existing distributed file systems. However, the differences from other + distributed file systems are significant. HDFS is highly fault-tolerant + and is designed to be deployed on low-cost hardware. HDFS provides high + throughput access to application data and is suitable for applications + that have large data sets. HDFS relaxes a few POSIX requirements to + enable streaming access to file system data. HDFS was originally built + as infrastructure for the Apache Nutch web search engine project. HDFS + is part of the Apache Hadoop Core project. 
The project URL is + {{http://hadoop.apache.org/}}. + +Assumptions and Goals + +Hardware Failure + + Hardware failure is the norm rather than the exception. An HDFS + instance may consist of hundreds or thousands of server machines, each + storing part of the file system’s data. The fact that there are a huge + number of components and that each component has a non-trivial + probability of failure means that some component of HDFS is always + non-functional. Therefore, detection of faults and quick, automatic + recovery from them is a core architectural goal of HDFS. + +Streaming Data Access + + Applications that run on HDFS need streaming access to their data sets. + They are not general purpose applications that typically run on general + purpose file systems. HDFS is designed more for batch processing rather + than interactive use by users. The emphasis is on high throughput of + data access rather than low latency of data access. POSIX imposes many + hard requirements that are not needed for applications that are + targeted for HDFS. POSIX semantics in a few key areas has been traded + to increase data throughput rates. + +Large Data Sets + + Applications that run on HDFS have large data sets. A typical file in + HDFS is gigabytes to terabytes in size. Thus, HDFS is tuned to support + large files. It should provide high aggregate data bandwidth and scale + to hundreds of nodes in a single cluster. It should support tens of + millions of files in a single instance. + +Simple Coherency Model + + HDFS applications need a write-once-read-many access model for files. A + file once created, written, and closed need not be changed. This + assumption simplifies data coherency issues and enables high throughput + data access. A Map/Reduce application or a web crawler application fits + perfectly with this model. There is a plan to support appending-writes + to files in the future. + +“Moving Computation is Cheaper than Moving Data” + + A computation requested by an application is much more efficient if it + is executed near the data it operates on. This is especially true when + the size of the data set is huge. This minimizes network congestion and + increases the overall throughput of the system. The assumption is that + it is often better to migrate the computation closer to where the data + is located rather than moving the data to where the application is + running. HDFS provides interfaces for applications to move themselves + closer to where the data is located. + +Portability Across Heterogeneous Hardware and Software Platforms + + HDFS has been designed to be easily portable from one platform to + another. This facilitates widespread adoption of HDFS as a platform of + choice for a large set of applications. + +NameNode and DataNodes + + HDFS has a master/slave architecture. An HDFS cluster consists of a + single NameNode, a master server that manages the file system namespace + and regulates access to files by clients. In addition, there are a + number of DataNodes, usually one per node in the cluster, which manage + storage attached to the nodes that they run on. HDFS exposes a file + system namespace and allows user data to be stored in files. + Internally, a file is split into one or more blocks and these blocks + are stored in a set of DataNodes. The NameNode executes file system + namespace operations like opening, closing, and renaming files and + directories. It also determines the mapping of blocks to DataNodes. 
The + DataNodes are responsible for serving read and write requests from the + file system’s clients. The DataNodes also perform block creation, + deletion, and replication upon instruction from the NameNode. + + +[images/hdfsarchitecture.png] HDFS Architecture + + The NameNode and DataNode are pieces of software designed to run on + commodity machines. These machines typically run a GNU/Linux operating + system (OS). HDFS is built using the Java language; any machine that + supports Java can run the NameNode or the DataNode software. Usage of + the highly portable Java language means that HDFS can be deployed on a + wide range of machines. A typical deployment has a dedicated machine + that runs only the NameNode software. Each of the other machines in the + cluster runs one instance of the DataNode software. The architecture + does not preclude running multiple DataNodes on the same machine but in + a real deployment that is rarely the case. + + The existence of a single NameNode in a cluster greatly simplifies the + architecture of the system. The NameNode is the arbitrator and + repository for all HDFS metadata. The system is designed in such a way + that user data never flows through the NameNode. + +The File System Namespace + + HDFS supports a traditional hierarchical file organization. A user or + an application can create directories and store files inside these + directories. The file system namespace hierarchy is similar to most + other existing file systems; one can create and remove files, move a + file from one directory to another, or rename a file. HDFS does not yet + implement user quotas or access permissions. HDFS does not support hard + links or soft links. However, the HDFS architecture does not preclude + implementing these features. + + The NameNode maintains the file system namespace. Any change to the + file system namespace or its properties is recorded by the NameNode. An + application can specify the number of replicas of a file that should be + maintained by HDFS. The number of copies of a file is called the + replication factor of that file. This information is stored by the + NameNode. + +Data Replication + + HDFS is designed to reliably store very large files across machines in + a large cluster. It stores each file as a sequence of blocks; all + blocks in a file except the last block are the same size. The blocks of + a file are replicated for fault tolerance. The block size and + replication factor are configurable per file. An application can + specify the number of replicas of a file. The replication factor can be + specified at file creation time and can be changed later. Files in HDFS + are write-once and have strictly one writer at any time. + + The NameNode makes all decisions regarding replication of blocks. It + periodically receives a Heartbeat and a Blockreport from each of the + DataNodes in the cluster. Receipt of a Heartbeat implies that the + DataNode is functioning properly. A Blockreport contains a list of all + blocks on a DataNode. + +[images/hdfsdatanodes.png] HDFS DataNodes + +Replica Placement: The First Baby Steps + + The placement of replicas is critical to HDFS reliability and + performance. Optimizing replica placement distinguishes HDFS from most + other distributed file systems. This is a feature that needs lots of + tuning and experience. The purpose of a rack-aware replica placement + policy is to improve data reliability, availability, and network + bandwidth utilization. 
The current implementation for the replica + placement policy is a first effort in this direction. The short-term + goals of implementing this policy are to validate it on production + systems, learn more about its behavior, and build a foundation to test + and research more sophisticated policies. + + Large HDFS instances run on a cluster of computers that commonly spread + across many racks. Communication between two nodes in different racks + has to go through switches. In most cases, network bandwidth between + machines in the same rack is greater than network bandwidth between + machines in different racks. + + The NameNode determines the rack id each DataNode belongs to via the + process outlined in {{{../hadoop-common/ClusterSetup.html#Hadoop+Rack+Awareness}Hadoop Rack Awareness}}. A simple but non-optimal policy + is to place replicas on unique racks. This prevents losing data when an + entire rack fails and allows use of bandwidth from multiple racks when + reading data. This policy evenly distributes replicas in the cluster + which makes it easy to balance load on component failure. However, this + policy increases the cost of writes because a write needs to transfer + blocks to multiple racks. + + For the common case, when the replication factor is three, HDFS’s + placement policy is to put one replica on one node in the local rack, + another on a different node in the local rack, and the last on a + different node in a different rack. This policy cuts the inter-rack + write traffic which generally improves write performance. The chance of + rack failure is far less than that of node failure; this policy does + not impact data reliability and availability guarantees. However, it + does reduce the aggregate network bandwidth used when reading data + since a block is placed in only two unique racks rather than three. + With this policy, the replicas of a file do not evenly distribute + across the racks. One third of replicas are on one node, two thirds of + replicas are on one rack, and the other third are evenly distributed + across the remaining racks. This policy improves write performance + without compromising data reliability or read performance. + + The current, default replica placement policy described here is a work + in progress. + +Replica Selection + + To minimize global bandwidth consumption and read latency, HDFS tries + to satisfy a read request from a replica that is closest to the reader. + If there exists a replica on the same rack as the reader node, then + that replica is preferred to satisfy the read request. If angg/ HDFS + cluster spans multiple data centers, then a replica that is resident in + the local data center is preferred over any remote replica. + +Safemode + + On startup, the NameNode enters a special state called Safemode. + Replication of data blocks does not occur when the NameNode is in the + Safemode state. The NameNode receives Heartbeat and Blockreport + messages from the DataNodes. A Blockreport contains the list of data + blocks that a DataNode is hosting. Each block has a specified minimum + number of replicas. A block is considered safely replicated when the + minimum number of replicas of that data block has checked in with the + NameNode. After a configurable percentage of safely replicated data + blocks checks in with the NameNode (plus an additional 30 seconds), the + NameNode exits the Safemode state. It then determines the list of data + blocks (if any) that still have fewer than the specified number of + replicas. 
The NameNode then replicates these blocks to other DataNodes. + +The Persistence of File System Metadata + + The HDFS namespace is stored by the NameNode. The NameNode uses a + transaction log called the EditLog to persistently record every change + that occurs to file system metadata. For example, creating a new file + in HDFS causes the NameNode to insert a record into the EditLog + indicating this. Similarly, changing the replication factor of a file + causes a new record to be inserted into the EditLog. The NameNode uses + a file in its local host OS file system to store the EditLog. The + entire file system namespace, including the mapping of blocks to files + and file system properties, is stored in a file called the FsImage. The + FsImage is stored as a file in the NameNode’s local file system too. + + The NameNode keeps an image of the entire file system namespace and + file Blockmap in memory. This key metadata item is designed to be + compact, such that a NameNode with 4 GB of RAM is plenty to support a + huge number of files and directories. When the NameNode starts up, it + reads the FsImage and EditLog from disk, applies all the transactions + from the EditLog to the in-memory representation of the FsImage, and + flushes out this new version into a new FsImage on disk. It can then + truncate the old EditLog because its transactions have been applied to + the persistent FsImage. This process is called a checkpoint. In the + current implementation, a checkpoint only occurs when the NameNode + starts up. Work is in progress to support periodic checkpointing in the + near future. + + The DataNode stores HDFS data in files in its local file system. The + DataNode has no knowledge about HDFS files. It stores each block of + HDFS data in a separate file in its local file system. The DataNode + does not create all files in the same directory. Instead, it uses a + heuristic to determine the optimal number of files per directory and + creates subdirectories appropriately. It is not optimal to create all + local files in the same directory because the local file system might + not be able to efficiently support a huge number of files in a single + directory. When a DataNode starts up, it scans through its local file + system, generates a list of all HDFS data blocks that correspond to + each of these local files and sends this report to the NameNode: this + is the Blockreport. + +The Communication Protocols + + All HDFS communication protocols are layered on top of the TCP/IP + protocol. A client establishes a connection to a configurable TCP port + on the NameNode machine. It talks the ClientProtocol with the NameNode. + The DataNodes talk to the NameNode using the DataNode Protocol. A + Remote Procedure Call (RPC) abstraction wraps both the Client Protocol + and the DataNode Protocol. By design, the NameNode never initiates any + RPCs. Instead, it only responds to RPC requests issued by DataNodes or + clients. + +Robustness + + The primary objective of HDFS is to store data reliably even in the + presence of failures. The three common types of failures are NameNode + failures, DataNode failures and network partitions. + +Data Disk Failure, Heartbeats and Re-Replication + + Each DataNode sends a Heartbeat message to the NameNode periodically. A + network partition can cause a subset of DataNodes to lose connectivity + with the NameNode. The NameNode detects this condition by the absence + of a Heartbeat message. 
The NameNode marks DataNodes without recent + Heartbeats as dead and does not forward any new IO requests to them. + Any data that was registered to a dead DataNode is not available to + HDFS any more. DataNode death may cause the replication factor of some + blocks to fall below their specified value. The NameNode constantly + tracks which blocks need to be replicated and initiates replication + whenever necessary. The necessity for re-replication may arise due to + many reasons: a DataNode may become unavailable, a replica may become + corrupted, a hard disk on a DataNode may fail, or the replication + factor of a file may be increased. + +Cluster Rebalancing + + The HDFS architecture is compatible with data rebalancing schemes. A + scheme might automatically move data from one DataNode to another if + the free space on a DataNode falls below a certain threshold. In the + event of a sudden high demand for a particular file, a scheme might + dynamically create additional replicas and rebalance other data in the + cluster. These types of data rebalancing schemes are not yet + implemented. + +Data Integrity + + It is possible that a block of data fetched from a DataNode arrives + corrupted. This corruption can occur because of faults in a storage + device, network faults, or buggy software. The HDFS client software + implements checksum checking on the contents of HDFS files. When a + client creates an HDFS file, it computes a checksum of each block of + the file and stores these checksums in a separate hidden file in the + same HDFS namespace. When a client retrieves file contents it verifies + that the data it received from each DataNode matches the checksum + stored in the associated checksum file. If not, then the client can opt + to retrieve that block from another DataNode that has a replica of that + block. + +Metadata Disk Failure + + The FsImage and the EditLog are central data structures of HDFS. A + corruption of these files can cause the HDFS instance to be + non-functional. For this reason, the NameNode can be configured to + support maintaining multiple copies of the FsImage and EditLog. Any + update to either the FsImage or EditLog causes each of the FsImages and + EditLogs to get updated synchronously. This synchronous updating of + multiple copies of the FsImage and EditLog may degrade the rate of + namespace transactions per second that a NameNode can support. However, + this degradation is acceptable because even though HDFS applications + are very data intensive in nature, they are not metadata intensive. + When a NameNode restarts, it selects the latest consistent FsImage and + EditLog to use. + + The NameNode machine is a single point of failure for an HDFS cluster. + If the NameNode machine fails, manual intervention is necessary. + Currently, automatic restart and failover of the NameNode software to + another machine is not supported. + +Snapshots + + Snapshots support storing a copy of data at a particular instant of + time. One usage of the snapshot feature may be to roll back a corrupted + HDFS instance to a previously known good point in time. HDFS does not + currently support snapshots but will in a future release. + +Data Organization + +Data Blocks + + HDFS is designed to support very large files. Applications that are + compatible with HDFS are those that deal with large data sets. These + applications write their data only once but they read it one or more + times and require these reads to be satisfied at streaming speeds. 
HDFS + supports write-once-read-many semantics on files. A typical block size + used by HDFS is 64 MB. Thus, an HDFS file is chopped up into 64 MB + chunks, and if possible, each chunk will reside on a different + DataNode. + +Staging + + A client request to create a file does not reach the NameNode + immediately. In fact, initially the HDFS client caches the file data + into a temporary local file. Application writes are transparently + redirected to this temporary local file. When the local file + accumulates data worth over one HDFS block size, the client contacts + the NameNode. The NameNode inserts the file name into the file system + hierarchy and allocates a data block for it. The NameNode responds to + the client request with the identity of the DataNode and the + destination data block. Then the client flushes the block of data from + the local temporary file to the specified DataNode. When a file is + closed, the remaining un-flushed data in the temporary local file is + transferred to the DataNode. The client then tells the NameNode that + the file is closed. At this point, the NameNode commits the file + creation operation into a persistent store. If the NameNode dies before + the file is closed, the file is lost. + + The above approach has been adopted after careful consideration of + target applications that run on HDFS. These applications need streaming + writes to files. If a client writes to a remote file directly without + any client side buffering, the network speed and the congestion in the + network impacts throughput considerably. This approach is not without + precedent. Earlier distributed file systems, e.g. AFS, have used client + side caching to improve performance. A POSIX requirement has been + relaxed to achieve higher performance of data uploads. + +Replication Pipelining + + When a client is writing data to an HDFS file, its data is first + written to a local file as explained in the previous section. Suppose + the HDFS file has a replication factor of three. When the local file + accumulates a full block of user data, the client retrieves a list of + DataNodes from the NameNode. This list contains the DataNodes that will + host a replica of that block. The client then flushes the data block to + the first DataNode. The first DataNode starts receiving the data in + small portions (4 KB), writes each portion to its local repository and + transfers that portion to the second DataNode in the list. The second + DataNode, in turn starts receiving each portion of the data block, + writes that portion to its repository and then flushes that portion to + the third DataNode. Finally, the third DataNode writes the data to its + local repository. Thus, a DataNode can be receiving data from the + previous one in the pipeline and at the same time forwarding data to + the next one in the pipeline. Thus, the data is pipelined from one + DataNode to the next. + +Accessibility + + HDFS can be accessed from applications in many different ways. + Natively, HDFS provides a + {{{http://hadoop.apache.org/docs/current/api/}FileSystem Java API}} + for applications to use. A C language wrapper for this Java API is also + available. In addition, an HTTP browser can also be used to browse the files + of an HDFS instance. Work is in progress to expose HDFS through the WebDAV + protocol. + +FS Shell + + HDFS allows user data to be organized in the form of files and + directories. It provides a commandline interface called FS shell that + lets a user interact with the data in HDFS. 
+FS Shell + + HDFS allows user data to be organized in the form of files and + directories. It provides a command-line interface called FS shell that + lets a user interact with the data in HDFS. The syntax of this command + set is similar to other shells (e.g. bash, csh) that users are already + familiar with. Here are some sample action/command pairs: + +*---------+---------+ +|| Action | Command +*---------+---------+ +| Create a directory named <<>> | <<>> +*---------+---------+ +| Remove a directory named <<>> | <<>> +*---------+---------+ +| View the contents of a file named <<>> | <<>> +*---------+---------+ + + The FS shell is targeted at applications that need a scripting language to + interact with the stored data. + +DFSAdmin + + The DFSAdmin command set is used for administering an HDFS cluster. + These commands are used only by an HDFS administrator. Here + are some sample action/command pairs: + +*---------+---------+ +|| Action | Command +*---------+---------+ +|Put the cluster in Safemode | <<>> +*---------+---------+ +|Generate a list of DataNodes | <<>> +*---------+---------+ +|Recommission or decommission DataNode(s) | <<>> +*---------+---------+ + +Browser Interface + + A typical HDFS installation configures a web server to expose the HDFS + namespace through a configurable TCP port. This allows a user to + navigate the HDFS namespace and view the contents of its files using a + web browser. + +Space Reclamation + +File Deletes and Undeletes + + When a file is deleted by a user or an application, it is not + immediately removed from HDFS. Instead, HDFS first renames it to a file + in the <<>> directory. The file can be restored quickly as long as it + remains in <<>>. A file remains in <<>> for a configurable amount + of time. After the expiry of its life in <<>>, the NameNode deletes + the file from the HDFS namespace. The deletion of a file causes the + blocks associated with the file to be freed. Note that there could be + an appreciable time delay between the time a file is deleted by a user + and the time of the corresponding increase in free space in HDFS. + + A user can undelete a file after deleting it as long as it remains in + the <<>> directory. To undelete a deleted file, a user can navigate the + <<>> directory and retrieve the file. The <<>> directory contains only + the latest copy of the file + that was deleted. The <<>> directory is just like any other directory + with one special feature: HDFS applies specified policies to + automatically delete files from this directory. The current default + policy is to delete files from <<>> that are more than 6 hours old. + In the future, this policy will be configurable through a well-defined + interface. + +Decrease Replication Factor + + When the replication factor of a file is reduced, the NameNode selects + excess replicas that can be deleted. The next Heartbeat transfers this + information to the DataNode. The DataNode then removes the + corresponding blocks, and the corresponding free space appears in the + cluster. Once again, there might be a time delay between the completion + of the setReplication API call and the appearance of free space in the + cluster. + +References + + Hadoop {{{http://hadoop.apache.org/docs/current/api/}JavaDoc API}}.
+ + HDFS source code: {{http://hadoop.apache.org/version_control.html}} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfs-logo.jpg b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfs-logo.jpg similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfs-logo.jpg rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfs-logo.jpg diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsarchitecture.gif b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsarchitecture.gif similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsarchitecture.gif rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsarchitecture.gif diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsarchitecture.odg b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsarchitecture.odg similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsarchitecture.odg rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsarchitecture.odg diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsarchitecture.png b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsarchitecture.png similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsarchitecture.png rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsarchitecture.png diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsdatanodes.gif b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsdatanodes.gif similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsdatanodes.gif rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsdatanodes.gif diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsdatanodes.odg b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsdatanodes.odg similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsdatanodes.odg rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsdatanodes.odg diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsdatanodes.png b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsdatanodes.png similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsdatanodes.png rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsdatanodes.png diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsproxy-forward.jpg b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsproxy-forward.jpg similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsproxy-forward.jpg rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsproxy-forward.jpg diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsproxy-overview.jpg 
b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsproxy-overview.jpg similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsproxy-overview.jpg rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsproxy-overview.jpg diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsproxy-server.jpg b/hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsproxy-server.jpg similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/resources/images/hdfsproxy-server.jpg rename to hadoop-hdfs-project/hadoop-hdfs/src/site/resources/images/hdfsproxy-server.jpg diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java index 018d3886a6e..c5250023e47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java @@ -255,7 +255,22 @@ public class TestHDFSFileContextMainOperations extends Assert.assertFalse(fs.exists(src1)); // ensure src1 is already renamed Assert.assertTrue(fs.exists(dst1)); // ensure rename dst exists } - + + @Test + public void testIsValidNameInvalidNames() { + String[] invalidNames = { + "/foo/../bar", + "/foo/./bar", + "/foo/:/bar", + "/foo:bar" + }; + + for (String invalidName: invalidNames) { + Assert.assertFalse(invalidName + " is not valid", + fc.getDefaultFileSystem().isValidName(invalidName)); + } + } + private void oldRename(Path src, Path dst, boolean renameSucceeds, boolean exception) throws Exception { DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java index da6f192a757..3584f21b1a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java @@ -51,7 +51,7 @@ public class TestVolumeId { } @SuppressWarnings("unchecked") - private void testEq(final boolean eq, Comparable id1, Comparable id2) { + private void testEq(final boolean eq, Comparable id1, Comparable id2) { final int h1 = id1.hashCode(); final int h2 = id2.hashCode(); @@ -99,8 +99,8 @@ public class TestVolumeId { } @SuppressWarnings("unchecked") - private void testEqMany(final boolean eq, Comparable... volumeIds) { - Comparable vidNext; + private void testEqMany(final boolean eq, Comparable... 
volumeIds) { + Comparable vidNext; int sum = 0; for (int i=0; i ancestorSet = LayoutVersion.map.get(ancestorLV); assertNotNull(ancestorSet); for (Feature feature : ancestorSet) { - assertTrue(LayoutVersion.supports(feature, lv)); + assertTrue("LV " + lv + " does nto support " + feature + + " supported by the ancestor LV " + f.ancestorLV, + LayoutVersion.supports(feature, lv)); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java index a400e850594..a5792ad217f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java @@ -97,6 +97,7 @@ public class TestBlockRecovery { MiniDFSCluster.getBaseDirectory() + "data"; private DataNode dn; private Configuration conf; + private boolean tearDownDone; private final static long RECOVERY_ID = 3000L; private final static String CLUSTER_ID = "testClusterID"; private final static String POOL_ID = "BP-TEST"; @@ -121,6 +122,7 @@ public class TestBlockRecovery { */ @Before public void startUp() throws IOException { + tearDownDone = false; conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, DATA_DIR); conf.set(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, "0.0.0.0:0"); @@ -177,7 +179,7 @@ public class TestBlockRecovery { */ @After public void tearDown() throws IOException { - if (dn != null) { + if (!tearDownDone && dn != null) { try { dn.shutdown(); } catch(Exception e) { @@ -188,6 +190,7 @@ public class TestBlockRecovery { Assert.assertTrue( "Cannot delete data-node dirs", FileUtil.fullyDelete(dir)); } + tearDownDone = true; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStartSecureDataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStartSecureDataNode.java index ba5587276c4..13114a8bd72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStartSecureDataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStartSecureDataNode.java @@ -17,24 +17,14 @@ package org.apache.hadoop.hdfs.server.datanode; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import static org.apache.hadoop.security.SecurityUtilTestHelper.isExternalKdcRunning; import org.junit.Assume; import org.junit.Before; @@ -67,7 +57,7 @@ public class TestStartSecureDataNode { } @Test - public void testSecureNameNode() throws IOException, InterruptedException { + public void testSecureNameNode() throws Exception { 
MiniDFSCluster cluster = null; try { String nnPrincipal = @@ -105,9 +95,9 @@ public class TestStartSecureDataNode { .build(); cluster.waitActive(); assertTrue(cluster.isDataNodeUp()); - } catch (Exception ex) { ex.printStackTrace(); + throw ex; } finally { if (cluster != null) { cluster.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index d6659b21858..c81e939ec07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -1209,22 +1209,19 @@ public class TestEditLog { * */ static void validateNoCrash(byte garbage[]) throws IOException { - final String TEST_LOG_NAME = "test_edit_log"; + final File TEST_LOG_NAME = new File(TEST_DIR, "test_edit_log"); EditLogFileOutputStream elfos = null; - File file = null; EditLogFileInputStream elfis = null; try { - file = new File(TEST_LOG_NAME); - elfos = new EditLogFileOutputStream(file, 0); + elfos = new EditLogFileOutputStream(TEST_LOG_NAME, 0); elfos.create(); elfos.writeRaw(garbage, 0, garbage.length); elfos.setReadyToFlush(); elfos.flushAndSync(true); elfos.close(); elfos = null; - file = new File(TEST_LOG_NAME); - elfis = new EditLogFileInputStream(file); + elfis = new EditLogFileInputStream(TEST_LOG_NAME); // verify that we can read everything without killing the JVM or // throwing an exception other than IOException diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java index 1539467da11..93b588d0c8c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java @@ -59,6 +59,8 @@ import com.google.common.collect.Sets; public class TestNameNodeRecovery { private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class); private static StartupOption recoverStartOpt = StartupOption.RECOVER; + private static final File TEST_DIR = new File( + System.getProperty("test.build.data","build/test/data")); static { recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL); @@ -66,15 +68,13 @@ public class TestNameNodeRecovery { } static void runEditLogTest(EditLogTestSetup elts) throws IOException { - final String TEST_LOG_NAME = "test_edit_log"; + final File TEST_LOG_NAME = new File(TEST_DIR, "test_edit_log"); final OpInstanceCache cache = new OpInstanceCache(); EditLogFileOutputStream elfos = null; - File file = null; EditLogFileInputStream elfis = null; try { - file = new File(TEST_LOG_NAME); - elfos = new EditLogFileOutputStream(file, 0); + elfos = new EditLogFileOutputStream(TEST_LOG_NAME, 0); elfos.create(); elts.addTransactionsToLog(elfos, cache); @@ -82,8 +82,7 @@ public class TestNameNodeRecovery { elfos.flushAndSync(true); elfos.close(); elfos = null; - file = new File(TEST_LOG_NAME); - elfis = new EditLogFileInputStream(file); + elfis = new EditLogFileInputStream(TEST_LOG_NAME); // reading through normally will get you an exception Set validTxIds = elts.getValidTxIds(); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java index 5d3272af7ac..335c346cee1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java @@ -56,7 +56,6 @@ public class TestWebHdfsWithMultipleNameNodes { ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.OFF); ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.OFF); ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.OFF); - ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.OFF); } private static final Configuration conf = new HdfsConfiguration(); diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index bad1eae0bad..81c1bb9c352 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -14,6 +14,11 @@ Trunk (Unreleased) MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins. (Avner BenHanoch via acmurthy) + MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu) + + MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions + with poor implementations of Object#hashCode(). (Radim Kolar via cutting) + IMPROVEMENTS MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for @@ -71,8 +76,14 @@ Trunk (Unreleased) MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive. (Brandon Li via suresh) + MAPREDUCE-4809. Change visibility of classes for pluggable sort changes. + (masokan via tucu) + BUG FIXES + MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. + (Yu Gao via llu) + MAPREDUCE-4356. [Rumen] Provide access to the method ParsedTask.obtainTaskAttempts(). (ravigummadi) @@ -171,6 +182,16 @@ Release 2.0.3-alpha - Unreleased MAPREDUCE-4723. Fix warnings found by findbugs 2. (Sandy Ryza via eli) + MAPREDUCE-4703. Add the ability to start the MiniMRClientCluster using + the configurations used before it is being stopped. (ahmed.radwan via tucu) + + MAPREDUCE-4845. ClusterStatus.getMaxMemory() and getUsedMemory() exist in + MR1 but not MR2. (Sandy Ryza via tomwhite) + + MAPREDUCE-4899. Implemented a MR specific plugin for tracking finished + applications that YARN's ResourceManager doesn't keep track off anymore + (Derek Dagit via vinodkv) + OPTIMIZATIONS BUG FIXES @@ -205,6 +226,12 @@ Release 2.0.3-alpha - Unreleased MAPREDUCE-4800. Cleanup o.a.h.mapred.MapTaskStatus - remove unused code. (kkambatl via tucu) + MAPREDUCE-4861. Cleanup: Remove unused mapreduce.security.token.DelegationTokenRenewal. + (kkambatl via tucu) + + MAPREDUCE-4856. TestJobOutputCommitter uses same directory as + TestJobCleanup. (Sandy Ryza via tomwhite) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES @@ -610,6 +637,20 @@ Release 0.23.6 - UNRELEASED MAPREDUCE-4836. Elapsed time for running tasks on AM web UI tasks page is 0 (Ravi Prakash via jeagles) + MAPREDUCE-4842. Shuffle race can hang reducer (Mariappan Asokan via jlowe) + + MAPREDUCE-4833. Task can get stuck in FAIL_CONTAINER_CLEANUP (Robert + Parker via jlowe) + + MAPREDUCE-4793. Problem with adding resources when using both -files and + -file to hadoop streaming (jlowe) + + MAPREDUCE-4890. 
Invalid TaskImpl state transitions when task fails while + speculating (jlowe) + + MAPREDUCE-4902. Fix typo "receievd" should be "received" in log output + (Albert Chu via jlowe) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/dev-support/findbugs-exclude.xml b/hadoop-mapreduce-project/dev-support/findbugs-exclude.xml index f548885ff43..08d4c2e7f68 100644 --- a/hadoop-mapreduce-project/dev-support/findbugs-exclude.xml +++ b/hadoop-mapreduce-project/dev-support/findbugs-exclude.xml @@ -138,11 +138,6 @@
- - - - - diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java index e2ebeb554cf..8b3a084e925 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java @@ -231,7 +231,12 @@ public abstract class TaskImpl implements Task, EventHandler { // Transitions from FAILED state .addTransition(TaskStateInternal.FAILED, TaskStateInternal.FAILED, EnumSet.of(TaskEventType.T_KILL, - TaskEventType.T_ADD_SPEC_ATTEMPT)) + TaskEventType.T_ADD_SPEC_ATTEMPT, + TaskEventType.T_ATTEMPT_COMMIT_PENDING, + TaskEventType.T_ATTEMPT_FAILED, + TaskEventType.T_ATTEMPT_KILLED, + TaskEventType.T_ATTEMPT_LAUNCHED, + TaskEventType.T_ATTEMPT_SUCCEEDED)) // Transitions from KILLED state .addTransition(TaskStateInternal.KILLED, TaskStateInternal.KILLED, @@ -941,6 +946,13 @@ public abstract class TaskImpl implements Task, EventHandler { task.handleTaskAttemptCompletion( taskAttemptId, TaskAttemptCompletionEventStatus.TIPFAILED); + + // issue kill to all non finished attempts + for (TaskAttempt taskAttempt : task.attempts.values()) { + task.killUnfinishedAttempt + (taskAttempt, "Task has failed. Killing attempt!"); + } + task.inProgressAttempts.clear(); if (task.historyTaskStartGenerated) { TaskFailedEvent taskFailedEvent = createTaskFailedEvent(task, attempt.getDiagnostics(), diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index fa97d692ee3..058f7e5da8b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -191,12 +191,9 @@ public class ContainerLauncherImpl extends AbstractService implements @SuppressWarnings("unchecked") public synchronized void kill() { - if(isCompletelyDone()) { - return; - } if(this.state == ContainerState.PREP) { this.state = ContainerState.KILLED_BEFORE_LAUNCH; - } else { + } else if (!isCompletelyDone()) { LOG.info("KILLING " + taskAttemptID); ContainerManager proxy = null; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java index 5e416d99e83..2c3732da529 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java @@ -602,4 +602,73 @@ public class TestTaskImpl { assertTaskScheduledState(); assertEquals(3, taskAttempts.size()); } + + @Test + public void testFailedTransitions() { + mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), + remoteJobConfFile, conf, taskAttemptListener, committer, jobToken, + credentials, clock, + completedTasksFromPreviousRun, startCount, + metrics, appContext, TaskType.MAP) { + @Override + protected int getMaxAttempts() { + return 1; + } + }; + TaskId taskId = getNewTaskID(); + scheduleTaskAttempt(taskId); + launchTaskAttempt(getLastAttempt().getAttemptId()); + + // add three more speculative attempts + mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), + TaskEventType.T_ADD_SPEC_ATTEMPT)); + launchTaskAttempt(getLastAttempt().getAttemptId()); + mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), + TaskEventType.T_ADD_SPEC_ATTEMPT)); + launchTaskAttempt(getLastAttempt().getAttemptId()); + mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), + TaskEventType.T_ADD_SPEC_ATTEMPT)); + launchTaskAttempt(getLastAttempt().getAttemptId()); + assertEquals(4, taskAttempts.size()); + + // have the first attempt fail, verify task failed due to no retries + MockTaskAttemptImpl taskAttempt = taskAttempts.get(0); + taskAttempt.setState(TaskAttemptState.FAILED); + mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), + TaskEventType.T_ATTEMPT_FAILED)); + assertEquals(TaskState.FAILED, mockTask.getState()); + + // verify task can no longer be killed + mockTask.handle(new TaskEvent(taskId, TaskEventType.T_KILL)); + assertEquals(TaskState.FAILED, mockTask.getState()); + + // verify speculative doesn't launch new tasks + mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), + TaskEventType.T_ADD_SPEC_ATTEMPT)); + mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(), + TaskEventType.T_ATTEMPT_LAUNCHED)); + assertEquals(TaskState.FAILED, mockTask.getState()); + assertEquals(4, taskAttempts.size()); + + // verify attempt events from active tasks don't knock task out of FAILED + taskAttempt = taskAttempts.get(1); + taskAttempt.setState(TaskAttemptState.COMMIT_PENDING); + mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), + TaskEventType.T_ATTEMPT_COMMIT_PENDING)); + assertEquals(TaskState.FAILED, mockTask.getState()); + taskAttempt.setState(TaskAttemptState.FAILED); + mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), + TaskEventType.T_ATTEMPT_FAILED)); + assertEquals(TaskState.FAILED, mockTask.getState()); + taskAttempt = taskAttempts.get(2); + taskAttempt.setState(TaskAttemptState.SUCCEEDED); + mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), + TaskEventType.T_ATTEMPT_SUCCEEDED)); + assertEquals(TaskState.FAILED, mockTask.getState()); + taskAttempt = taskAttempts.get(3); + taskAttempt.setState(TaskAttemptState.KILLED); + mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(), + TaskEventType.T_ATTEMPT_KILLED)); + assertEquals(TaskState.FAILED, mockTask.getState()); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java index 838daea0872..a53bbe69072 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java @@ -6,8 +6,12 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.mockito.Mockito.atLeast; +import org.mockito.ArgumentCaptor; import java.net.InetSocketAddress; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CyclicBarrier; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -18,15 +22,21 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher.EventType; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.yarn.api.ContainerManager; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -272,6 +282,150 @@ public class TestContainerLauncherImpl { } finally { ut.stop(); verify(mockCM).stopContainer(any(StopContainerRequest.class)); -} + } } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + @Test + public void testContainerCleaned() throws Exception { + LOG.info("STARTING testContainerCleaned"); + + CyclicBarrier startLaunchBarrier = new CyclicBarrier(2); + CyclicBarrier completeLaunchBarrier = new CyclicBarrier(2); + + YarnRPC mockRpc = mock(YarnRPC.class); + AppContext mockContext = mock(AppContext.class); + + EventHandler mockEventHandler = mock(EventHandler.class); + when(mockContext.getEventHandler()).thenReturn(mockEventHandler); + + ContainerManager mockCM = new ContainerManagerForTest(startLaunchBarrier, completeLaunchBarrier); + when(mockRpc.getProxy(eq(ContainerManager.class), + any(InetSocketAddress.class), any(Configuration.class))) + .thenReturn(mockCM); + + ContainerLauncherImplUnderTest ut = + new ContainerLauncherImplUnderTest(mockContext, mockRpc); + + Configuration conf = new Configuration(); + ut.init(conf); + ut.start(); + try { + ContainerId contId = 
makeContainerId(0l, 0, 0, 1); + TaskAttemptId taskAttemptId = makeTaskAttemptId(0l, 0, 0, TaskType.MAP, 0); + String cmAddress = "127.0.0.1:8000"; + StartContainerResponse startResp = + recordFactory.newRecordInstance(StartContainerResponse.class); + startResp.setServiceResponse(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID, + ShuffleHandler.serializeMetaData(80)); + + + LOG.info("inserting launch event"); + ContainerRemoteLaunchEvent mockLaunchEvent = + mock(ContainerRemoteLaunchEvent.class); + when(mockLaunchEvent.getType()) + .thenReturn(EventType.CONTAINER_REMOTE_LAUNCH); + when(mockLaunchEvent.getContainerID()) + .thenReturn(contId); + when(mockLaunchEvent.getTaskAttemptID()).thenReturn(taskAttemptId); + when(mockLaunchEvent.getContainerMgrAddress()).thenReturn(cmAddress); + ut.handle(mockLaunchEvent); + + startLaunchBarrier.await(); + + + LOG.info("inserting cleanup event"); + ContainerLauncherEvent mockCleanupEvent = + mock(ContainerLauncherEvent.class); + when(mockCleanupEvent.getType()) + .thenReturn(EventType.CONTAINER_REMOTE_CLEANUP); + when(mockCleanupEvent.getContainerID()) + .thenReturn(contId); + when(mockCleanupEvent.getTaskAttemptID()).thenReturn(taskAttemptId); + when(mockCleanupEvent.getContainerMgrAddress()).thenReturn(cmAddress); + ut.handle(mockCleanupEvent); + + completeLaunchBarrier.await(); + + ut.waitForPoolToIdle(); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler, atLeast(2)).handle(arg.capture()); + boolean containerCleaned = false; + + for (int i =0; i < arg.getAllValues().size(); i++) { + LOG.info(arg.getAllValues().get(i).toString()); + Event currentEvent = arg.getAllValues().get(i); + if (currentEvent.getType() == TaskAttemptEventType.TA_CONTAINER_CLEANED) { + containerCleaned = true; + } + } + assert(containerCleaned); + + } finally { + ut.stop(); + } + } + + private static class ContainerManagerForTest implements ContainerManager { + + private CyclicBarrier startLaunchBarrier; + private CyclicBarrier completeLaunchBarrier; + + ContainerManagerForTest (CyclicBarrier startLaunchBarrier, CyclicBarrier completeLaunchBarrier) { + this.startLaunchBarrier = startLaunchBarrier; + this.completeLaunchBarrier = completeLaunchBarrier; + } + @Override + public StartContainerResponse startContainer(StartContainerRequest request) + throws YarnRemoteException { + try { + startLaunchBarrier.await(); + completeLaunchBarrier.await(); + //To ensure the kill is started before the launch + Thread.sleep(100); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (BrokenBarrierException e) { + e.printStackTrace(); + } + + throw new ContainerException("Force fail CM"); + + } + + @Override + public StopContainerResponse stopContainer(StopContainerRequest request) + throws YarnRemoteException { + + return null; + } + + @Override + public GetContainerStatusResponse getContainerStatus( + GetContainerStatusRequest request) throws YarnRemoteException { + + return null; + } + } + + @SuppressWarnings("serial") + private static class ContainerException extends YarnRemoteException { + + public ContainerException(String message) { + super(message); + } + + @Override + public String getRemoteTrace() { + return null; + } + + @Override + public YarnRemoteException getCause() { + return null; + } + + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ClusterStatus.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ClusterStatus.java index 8886b26638f..c527a2776ab 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ClusterStatus.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ClusterStatus.java @@ -175,6 +175,8 @@ public class ClusterStatus implements Writable { } + public static final int UNINITIALIZED_MEMORY_VALUE = -1; + private int numActiveTrackers; private Collection activeTrackers = new ArrayList(); private int numBlacklistedTrackers; @@ -384,6 +386,22 @@ public class ClusterStatus implements Writable { public JobTrackerStatus getJobTrackerStatus() { return status; } + + /** + * Returns UNINITIALIZED_MEMORY_VALUE (-1) + */ + @Deprecated + public long getMaxMemory() { + return UNINITIALIZED_MEMORY_VALUE; + } + + /** + * Returns UNINITIALIZED_MEMORY_VALUE (-1) + */ + @Deprecated + public long getUsedMemory() { + return UNINITIALIZED_MEMORY_VALUE; + } /** * Gets the list of blacklisted trackers along with reasons for blacklisting. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java new file mode 100644 index 00000000000..3996534bd54 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.mapred; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public class IndexRecord { + public long startOffset; + public long rawLength; + public long partLength; + + public IndexRecord() { } + + public IndexRecord(long startOffset, long rawLength, long partLength) { + this.startOffset = startOffset; + this.rawLength = rawLength; + this.partLength = partLength; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapOutputCollector.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapOutputCollector.java new file mode 100644 index 00000000000..368c0168997 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapOutputCollector.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapred; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import org.apache.hadoop.mapred.Task.TaskReporter; + +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public interface MapOutputCollector { + public void init(Context context + ) throws IOException, ClassNotFoundException; + public void collect(K key, V value, int partition + ) throws IOException, InterruptedException; + public void close() throws IOException, InterruptedException; + + public void flush() throws IOException, InterruptedException, + ClassNotFoundException; + + @InterfaceAudience.LimitedPrivate({"MapReduce"}) + @InterfaceStability.Unstable + public static class Context { + private final MapTask mapTask; + private final JobConf jobConf; + private final TaskReporter reporter; + + public Context(MapTask mapTask, JobConf jobConf, TaskReporter reporter) { + this.mapTask = mapTask; + this.jobConf = jobConf; + this.reporter = reporter; + } + + public MapTask getMapTask() { + return mapTask; + } + + public JobConf getJobConf() { + return jobConf; + } + + public TaskReporter getReporter() { + return reporter; + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java index 16fb4d21452..cd4ba51c31c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java @@ -34,6 +34,8 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -54,6 +56,7 @@ import org.apache.hadoop.io.serializer.Serializer; import org.apache.hadoop.mapred.IFile.Writer; import org.apache.hadoop.mapred.Merger.Segment; import org.apache.hadoop.mapred.SortedRanges.SkipRangeIterator; +import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskCounter; @@ -71,7 +74,9 @@ import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; /** A Map task. */ -class MapTask extends Task { +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public class MapTask extends Task { /** * The size of each record in the index file for the map-outputs. 
*/ @@ -338,6 +343,10 @@ class MapTask extends Task { done(umbilical, reporter); } + public Progress getSortPhase() { + return sortPhase; + } + @SuppressWarnings("unchecked") private T getSplitDetails(Path file, long offset) throws IOException { @@ -366,6 +375,22 @@ class MapTask extends Task { return split; } + @SuppressWarnings("unchecked") + private MapOutputCollector + createSortingCollector(JobConf job, TaskReporter reporter) + throws IOException, ClassNotFoundException { + MapOutputCollector collector + = (MapOutputCollector) + ReflectionUtils.newInstance( + job.getClass(JobContext.MAP_OUTPUT_COLLECTOR_CLASS_ATTR, + MapOutputBuffer.class, MapOutputCollector.class), job); + LOG.info("Map output collector class = " + collector.getClass().getName()); + MapOutputCollector.Context context = + new MapOutputCollector.Context(this, job, reporter); + collector.init(context); + return collector; + } + @SuppressWarnings("unchecked") private void runOldMapper(final JobConf job, @@ -388,11 +413,14 @@ class MapTask extends Task { int numReduceTasks = conf.getNumReduceTasks(); LOG.info("numReduceTasks: " + numReduceTasks); - MapOutputCollector collector = null; + MapOutputCollector collector = null; if (numReduceTasks > 0) { - collector = new MapOutputBuffer(umbilical, job, reporter); + collector = createSortingCollector(job, reporter); } else { - collector = new DirectMapOutputCollector(umbilical, job, reporter); + collector = new DirectMapOutputCollector(); + MapOutputCollector.Context context = + new MapOutputCollector.Context(this, job, reporter); + collector.init(context); } MapRunnable runner = ReflectionUtils.newInstance(job.getMapRunnerClass(), job); @@ -638,7 +666,7 @@ class MapTask extends Task { TaskUmbilicalProtocol umbilical, TaskReporter reporter ) throws IOException, ClassNotFoundException { - collector = new MapOutputBuffer(umbilical, job, reporter); + collector = createSortingCollector(job, reporter); partitions = jobContext.getNumReduceTasks(); if (partitions > 1) { partitioner = (org.apache.hadoop.mapreduce.Partitioner) @@ -734,17 +762,6 @@ class MapTask extends Task { output.close(mapperContext); } - interface MapOutputCollector { - - public void collect(K key, V value, int partition - ) throws IOException, InterruptedException; - public void close() throws IOException, InterruptedException; - - public void flush() throws IOException, InterruptedException, - ClassNotFoundException; - - } - class DirectMapOutputCollector implements MapOutputCollector { @@ -752,14 +769,18 @@ class MapTask extends Task { private TaskReporter reporter = null; - private final Counters.Counter mapOutputRecordCounter; - private final Counters.Counter fileOutputByteCounter; - private final List fsStats; + private Counters.Counter mapOutputRecordCounter; + private Counters.Counter fileOutputByteCounter; + private List fsStats; + + public DirectMapOutputCollector() { + } @SuppressWarnings("unchecked") - public DirectMapOutputCollector(TaskUmbilicalProtocol umbilical, - JobConf job, TaskReporter reporter) throws IOException { - this.reporter = reporter; + public void init(MapOutputCollector.Context context + ) throws IOException, ClassNotFoundException { + this.reporter = context.getReporter(); + JobConf job = context.getJobConf(); String finalName = getOutputName(getPartition()); FileSystem fs = FileSystem.get(job); @@ -814,25 +835,27 @@ class MapTask extends Task { } } - private class MapOutputBuffer + @InterfaceAudience.LimitedPrivate({"MapReduce"}) + @InterfaceStability.Unstable + public static class 
MapOutputBuffer implements MapOutputCollector, IndexedSortable { - final int partitions; - final JobConf job; - final TaskReporter reporter; - final Class keyClass; - final Class valClass; - final RawComparator comparator; - final SerializationFactory serializationFactory; - final Serializer keySerializer; - final Serializer valSerializer; - final CombinerRunner combinerRunner; - final CombineOutputCollector combineCollector; + private int partitions; + private JobConf job; + private TaskReporter reporter; + private Class keyClass; + private Class valClass; + private RawComparator comparator; + private SerializationFactory serializationFactory; + private Serializer keySerializer; + private Serializer valSerializer; + private CombinerRunner combinerRunner; + private CombineOutputCollector combineCollector; // Compression for map-outputs - final CompressionCodec codec; + private CompressionCodec codec; // k/v accounting - final IntBuffer kvmeta; // metadata overlay on backing store + private IntBuffer kvmeta; // metadata overlay on backing store int kvstart; // marks origin of spill metadata int kvend; // marks end of spill metadata int kvindex; // marks end of fully serialized records @@ -856,15 +879,15 @@ class MapTask extends Task { private static final int METASIZE = NMETA * 4; // size in bytes // spill accounting - final int maxRec; - final int softLimit; + private int maxRec; + private int softLimit; boolean spillInProgress;; int bufferRemaining; volatile Throwable sortSpillException = null; int numSpills = 0; - final int minSpillsForCombine; - final IndexedSorter sorter; + private int minSpillsForCombine; + private IndexedSorter sorter; final ReentrantLock spillLock = new ReentrantLock(); final Condition spillDone = spillLock.newCondition(); final Condition spillReady = spillLock.newCondition(); @@ -872,12 +895,12 @@ class MapTask extends Task { volatile boolean spillThreadRunning = false; final SpillThread spillThread = new SpillThread(); - final FileSystem rfs; + private FileSystem rfs; // Counters - final Counters.Counter mapOutputByteCounter; - final Counters.Counter mapOutputRecordCounter; - final Counters.Counter fileOutputByteCounter; + private Counters.Counter mapOutputByteCounter; + private Counters.Counter mapOutputRecordCounter; + private Counters.Counter fileOutputByteCounter; final ArrayList indexCacheList = new ArrayList(); @@ -885,12 +908,23 @@ class MapTask extends Task { private int indexCacheMemoryLimit; private static final int INDEX_CACHE_MEMORY_LIMIT_DEFAULT = 1024 * 1024; + private MapTask mapTask; + private MapOutputFile mapOutputFile; + private Progress sortPhase; + private Counters.Counter spilledRecordsCounter; + + public MapOutputBuffer() { + } + @SuppressWarnings("unchecked") - public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job, - TaskReporter reporter - ) throws IOException, ClassNotFoundException { - this.job = job; - this.reporter = reporter; + public void init(MapOutputCollector.Context context + ) throws IOException, ClassNotFoundException { + job = context.getJobConf(); + reporter = context.getReporter(); + mapTask = context.getMapTask(); + mapOutputFile = mapTask.getMapOutputFile(); + sortPhase = mapTask.getSortPhase(); + spilledRecordsCounter = reporter.getCounter(TaskCounter.SPILLED_RECORDS); partitions = job.getNumReduceTasks(); rfs = ((LocalFileSystem)FileSystem.getLocal(job)).getRaw(); @@ -967,7 +1001,7 @@ class MapTask extends Task { if (combinerRunner != null) { final Counters.Counter combineOutputCounter = 
reporter.getCounter(TaskCounter.COMBINE_OUTPUT_RECORDS); - combineCollector= new CombineOutputCollector(combineOutputCounter, reporter, conf); + combineCollector= new CombineOutputCollector(combineOutputCounter, reporter, job); } else { combineCollector = null; } @@ -1118,6 +1152,10 @@ class MapTask extends Task { } } + private TaskAttemptID getTaskID() { + return mapTask.getTaskID(); + } + /** * Set the point from which meta and serialization data expand. The meta * indices are aligned with the buffer, so metadata never spans the ends of @@ -1490,7 +1528,7 @@ class MapTask extends Task { if (lspillException instanceof Error) { final String logMsg = "Task " + getTaskID() + " failed : " + StringUtils.stringifyException(lspillException); - reportFatalError(getTaskID(), lspillException, logMsg); + mapTask.reportFatalError(getTaskID(), lspillException, logMsg); } throw new IOException("Spill failed", lspillException); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java index a6531fce3d8..cdc2695c398 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java @@ -271,7 +271,7 @@ class SortedRanges implements Writable{ } public boolean equals(Object o) { - if(o!=null && o instanceof Range) { + if (o instanceof Range) { Range range = (Range)o; return startIndex==range.startIndex && length==range.length; @@ -285,10 +285,11 @@ class SortedRanges implements Writable{ } public int compareTo(Range o) { - if(this.equals(o)) { - return 0; - } - return (this.startIndex > o.startIndex) ? 1:-1; + // Ensure sgn(x.compareTo(y) == -sgn(y.compareTo(x)) + return this.startIndex < o.startIndex ? -1 : + (this.startIndex > o.startIndex ? 1 : + (this.length < o.length ? -1 : + (this.length > o.length ? 
1 : 0))); } public void readFields(DataInput in) throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SpillRecord.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SpillRecord.java index dc673350d96..93a2d04cbfc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SpillRecord.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SpillRecord.java @@ -26,6 +26,8 @@ import java.util.zip.CheckedInputStream; import java.util.zip.CheckedOutputStream; import java.util.zip.Checksum; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -34,7 +36,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.PureJavaCrc32; -class SpillRecord { +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public class SpillRecord { /** Backing store */ private final ByteBuffer buf; @@ -143,17 +147,3 @@ class SpillRecord { } } - -class IndexRecord { - long startOffset; - long rawLength; - long partLength; - - public IndexRecord() { } - - public IndexRecord(long startOffset, long rawLength, long partLength) { - this.startOffset = startOffset; - this.rawLength = rawLength; - this.partLength = partLength; - } -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java index 40340295836..f983a8d386e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java @@ -61,8 +61,7 @@ import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer; import org.apache.hadoop.mapreduce.task.ReduceContextImpl; -import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; -import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin.*; +import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.Progress; import org.apache.hadoop.util.Progressable; @@ -169,7 +168,7 @@ abstract public class Task implements Writable, Configurable { private Iterator currentRecIndexIterator = skipRanges.skipRangeIterator(); - private ResourceCalculatorPlugin resourceCalculator = null; + private ResourceCalculatorProcessTree pTree; private long initCpuCumulativeTime = 0; protected JobConf conf; @@ -372,7 +371,7 @@ abstract public class Task implements Writable, Configurable { * Return current state of the task. 
* needs to be synchronized as communication thread * sends the state every second - * @return + * @return task state */ synchronized TaskStatus.State getState(){ return this.taskStatus.getRunState(); @@ -558,15 +557,15 @@ abstract public class Task implements Writable, Configurable { } } committer.setupTask(taskContext); - Class clazz = - conf.getClass(MRConfig.RESOURCE_CALCULATOR_PLUGIN, - null, ResourceCalculatorPlugin.class); - resourceCalculator = ResourceCalculatorPlugin - .getResourceCalculatorPlugin(clazz, conf); - LOG.info(" Using ResourceCalculatorPlugin : " + resourceCalculator); - if (resourceCalculator != null) { - initCpuCumulativeTime = - resourceCalculator.getProcResourceValues().getCumulativeCpuTime(); + Class clazz = + conf.getClass(MRConfig.RESOURCE_CALCULATOR_PROCESS_TREE, + null, ResourceCalculatorProcessTree.class); + pTree = ResourceCalculatorProcessTree + .getResourceCalculatorProcessTree(System.getenv().get("JVM_PID"), clazz, conf); + LOG.info(" Using ResourceCalculatorProcessTree : " + pTree); + if (pTree != null) { + pTree.updateProcessTree(); + initCpuCumulativeTime = pTree.getCumulativeCpuTime(); } } @@ -584,9 +583,9 @@ abstract public class Task implements Writable, Configurable { return status; } - @InterfaceAudience.Private + @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable - protected class TaskReporter + public class TaskReporter extends org.apache.hadoop.mapreduce.StatusReporter implements Runnable, Reporter { private TaskUmbilicalProtocol umbilical; @@ -817,14 +816,14 @@ abstract public class Task implements Writable, Configurable { // Update generic resource counters updateHeapUsageCounter(); - // Updating resources specified in ResourceCalculatorPlugin - if (resourceCalculator == null) { + // Updating resources specified in ResourceCalculatorProcessTree + if (pTree == null) { return; } - ProcResourceValues res = resourceCalculator.getProcResourceValues(); - long cpuTime = res.getCumulativeCpuTime(); - long pMem = res.getPhysicalMemorySize(); - long vMem = res.getVirtualMemorySize(); + pTree.updateProcessTree(); + long cpuTime = pTree.getCumulativeCpuTime(); + long pMem = pTree.getCumulativeRssmem(); + long vMem = pTree.getCumulativeVmem(); // Remove the CPU time consumed previously by JVM reuse cpuTime -= initCpuCumulativeTime; counters.findCounter(TaskCounter.CPU_MILLISECONDS).setValue(cpuTime); @@ -1466,9 +1465,9 @@ abstract public class Task implements Writable, Configurable { return reducerContext; } - @InterfaceAudience.Private + @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable - protected static abstract class CombinerRunner { + public static abstract class CombinerRunner { protected final Counters.Counter inputCounter; protected final JobConf job; protected final TaskReporter reporter; @@ -1486,13 +1485,13 @@ abstract public class Task implements Writable, Configurable { * @param iterator the key/value pairs to use as input * @param collector the output collector */ - abstract void combine(RawKeyValueIterator iterator, + public abstract void combine(RawKeyValueIterator iterator, OutputCollector collector ) throws IOException, InterruptedException, ClassNotFoundException; @SuppressWarnings("unchecked") - static + public static CombinerRunner create(JobConf job, TaskAttemptID taskId, Counters.Counter inputCounter, @@ -1542,7 +1541,7 @@ abstract public class Task implements Writable, Configurable { } @SuppressWarnings("unchecked") - protected void combine(RawKeyValueIterator kvIter, + public 
void combine(RawKeyValueIterator kvIter, OutputCollector combineCollector ) throws IOException { Reducer combiner = @@ -1611,7 +1610,7 @@ abstract public class Task implements Writable, Configurable { @SuppressWarnings("unchecked") @Override - void combine(RawKeyValueIterator iterator, + public void combine(RawKeyValueIterator iterator, OutputCollector collector ) throws IOException, InterruptedException, ClassNotFoundException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java index dc1ff658f67..375391320be 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java @@ -55,8 +55,8 @@ public interface MRConfig { public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = 7*24*60*60*1000; // 7 days - public static final String RESOURCE_CALCULATOR_PLUGIN = - "mapreduce.job.resourcecalculatorplugin"; + public static final String RESOURCE_CALCULATOR_PROCESS_TREE = + "mapreduce.job.process-tree.class"; public static final String STATIC_RESOLUTIONS = "mapreduce.job.net.static.resolutions"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index b086a12b614..df15673cd13 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -30,6 +30,9 @@ public interface MRJobConfig { public static final String MAP_CLASS_ATTR = "mapreduce.job.map.class"; + public static final String MAP_OUTPUT_COLLECTOR_CLASS_ATTR + = "mapreduce.job.map.output.collector.class"; + public static final String COMBINE_CLASS_ATTR = "mapreduce.job.combine.class"; public static final String REDUCE_CLASS_ATTR = "mapreduce.job.reduce.class"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/RehashPartitioner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/RehashPartitioner.java new file mode 100644 index 00000000000..ffc3938a815 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/RehashPartitioner.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.lib.partition; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.mapreduce.Partitioner; + +/** + * This partitioner rehashes values returned by {@link Object#hashCode()} + * to get smoother distribution between partitions which may improve + * reduce reduce time in some cases and should harm things in no cases. + * This partitioner is suggested with Integer and Long keys with simple + * patterns in their distributions. + * @since 2.0.3 + */ +@InterfaceAudience.Public +@InterfaceStability.Stable +public class RehashPartitioner extends Partitioner { + + /** prime number seed for increasing hash quality */ + private static final int SEED = 1591267453; + + /** Rehash {@link Object#hashCode()} to partition. */ + public int getPartition(K key, V value, int numReduceTasks) { + int h = SEED ^ key.hashCode(); + h ^= (h >>> 20) ^ (h >>> 12); + h = h ^ (h >>> 7) ^ (h >>> 4); + + return (h & Integer.MAX_VALUE) % numReduceTasks; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/token/DelegationTokenRenewal.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/token/DelegationTokenRenewal.java deleted file mode 100644 index 90007770691..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/token/DelegationTokenRenewal.java +++ /dev/null @@ -1,318 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.mapreduce.security.token; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.LinkedBlockingQueue; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.util.StringUtils; - - -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class DelegationTokenRenewal { - private static final Log LOG = LogFactory.getLog(DelegationTokenRenewal.class); - public static final String SCHEME = "hdfs"; - - /** - * class that is used for keeping tracks of DT to renew - * - */ - private static class DelegationTokenToRenew { - public final Token token; - public final JobID jobId; - public final Configuration conf; - public long expirationDate; - public TimerTask timerTask; - - public DelegationTokenToRenew( - JobID jId, Token t, - Configuration newConf, long newExpirationDate) { - token = t; - jobId = jId; - conf = newConf; - expirationDate = newExpirationDate; - timerTask = null; - if(token==null || jobId==null || conf==null) { - throw new IllegalArgumentException("invalid params for Renew Token" + - ";t="+token+";j="+jobId+";c="+conf); - } - } - public void setTimerTask(TimerTask tTask) { - timerTask = tTask; - } - @Override - public String toString() { - return token + ";exp="+expirationDate; - } - @Override - public boolean equals (Object obj) { - if (obj == this) { - return true; - } else if (obj == null || getClass() != obj.getClass()) { - return false; - } else { - return token.equals(((DelegationTokenToRenew)obj).token); - } - } - @Override - public int hashCode() { - return token.hashCode(); - } - } - - // global single timer (daemon) - private static Timer renewalTimer = new Timer(true); - - //delegation token canceler thread - private static DelegationTokenCancelThread dtCancelThread = - new DelegationTokenCancelThread(); - static { - dtCancelThread.start(); - } - - - //managing the list of tokens using Map - // jobId=>List - private static Set delegationTokens = - Collections.synchronizedSet(new HashSet()); - - private static class DelegationTokenCancelThread extends Thread { - private static class TokenWithConf { - Token token; - Configuration conf; - TokenWithConf(Token token, Configuration conf) { - this.token = token; - this.conf = conf; - } - } - private LinkedBlockingQueue queue = - new LinkedBlockingQueue(); - - public DelegationTokenCancelThread() { - super("Delegation Token Canceler"); - setDaemon(true); - } - public void cancelToken(Token token, - Configuration conf) { - TokenWithConf tokenWithConf = new TokenWithConf(token, conf); - while (!queue.offer(tokenWithConf)) { - LOG.warn("Unable to add token " + token + " for cancellation. 
" + - "Will retry.."); - try { - Thread.sleep(100); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - } - - public void run() { - while (true) { - TokenWithConf tokenWithConf = null; - try { - tokenWithConf = queue.take(); - final TokenWithConf current = tokenWithConf; - - if (LOG.isDebugEnabled()) { - LOG.debug("Canceling token " + tokenWithConf.token.getService()); - } - // need to use doAs so that http can find the kerberos tgt - UserGroupInformation.getLoginUser().doAs( - new PrivilegedExceptionAction() { - - @Override - public Void run() throws Exception { - current.token.cancel(current.conf); - return null; - } - }); - } catch (IOException e) { - LOG.warn("Failed to cancel token " + tokenWithConf.token + " " + - StringUtils.stringifyException(e)); - } catch (InterruptedException ie) { - return; - } catch (Throwable t) { - LOG.warn("Got exception " + StringUtils.stringifyException(t) + - ". Exiting.."); - System.exit(-1); - } - } - } - } - //adding token - private static void addTokenToList(DelegationTokenToRenew t) { - delegationTokens.add(t); - } - - public static synchronized void registerDelegationTokensForRenewal( - JobID jobId, Credentials ts, Configuration conf) throws IOException { - if(ts==null) - return; //nothing to add - - Collection > tokens = ts.getAllTokens(); - long now = System.currentTimeMillis(); - - for (Token t : tokens) { - // first renew happens immediately - if (t.isManaged()) { - DelegationTokenToRenew dtr = new DelegationTokenToRenew(jobId, t, conf, - now); - - addTokenToList(dtr); - - setTimerForTokenRenewal(dtr, true); - LOG.info("registering token for renewal for service =" + t.getService() - + " and jobID = " + jobId); - } - } - } - - /** - * Task - to renew a token - * - */ - private static class RenewalTimerTask extends TimerTask { - private DelegationTokenToRenew dttr; - - RenewalTimerTask(DelegationTokenToRenew t) { dttr = t; } - - @Override - public void run() { - Token token = dttr.token; - long newExpirationDate=0; - try { - // need to use doAs so that http can find the kerberos tgt - dttr.expirationDate = UserGroupInformation.getLoginUser().doAs( - new PrivilegedExceptionAction() { - - @Override - public Long run() throws Exception { - return dttr.token.renew(dttr.conf); - } - }); - - if (LOG.isDebugEnabled()) { - LOG.debug("renewing for:" + token.getService() + ";newED=" - + dttr.expirationDate); - } - setTimerForTokenRenewal(dttr, false);// set the next one - } catch (Exception e) { - LOG.error("Exception renewing token" + token + ". 
Not rescheduled", e); - removeFailedDelegationToken(dttr); - } - } - } - - /** - * find the soonest expiring token and set it for renew - */ - private static void setTimerForTokenRenewal( - DelegationTokenToRenew token, boolean firstTime) { - - // calculate timer time - long now = System.currentTimeMillis(); - long renewIn; - if(firstTime) { - renewIn = now; - } else { - long expiresIn = (token.expirationDate - now); - renewIn = now + expiresIn - expiresIn/10; // little before expiration - } - - // need to create new timer every time - TimerTask tTask = new RenewalTimerTask(token); - token.setTimerTask(tTask); // keep reference to the timer - - renewalTimer.schedule(token.timerTask, new Date(renewIn)); - } - - /** - * removing all tokens renewals - */ - static public void close() { - renewalTimer.cancel(); - delegationTokens.clear(); - } - - // cancel a token - private static void cancelToken(DelegationTokenToRenew t) { - dtCancelThread.cancelToken(t.token, t.conf); - } - - /** - * removing failed DT - * @param jobId - */ - private static void removeFailedDelegationToken(DelegationTokenToRenew t) { - JobID jobId = t.jobId; - if (LOG.isDebugEnabled()) - LOG.debug("removing failed delegation token for jobid=" + jobId + - ";t=" + t.token.getService()); - delegationTokens.remove(t); - // cancel the timer - if(t.timerTask!=null) - t.timerTask.cancel(); - } - - /** - * removing DT for completed jobs - * @param jobId - */ - public static void removeDelegationTokenRenewalForJob(JobID jobId) { - synchronized (delegationTokens) { - Iterator it = delegationTokens.iterator(); - while(it.hasNext()) { - DelegationTokenToRenew dttr = it.next(); - if (dttr.jobId.equals(jobId)) { - if (LOG.isDebugEnabled()) - LOG.debug("removing delegation token for jobid=" + jobId + - ";t=" + dttr.token.getService()); - - // cancel the timer - if(dttr.timerTask!=null) - dttr.timerTask.cancel(); - - // cancel the token - cancelToken(dttr); - - it.remove(); - } - } - } - } -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ExceptionReporter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ExceptionReporter.java index f893cec5b1a..34a9a42b6b5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ExceptionReporter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ExceptionReporter.java @@ -17,9 +17,14 @@ */ package org.apache.hadoop.mapreduce.task.reduce; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + /** * An interface for reporting exceptions to other threads */ -interface ExceptionReporter { +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public interface ExceptionReporter { void reportException(Throwable t); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java index ef8a2a95256..f2cbc6e4ed7 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java @@ -282,7 +282,7 @@ class Fetcher extends Thread { LOG.debug("url="+msgToEncode+";encHash="+encHash+";replyHash="+replyHash); // verify that replyHash is HMac of encHash SecureShuffleUtils.verifyReply(replyHash, encHash, jobTokenSecret); - LOG.info("for url="+msgToEncode+" sent hash and receievd reply"); + LOG.info("for url="+msgToEncode+" sent hash and received reply"); } catch (IOException ie) { boolean connectExcpt = ie instanceof ConnectException; ioErrs.increment(1); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapHost.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapHost.java index 7c5e621c5cc..935931dcefd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapHost.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapHost.java @@ -20,9 +20,14 @@ package org.apache.hadoop.mapreduce.task.reduce; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + import org.apache.hadoop.mapreduce.TaskAttemptID; -class MapHost { +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public class MapHost { public static enum State { IDLE, // No map outputs available diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapOutput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapOutput.java index aab0cccc793..fbe7096abfd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapOutput.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapOutput.java @@ -24,6 +24,8 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; @@ -33,7 +35,9 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapOutputFile; import org.apache.hadoop.mapreduce.TaskAttemptID; -class MapOutput { +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public class MapOutput { private static final Log LOG = LogFactory.getLog(MapOutput.class); private static AtomicInteger ID = new AtomicInteger(0); @@ -62,7 +66,7 @@ class MapOutput { private final boolean primaryMapOutput; - MapOutput(TaskAttemptID mapId, MergeManager merger, long size, + public MapOutput(TaskAttemptID mapId, MergeManager 
merger, long size, JobConf conf, LocalDirAllocator localDirAllocator, int fetcher, boolean primaryMapOutput, MapOutputFile mapOutputFile) throws IOException { @@ -87,7 +91,7 @@ class MapOutput { this.primaryMapOutput = primaryMapOutput; } - MapOutput(TaskAttemptID mapId, MergeManager merger, int size, + public MapOutput(TaskAttemptID mapId, MergeManager merger, int size, boolean primaryMapOutput) { this.id = ID.incrementAndGet(); this.mapId = mapId; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java index 29503ceb814..c75f14274dc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java @@ -58,8 +58,10 @@ import org.apache.hadoop.mapreduce.task.reduce.MapOutput.MapOutputComparator; import org.apache.hadoop.util.Progress; import org.apache.hadoop.util.ReflectionUtils; -@SuppressWarnings(value={"unchecked", "deprecation"}) -@InterfaceAudience.Private +import com.google.common.annotations.VisibleForTesting; + +@SuppressWarnings(value={"unchecked"}) +@InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable public class MergeManager { @@ -85,7 +87,7 @@ public class MergeManager { Set> inMemoryMapOutputs = new TreeSet>(new MapOutputComparator()); - private final InMemoryMerger inMemoryMerger; + private final MergeThread, K,V> inMemoryMerger; Set onDiskMapOutputs = new TreeSet(); private final OnDiskMerger onDiskMerger; @@ -179,6 +181,8 @@ public class MergeManager { + singleShuffleMemoryLimitPercent); } + usedMemory = 0L; + commitMemory = 0L; this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent); this.memToMemMergeOutputsThreshold = @@ -210,7 +214,7 @@ public class MergeManager { this.memToMemMerger = null; } - this.inMemoryMerger = new InMemoryMerger(this); + this.inMemoryMerger = createInMemoryMerger(); this.inMemoryMerger.start(); this.onDiskMerger = new OnDiskMerger(this); @@ -219,11 +223,19 @@ public class MergeManager { this.mergePhase = mergePhase; } + protected MergeThread, K,V> createInMemoryMerger() { + return new InMemoryMerger(this); + } TaskAttemptID getReduceId() { return reduceId; } + @VisibleForTesting + ExceptionReporter getExceptionReporter() { + return exceptionReporter; + } + public void waitForInMemoryMerge() throws InterruptedException { inMemoryMerger.waitForMerge(); } @@ -288,7 +300,6 @@ public class MergeManager { } synchronized void unreserve(long size) { - commitMemory -= size; usedMemory -= size; } @@ -300,24 +311,20 @@ public class MergeManager { commitMemory+= mapOutput.getSize(); - synchronized (inMemoryMerger) { - // Can hang if mergeThreshold is really low. - if (!inMemoryMerger.isInProgress() && commitMemory >= mergeThreshold) { - LOG.info("Starting inMemoryMerger's merge since commitMemory=" + - commitMemory + " > mergeThreshold=" + mergeThreshold + - ". Current usedMemory=" + usedMemory); - inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs); - inMemoryMergedMapOutputs.clear(); - inMemoryMerger.startMerge(inMemoryMapOutputs); - } + // Can hang if mergeThreshold is really low. 
+ if (commitMemory >= mergeThreshold) { + LOG.info("Starting inMemoryMerger's merge since commitMemory=" + + commitMemory + " > mergeThreshold=" + mergeThreshold + + ". Current usedMemory=" + usedMemory); + inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs); + inMemoryMergedMapOutputs.clear(); + inMemoryMerger.startMerge(inMemoryMapOutputs); + commitMemory = 0L; // Reset commitMemory. } if (memToMemMerger != null) { - synchronized (memToMemMerger) { - if (!memToMemMerger.isInProgress() && - inMemoryMapOutputs.size() >= memToMemMergeOutputsThreshold) { - memToMemMerger.startMerge(inMemoryMapOutputs); - } + if (inMemoryMapOutputs.size() >= memToMemMergeOutputsThreshold) { + memToMemMerger.startMerge(inMemoryMapOutputs); } } } @@ -333,11 +340,8 @@ public class MergeManager { public synchronized void closeOnDiskFile(Path file) { onDiskMapOutputs.add(file); - synchronized (onDiskMerger) { - if (!onDiskMerger.isInProgress() && - onDiskMapOutputs.size() >= (2 * ioSortFactor - 1)) { - onDiskMerger.startMerge(onDiskMapOutputs); - } + if (onDiskMapOutputs.size() >= (2 * ioSortFactor - 1)) { + onDiskMerger.startMerge(onDiskMapOutputs); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeThread.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeThread.java index f5d89a3efc7..568f4e6ffec 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeThread.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeThread.java @@ -20,8 +20,10 @@ package org.apache.hadoop.mapreduce.task.reduce; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,8 +32,8 @@ abstract class MergeThread extends Thread { private static final Log LOG = LogFactory.getLog(MergeThread.class); - private volatile boolean inProgress = false; - private List inputs = new ArrayList(); + private AtomicInteger numPending = new AtomicInteger(0); + private LinkedList> pendingToBeMerged; protected final MergeManager manager; private final ExceptionReporter reporter; private boolean closed = false; @@ -39,6 +41,7 @@ abstract class MergeThread extends Thread { public MergeThread(MergeManager manager, int mergeFactor, ExceptionReporter reporter) { + this.pendingToBeMerged = new LinkedList>(); this.manager = manager; this.mergeFactor = mergeFactor; this.reporter = reporter; @@ -50,53 +53,55 @@ abstract class MergeThread extends Thread { interrupt(); } - public synchronized boolean isInProgress() { - return inProgress; - } - - public synchronized void startMerge(Set inputs) { + public void startMerge(Set inputs) { if (!closed) { - inProgress = true; - this.inputs = new ArrayList(); + numPending.incrementAndGet(); + List toMergeInputs = new ArrayList(); Iterator iter=inputs.iterator(); for (int ctr = 0; iter.hasNext() && ctr < mergeFactor; ++ctr) { - this.inputs.add(iter.next()); + toMergeInputs.add(iter.next()); iter.remove(); } - LOG.info(getName() + ": Starting merge with " + this.inputs.size() + + LOG.info(getName() + ": Starting 
merge with " + toMergeInputs.size() + " segments, while ignoring " + inputs.size() + " segments"); - notifyAll(); + synchronized(pendingToBeMerged) { + pendingToBeMerged.addLast(toMergeInputs); + pendingToBeMerged.notifyAll(); + } } } public synchronized void waitForMerge() throws InterruptedException { - while (inProgress) { + while (numPending.get() > 0) { wait(); } } public void run() { while (true) { + List inputs = null; try { // Wait for notification to start the merge... - synchronized (this) { - while (!inProgress) { - wait(); + synchronized (pendingToBeMerged) { + while(pendingToBeMerged.size() <= 0) { + pendingToBeMerged.wait(); } + // Pickup the inputs to merge. + inputs = pendingToBeMerged.removeFirst(); } // Merge merge(inputs); } catch (InterruptedException ie) { + numPending.set(0); return; } catch(Throwable t) { + numPending.set(0); reporter.reportException(t); return; } finally { synchronized (this) { - // Clear inputs - inputs = null; - inProgress = false; + numPending.decrementAndGet(); notifyAll(); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java index fc22979797a..047e6435ccf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java @@ -39,7 +39,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.util.Progress; -@InterfaceAudience.LimitedPrivate("mapreduce") +@InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable @SuppressWarnings({"unchecked", "rawtypes"}) public class Shuffle implements ShuffleConsumerPlugin, ExceptionReporter { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java index d327aa45946..92c69a60a5a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.mapreduce.task.reduce; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptID; @@ -25,7 +28,9 @@ import org.apache.hadoop.metrics.MetricsRecord; import org.apache.hadoop.metrics.MetricsUtil; import org.apache.hadoop.metrics.Updater; -class ShuffleClientMetrics implements Updater { +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public class ShuffleClientMetrics implements Updater { private MetricsRecord shuffleMetrics = null; private int numFailedFetches = 0; diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/LinuxResourceCalculatorPlugin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/LinuxResourceCalculatorPlugin.java index 7898dcae5cb..c7bdea102ea 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/LinuxResourceCalculatorPlugin.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/LinuxResourceCalculatorPlugin.java @@ -409,7 +409,7 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin { @Override public ProcResourceValues getProcResourceValues() { - pTree = pTree.getProcessTree(); + pTree.updateProcessTree(); long cpuTime = pTree.getCumulativeCpuTime(); long pMem = pTree.getCumulativeRssmem(); long vMem = pTree.getCumulativeVmem(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java index 99c2e7e180d..a510eb569a4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java @@ -166,12 +166,10 @@ public class ProcfsBasedProcessTree extends ProcessTree { } /** - * Get the process-tree with latest state. If the root-process is not alive, - * an empty tree will be returned. - * - * @return the process-tree with latest state. + * Update the process-tree with latest state. If the root-process is not alive, + * tree will become empty. */ - public ProcfsBasedProcessTree getProcessTree() { + public void updateProcessTree() { if (!pid.equals(deadPid)) { // Get the list of processes List processList = getProcessList(); @@ -197,7 +195,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { } if (me == null) { - return this; + return; } // Add each process to its parent. @@ -239,7 +237,6 @@ public class ProcfsBasedProcessTree extends ProcessTree { LOG.debug(this.toString()); } } - return this; } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 00ac075bca2..63992b6ed5d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -938,4 +938,12 @@ jhs/_HOST@REALM.TLD + + mapreduce.job.map.output.collector.class + org.apache.hadoop.mapred.MapTask$MapOutputBuffer + + It defines the MapOutputCollector implementation to use. 
+ + + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/partition/TestRehashPartitioner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/partition/TestRehashPartitioner.java new file mode 100644 index 00000000000..d2048c1686c --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/partition/TestRehashPartitioner.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.lib.partition; + +import static org.junit.Assert.*; + +import java.util.Arrays; +import java.util.Collections; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; + +import org.junit.*; + +public class TestRehashPartitioner { + + /** number of partitions */ + private static final int PARTITIONS = 32; + + /** step in sequence */ + private static final int STEP = 3; + + /** end of test sequence */ + private static final int END = 100000; + + /** maximum error for considering too big/small bucket */ + private static final double MAX_ERROR = 0.20; + + /** maximum number of oddly sized buckets */ + private static final double MAX_BADBUCKETS = 0.10; + + /** test partitioner for patterns */ + @Test + public void testPatterns() { + int results[] = new int[PARTITIONS]; + RehashPartitioner p = new RehashPartitioner < IntWritable, NullWritable> (); + /* test sequence 4, 8, 12, ... 
128 */ + for(int i = 0; i < END; i+= STEP) { + results[p.getPartition(new IntWritable(i), null, PARTITIONS)]++; + } + int badbuckets = 0; + Integer min = Collections.min(Arrays.asList(ArrayUtils.toObject(results))); + Integer max = Collections.max(Arrays.asList(ArrayUtils.toObject(results))); + Integer avg = (int) Math.round((max+min)/2.0); + System.out.println("Dumping buckets distribution: min="+min+" avg="+avg+" max="+max); + for (int i = 0; i < PARTITIONS; i++) { + double var = (results[i]-avg)/(double)(avg); + System.out.println("bucket "+i+" "+results[i]+" items, variance "+var); + if (Math.abs(var) > MAX_ERROR) + badbuckets++; + } + System.out.println(badbuckets + " of "+PARTITIONS+" are too small or large buckets"); + assertTrue("too many overflow buckets", badbuckets < PARTITIONS * MAX_BADBUCKETS); + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java new file mode 100644 index 00000000000..a8669639b2a --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.mapreduce.task.reduce; + +import static org.mockito.Mockito.mock; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.io.BoundedByteArrayOutputStream; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MapOutputFile; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.task.reduce.MapOutput.Type; +import org.junit.Assert; +import org.junit.Test; + +public class TestMergeManager { + + @Test(timeout=10000) + public void testMemoryMerge() throws Exception { + final int TOTAL_MEM_BYTES = 10000; + final int OUTPUT_SIZE = 7950; + JobConf conf = new JobConf(); + conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f); + conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES); + conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f); + conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f); + TestExceptionReporter reporter = new TestExceptionReporter(); + CyclicBarrier mergeStart = new CyclicBarrier(2); + CyclicBarrier mergeComplete = new CyclicBarrier(2); + StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter, + mergeStart, mergeComplete); + + // reserve enough map output to cause a merge when it is committed + MapOutput out1 = mgr.reserve(null, OUTPUT_SIZE, 0); + Assert.assertEquals("Should be a memory merge", + Type.MEMORY, out1.getType()); + fillOutput(out1); + MapOutput out2 = mgr.reserve(null, OUTPUT_SIZE, 0); + Assert.assertEquals("Should be a memory merge", + Type.MEMORY, out2.getType()); + fillOutput(out2); + + // next reservation should be a WAIT + MapOutput out3 = mgr.reserve(null, OUTPUT_SIZE, 0); + Assert.assertEquals("Should be told to wait", + Type.WAIT, out3.getType()); + + // trigger the first merge and wait for merge thread to start merging + // and free enough output to reserve more + out1.commit(); + out2.commit(); + mergeStart.await(); + + Assert.assertEquals(1, mgr.getNumMerges()); + + // reserve enough map output to cause another merge when committed + out1 = mgr.reserve(null, OUTPUT_SIZE, 0); + Assert.assertEquals("Should be a memory merge", + Type.MEMORY, out1.getType()); + fillOutput(out1); + out2 = mgr.reserve(null, OUTPUT_SIZE, 0); + Assert.assertEquals("Should be a memory merge", + Type.MEMORY, out2.getType()); + fillOutput(out2); + + // next reservation should be a WAIT + out3 = mgr.reserve(null, OUTPUT_SIZE, 0); + Assert.assertEquals("Should be told to wait", + Type.WAIT, out3.getType()); + + // commit output *before* merge thread completes + out1.commit(); + out2.commit(); + + // allow the first merge to complete + mergeComplete.await(); + + // start the second merge and verify + mergeStart.await(); + Assert.assertEquals(2, mgr.getNumMerges()); + + // trigger the end of the second merge + mergeComplete.await(); + + Assert.assertEquals(2, mgr.getNumMerges()); + Assert.assertEquals("exception reporter invoked", + 0, reporter.getNumExceptions()); + } + + private void fillOutput(MapOutput output) throws IOException { + BoundedByteArrayOutputStream stream = output.getArrayStream(); + int count = stream.getLimit(); + for (int i=0; i < count; ++i) { + stream.write(i); + } + } + + private static class StubbedMergeManager extends MergeManager { + private 
TestMergeThread mergeThread; + + public StubbedMergeManager(JobConf conf, ExceptionReporter reporter, + CyclicBarrier mergeStart, CyclicBarrier mergeComplete) { + super(null, conf, mock(LocalFileSystem.class), null, null, null, null, + null, null, null, null, reporter, null, mock(MapOutputFile.class)); + mergeThread.setSyncBarriers(mergeStart, mergeComplete); + } + + @Override + protected MergeThread, Text, Text> createInMemoryMerger() { + mergeThread = new TestMergeThread(this, getExceptionReporter()); + return mergeThread; + } + + public int getNumMerges() { + return mergeThread.getNumMerges(); + } + } + + private static class TestMergeThread + extends MergeThread, Text, Text> { + private AtomicInteger numMerges; + private CyclicBarrier mergeStart; + private CyclicBarrier mergeComplete; + + public TestMergeThread(MergeManager mergeManager, + ExceptionReporter reporter) { + super(mergeManager, Integer.MAX_VALUE, reporter); + numMerges = new AtomicInteger(0); + } + + public synchronized void setSyncBarriers( + CyclicBarrier mergeStart, CyclicBarrier mergeComplete) { + this.mergeStart = mergeStart; + this.mergeComplete = mergeComplete; + } + + public int getNumMerges() { + return numMerges.get(); + } + + @Override + public void merge(List> inputs) + throws IOException { + synchronized (this) { + numMerges.incrementAndGet(); + for (MapOutput input : inputs) { + manager.unreserve(input.getSize()); + } + } + + try { + mergeStart.await(); + mergeComplete.await(); + } catch (InterruptedException e) { + } catch (BrokenBarrierException e) { + } + } + } + + private static class TestExceptionReporter implements ExceptionReporter { + private List exceptions = new ArrayList(); + + @Override + public void reportException(Throwable t) { + exceptions.add(t); + t.printStackTrace(); + } + + public int getNumExceptions() { + return exceptions.size(); + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml new file mode 100644 index 00000000000..e1648eb7497 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml @@ -0,0 +1,46 @@ + + + + + hadoop-mapreduce-client + org.apache.hadoop + 3.0.0-SNAPSHOT + + 4.0.0 + org.apache.hadoop + hadoop-mapreduce-client-hs-plugins + 3.0.0-SNAPSHOT + hadoop-mapreduce-client-hs-plugins + + + + ${project.parent.basedir}/../ + + + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java new file mode 100644 index 00000000000..6d148a7825c --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java @@ -0,0 +1,61 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.mapreduce.v2.hs.webapp; + +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.util.TrackingUriPlugin; + + +public class MapReduceTrackingUriPlugin extends TrackingUriPlugin implements + Configurable { + + @Override + public void setConf(Configuration conf) { + Configuration jobConf = null; + // Force loading of mapred configuration. + if (conf != null) { + jobConf = new JobConf(conf); + } else { + jobConf = new JobConf(); + } + super.setConf(jobConf); + } + + /** + * Gets the URI to access the given application on MapReduce history server + * @param id the ID for which a URI is returned + * @return the tracking URI + * @throws URISyntaxException + */ + @Override + public URI getTrackingUri(ApplicationId id) throws URISyntaxException { + String jobSuffix = id.toString().replaceFirst("^application_", "job_"); + String historyServerAddress = + this.getConf().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS); + return new URI("http://" + historyServerAddress + "/jobhistory/job/" + + jobSuffix); + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestMapReduceTrackingUriPlugin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestMapReduceTrackingUriPlugin.java new file mode 100644 index 00000000000..1b5cea9ff2f --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestMapReduceTrackingUriPlugin.java @@ -0,0 +1,47 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.mapreduce.v2.hs.webapp; + +import static org.junit.Assert.*; + +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.BuilderUtils; +import org.junit.Test; + +public class TestMapReduceTrackingUriPlugin { + @Test + public void testProducesHistoryServerUriForAppId() throws URISyntaxException { + final String historyAddress = "example.net:424242"; + YarnConfiguration conf = new YarnConfiguration(); + conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, historyAddress); + MapReduceTrackingUriPlugin plugin = new MapReduceTrackingUriPlugin(); + plugin.setConf(conf); + ApplicationId id = BuilderUtils.newApplicationId(6384623l, 5); + String jobSuffix = id.toString().replaceFirst("^application_", "job_"); + URI expected = + new URI("http://" + historyAddress + "/jobhistory/job/" + jobSuffix); + URI actual = plugin.getTrackingUri(id); + assertEquals(expected, actual); + } +} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java index 95715c7b74c..f9048c0a4ea 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java @@ -78,12 +78,12 @@ public class HsJobsBlock extends HtmlBlock { .append(dateFormat.format(new Date(job.getFinishTime()))).append("\",\"") .append("") .append(job.getId()).append("\",\"") - .append(StringEscapeUtils.escapeHtml(job.getName())) - .append("\",\"") - .append(StringEscapeUtils.escapeHtml(job.getUserName())) - .append("\",\"") - .append(StringEscapeUtils.escapeHtml(job.getQueueName())) - .append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + job.getName()))).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + job.getUserName()))).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + job.getQueueName()))).append("\",\"") .append(job.getState()).append("\",\"") .append(String.valueOf(job.getMapsTotal())).append("\",\"") .append(String.valueOf(job.getMapsCompleted())).append("\",\"") diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientCluster.java index dc4687b5801..eda104e1d7d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientCluster.java @@ -31,6 +31,11 @@ public interface MiniMRClientCluster { public void start() throws IOException; + /** + * Stop and start back the cluster using the same 
configuration. + */ + public void restart() throws IOException; + public void stop() throws IOException; public Configuration getConfig() throws IOException; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java index f26ace18702..105d3646219 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRClientClusterFactory.java @@ -67,6 +67,10 @@ public class MiniMRClientClusterFactory { MiniMRYarnCluster miniMRYarnCluster = new MiniMRYarnCluster(caller .getName(), noOfNMs); + job.getConfiguration().set("minimrclientcluster.caller.name", + caller.getName()); + job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number", + noOfNMs); miniMRYarnCluster.init(job.getConfiguration()); miniMRYarnCluster.start(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRYarnClusterAdapter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRYarnClusterAdapter.java index 81329a97c39..74ef06f791d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRYarnClusterAdapter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MiniMRYarnClusterAdapter.java @@ -18,8 +18,13 @@ package org.apache.hadoop.mapred; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.service.Service.STATE; /** * An adapter for MiniMRYarnCluster providing a MiniMRClientCluster interface. 
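For context, a minimal sketch of how a test might exercise the MiniMRClientCluster#restart() capability added in this patch; the factory call and interface methods are taken from the hunks above, while the sketch class name and the node-manager count are illustrative assumptions only:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.MiniMRClientCluster;
    import org.apache.hadoop.mapred.MiniMRClientClusterFactory;

    public class RestartSketch {
      public static void main(String[] args) throws Exception {
        // create() builds and starts a MiniMRYarnCluster-backed cluster for the caller
        // with 2 node managers (count chosen arbitrarily for this sketch).
        MiniMRClientCluster cluster =
            MiniMRClientClusterFactory.create(RestartSketch.class, 2, new Configuration());
        try {
          // ... submit jobs against cluster.getConfig() ...

          // Stop and start the cluster again with the same configuration; the adapter
          // enables the fixed-ports settings so the restarted daemons reuse the same
          // addresses recorded in the old configuration.
          cluster.restart();
        } finally {
          cluster.stop();
        }
      }
    }

The caller name and node-manager count that restart() needs are the values the factory stashes under minimrclientcluster.caller.name and minimrclientcluster.nodemanagers.number, as shown in the MiniMRClientClusterFactory hunk above.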
@@ -29,6 +34,8 @@ public class MiniMRYarnClusterAdapter implements MiniMRClientCluster { private MiniMRYarnCluster miniMRYarnCluster; + private static final Log LOG = LogFactory.getLog(MiniMRYarnClusterAdapter.class); + public MiniMRYarnClusterAdapter(MiniMRYarnCluster miniMRYarnCluster) { this.miniMRYarnCluster = miniMRYarnCluster; } @@ -48,4 +55,22 @@ public class MiniMRYarnClusterAdapter implements MiniMRClientCluster { miniMRYarnCluster.stop(); } + @Override + public void restart() { + if (!miniMRYarnCluster.getServiceState().equals(STATE.STARTED)){ + LOG.warn("Cannot restart the mini cluster, start it first"); + return; + } + Configuration oldConf = new Configuration(getConfig()); + String callerName = oldConf.get("minimrclientcluster.caller.name", + this.getClass().getName()); + int noOfNMs = oldConf.getInt("minimrclientcluster.nodemanagers.number", 1); + oldConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true); + oldConf.setBoolean(JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS, true); + stop(); + miniMRYarnCluster = new MiniMRYarnCluster(callerName, noOfNMs); + miniMRYarnCluster.init(oldConf); + miniMRYarnCluster.start(); + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMerge.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMerge.java new file mode 100644 index 00000000000..46a514f0f3e --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMerge.java @@ -0,0 +1,405 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.mapred; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import org.apache.hadoop.conf.Configuration; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; + +import org.apache.hadoop.hdfs.MiniDFSCluster; + +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; + +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; + +import org.apache.hadoop.mapred.Task.TaskReporter; + +import junit.framework.TestCase; + +@SuppressWarnings(value={"unchecked", "deprecation"}) +/** + * This test tests the support for a merge operation in Hadoop. The input files + * are already sorted on the key. This test implements an external + * MapOutputCollector implementation that just copies the records to different + * partitions while maintaining the sort order in each partition. The Hadoop + * framework's merge on the reduce side will merge the partitions created to + * generate the final output which is sorted on the key. + */ +public class TestMerge extends TestCase { + private static final int NUM_HADOOP_DATA_NODES = 2; + // Number of input files is same as the number of mappers. + private static final int NUM_MAPPERS = 10; + // Number of reducers. + private static final int NUM_REDUCERS = 4; + // Number of lines per input file. + private static final int NUM_LINES = 1000; + // Where MR job's input will reside. + private static final Path INPUT_DIR = new Path("/testplugin/input"); + // Where output goes. + private static final Path OUTPUT = new Path("/testplugin/output"); + + public void testMerge() throws Exception { + MiniDFSCluster dfsCluster = null; + MiniMRClientCluster mrCluster = null; + FileSystem fileSystem = null; + try { + Configuration conf = new Configuration(); + // Start the mini-MR and mini-DFS clusters + dfsCluster = new MiniDFSCluster(conf, NUM_HADOOP_DATA_NODES, true, null); + fileSystem = dfsCluster.getFileSystem(); + mrCluster = MiniMRClientClusterFactory.create(this.getClass(), + NUM_HADOOP_DATA_NODES, conf); + // Generate input. + createInput(fileSystem); + // Run the test. + runMergeTest(new JobConf(mrCluster.getConfig()), fileSystem); + } finally { + if (dfsCluster != null) { + dfsCluster.shutdown(); + } + if (mrCluster != null) { + mrCluster.stop(); + } + } + } + + private void createInput(FileSystem fs) throws Exception { + fs.delete(INPUT_DIR, true); + for (int i = 0; i < NUM_MAPPERS; i++) { + OutputStream os = fs.create(new Path(INPUT_DIR, "input_" + i + ".txt")); + Writer writer = new OutputStreamWriter(os); + for (int j = 0; j < NUM_LINES; j++) { + // Create sorted key, value pairs. + int k = j + 1; + String formattedNumber = String.format("%09d", k); + writer.write(formattedNumber + " " + formattedNumber + "\n"); + } + writer.close(); + } + } + + private void runMergeTest(JobConf job, FileSystem fileSystem) + throws Exception { + // Delete any existing output. 
+ fileSystem.delete(OUTPUT, true); + job.setJobName("MergeTest"); + JobClient client = new JobClient(job); + RunningJob submittedJob = null; + FileInputFormat.setInputPaths(job, INPUT_DIR); + FileOutputFormat.setOutputPath(job, OUTPUT); + job.set("mapreduce.output.textoutputformat.separator", " "); + job.setInputFormat(TextInputFormat.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(Text.class); + job.setMapperClass(MyMapper.class); + job.setPartitionerClass(MyPartitioner.class); + job.setOutputFormat(TextOutputFormat.class); + job.setNumReduceTasks(NUM_REDUCERS); + job.set(JobContext.MAP_OUTPUT_COLLECTOR_CLASS_ATTR, + MapOutputCopier.class.getName()); + try { + submittedJob = client.submitJob(job); + try { + if (! client.monitorAndPrintJob(job, submittedJob)) { + throw new IOException("Job failed!"); + } + } catch(InterruptedException ie) { + Thread.currentThread().interrupt(); + } + } catch(IOException ioe) { + System.err.println("Job failed with: " + ioe); + } finally { + verifyOutput(submittedJob, fileSystem); + } + } + + private void verifyOutput(RunningJob submittedJob, FileSystem fileSystem) + throws Exception { + FSDataInputStream dis = null; + long numValidRecords = 0; + long numInvalidRecords = 0; + long numMappersLaunched = NUM_MAPPERS; + String prevKeyValue = "000000000"; + Path[] fileList = + FileUtil.stat2Paths(fileSystem.listStatus(OUTPUT, + new Utils.OutputFileUtils.OutputFilesFilter())); + for (Path outFile : fileList) { + try { + dis = fileSystem.open(outFile); + String record; + while((record = dis.readLine()) != null) { + // Split the line into key and value. + int blankPos = record.indexOf(" "); + String keyString = record.substring(0, blankPos); + String valueString = record.substring(blankPos+1); + // Check for sorted output and correctness of record. + if (keyString.compareTo(prevKeyValue) >= 0 + && keyString.equals(valueString)) { + prevKeyValue = keyString; + numValidRecords++; + } else { + numInvalidRecords++; + } + } + } finally { + if (dis != null) { + dis.close(); + dis = null; + } + } + } + // Make sure we got all input records in the output in sorted order. + assertEquals((long)(NUM_MAPPERS*NUM_LINES), numValidRecords); + // Make sure there is no extraneous invalid record. + assertEquals(0, numInvalidRecords); + } + + /** + * A mapper implementation that assumes that key text contains valid integers + * in displayable form. + */ + public static class MyMapper extends MapReduceBase + implements Mapper { + private Text keyText; + private Text valueText; + + public MyMapper() { + keyText = new Text(); + valueText = new Text(); + } + + @Override + public void map(LongWritable key, Text value, + OutputCollector output, + Reporter reporter) throws IOException { + String record = value.toString(); + int blankPos = record.indexOf(" "); + keyText.set(record.substring(0, blankPos)); + valueText.set(record.substring(blankPos+1)); + output.collect(keyText, valueText); + } + + public void close() throws IOException { + } + } + + /** + * Partitioner implementation to make sure that output is in total sorted + * order. We basically route key ranges to different reducers such that + * key values monotonically increase with the partition number. For example, + * in this test, the keys are numbers from 1 to 1000 in the form "000000001" + * to "000001000" in each input file. 
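+ * getPartition() maps a key k to partition (numPartitions * (k - 1)) / NUM_LINES,
+ * so with 4 reducers and NUM_LINES = 1000 each block of 250 consecutive keys
+ * lands in a single partition.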
The keys "000000001" to "000000250" are + * routed to partition 0, "000000251" to "000000500" are routed to partition 1 + * and so on since we have 4 reducers. + */ + static class MyPartitioner implements Partitioner { + public MyPartitioner() { + } + + public void configure(JobConf job) { + } + + public int getPartition(Text key, Text value, int numPartitions) { + int keyValue = 0; + try { + keyValue = Integer.parseInt(key.toString()); + } catch(NumberFormatException nfe) { + keyValue = 0; + } + int partitionNumber = (numPartitions*(Math.max(0, keyValue-1)))/NUM_LINES; + return partitionNumber; + } + } + + /** + * Implementation of map output copier(that avoids sorting) on the map side. + * It maintains keys in the input order within each partition created for + * reducers. + */ + static class MapOutputCopier + implements MapOutputCollector { + private static final int BUF_SIZE = 128*1024; + private MapTask mapTask; + private JobConf jobConf; + private TaskReporter reporter; + private int numberOfPartitions; + private Class keyClass; + private Class valueClass; + private KeyValueWriter recordWriters[]; + private ByteArrayOutputStream outStreams[]; + + public MapOutputCopier() { + } + + @SuppressWarnings("unchecked") + public void init(MapOutputCollector.Context context) + throws IOException, ClassNotFoundException { + this.mapTask = context.getMapTask(); + this.jobConf = context.getJobConf(); + this.reporter = context.getReporter(); + numberOfPartitions = jobConf.getNumReduceTasks(); + keyClass = (Class)jobConf.getMapOutputKeyClass(); + valueClass = (Class)jobConf.getMapOutputValueClass(); + recordWriters = new KeyValueWriter[numberOfPartitions]; + outStreams = new ByteArrayOutputStream[numberOfPartitions]; + + // Create output streams for partitions. + for (int i = 0; i < numberOfPartitions; i++) { + outStreams[i] = new ByteArrayOutputStream(); + recordWriters[i] = new KeyValueWriter(jobConf, outStreams[i], + keyClass, valueClass); + } + } + + public synchronized void collect(K key, V value, int partitionNumber + ) throws IOException, InterruptedException { + if (partitionNumber >= 0 && partitionNumber < numberOfPartitions) { + recordWriters[partitionNumber].write(key, value); + } else { + throw new IOException("Invalid partition number: " + partitionNumber); + } + reporter.progress(); + } + + public void close() throws IOException, InterruptedException { + long totalSize = 0; + for (int i = 0; i < numberOfPartitions; i++) { + recordWriters[i].close(); + outStreams[i].close(); + totalSize += outStreams[i].size(); + } + MapOutputFile mapOutputFile = mapTask.getMapOutputFile(); + Path finalOutput = mapOutputFile.getOutputFileForWrite(totalSize); + Path indexPath = mapOutputFile.getOutputIndexFileForWrite( + numberOfPartitions*mapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH); + // Copy partitions to final map output. 
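+      // copyPartitions() streams each partition's buffered bytes through an
+      // IFileOutputStream (which appends a checksum) into the single map
+      // output file, and records startOffset/rawLength/partLength for every
+      // partition in a SpillRecord index, matching the layout the default
+      // sort/spill collector produces for the reduce-side shuffle.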
+ copyPartitions(finalOutput, indexPath); + } + + public void flush() throws IOException, InterruptedException, + ClassNotFoundException { + } + + private void copyPartitions(Path mapOutputPath, Path indexPath) + throws IOException { + FileSystem localFs = FileSystem.getLocal(jobConf); + FileSystem rfs = ((LocalFileSystem)localFs).getRaw(); + FSDataOutputStream rawOutput = rfs.create(mapOutputPath, true, BUF_SIZE); + SpillRecord spillRecord = new SpillRecord(numberOfPartitions); + IndexRecord indexRecord = new IndexRecord(); + for (int i = 0; i < numberOfPartitions; i++) { + indexRecord.startOffset = rawOutput.getPos(); + byte buffer[] = outStreams[i].toByteArray(); + IFileOutputStream checksumOutput = new IFileOutputStream(rawOutput); + checksumOutput.write(buffer); + // Write checksum. + checksumOutput.finish(); + // Write index record + indexRecord.rawLength = (long)buffer.length; + indexRecord.partLength = rawOutput.getPos() - indexRecord.startOffset; + spillRecord.putIndex(indexRecord, i); + reporter.progress(); + } + rawOutput.close(); + spillRecord.writeToFile(indexPath, jobConf); + } + } + + static class KeyValueWriter { + private Class keyClass; + private Class valueClass; + private DataOutputBuffer dataBuffer; + private Serializer keySerializer; + private Serializer valueSerializer; + private DataOutputStream outputStream; + + public KeyValueWriter(Configuration conf, OutputStream output, + Class kyClass, Class valClass + ) throws IOException { + keyClass = kyClass; + valueClass = valClass; + dataBuffer = new DataOutputBuffer(); + SerializationFactory serializationFactory + = new SerializationFactory(conf); + keySerializer + = (Serializer)serializationFactory.getSerializer(keyClass); + keySerializer.open(dataBuffer); + valueSerializer + = (Serializer)serializationFactory.getSerializer(valueClass); + valueSerializer.open(dataBuffer); + outputStream = new DataOutputStream(output); + } + + public void write(K key, V value) throws IOException { + if (key.getClass() != keyClass) { + throw new IOException("wrong key class: "+ key.getClass() + +" is not "+ keyClass); + } + if (value.getClass() != valueClass) { + throw new IOException("wrong value class: "+ value.getClass() + +" is not "+ valueClass); + } + // Append the 'key' + keySerializer.serialize(key); + int keyLength = dataBuffer.getLength(); + if (keyLength < 0) { + throw new IOException("Negative key-length not allowed: " + keyLength + + " for " + key); + } + // Append the 'value' + valueSerializer.serialize(value); + int valueLength = dataBuffer.getLength() - keyLength; + if (valueLength < 0) { + throw new IOException("Negative value-length not allowed: " + + valueLength + " for " + value); + } + // Write the record out + WritableUtils.writeVInt(outputStream, keyLength); + WritableUtils.writeVInt(outputStream, valueLength); + outputStream.write(dataBuffer.getData(), 0, dataBuffer.getLength()); + // Reset + dataBuffer.reset(); + } + + public void close() throws IOException { + keySerializer.close(); + valueSerializer.close(); + WritableUtils.writeVInt(outputStream, IFile.EOF_MARKER); + WritableUtils.writeVInt(outputStream, IFile.EOF_MARKER); + outputStream.close(); + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClientCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClientCluster.java index 27e6666cc5b..d988c08a6f6 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClientCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClientCluster.java @@ -32,6 +32,8 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -91,6 +93,65 @@ public class TestMiniMRClientCluster { mrCluster.stop(); } + @Test + public void testRestart() throws Exception { + + String rmAddress1 = mrCluster.getConfig().get(YarnConfiguration.RM_ADDRESS); + String rmAdminAddress1 = mrCluster.getConfig().get( + YarnConfiguration.RM_ADMIN_ADDRESS); + String rmSchedAddress1 = mrCluster.getConfig().get( + YarnConfiguration.RM_SCHEDULER_ADDRESS); + String rmRstrackerAddress1 = mrCluster.getConfig().get( + YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS); + String rmWebAppAddress1 = mrCluster.getConfig().get( + YarnConfiguration.RM_WEBAPP_ADDRESS); + + String mrHistAddress1 = mrCluster.getConfig().get( + JHAdminConfig.MR_HISTORY_ADDRESS); + String mrHistWebAppAddress1 = mrCluster.getConfig().get( + JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS); + + mrCluster.restart(); + + String rmAddress2 = mrCluster.getConfig().get(YarnConfiguration.RM_ADDRESS); + String rmAdminAddress2 = mrCluster.getConfig().get( + YarnConfiguration.RM_ADMIN_ADDRESS); + String rmSchedAddress2 = mrCluster.getConfig().get( + YarnConfiguration.RM_SCHEDULER_ADDRESS); + String rmRstrackerAddress2 = mrCluster.getConfig().get( + YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS); + String rmWebAppAddress2 = mrCluster.getConfig().get( + YarnConfiguration.RM_WEBAPP_ADDRESS); + + String mrHistAddress2 = mrCluster.getConfig().get( + JHAdminConfig.MR_HISTORY_ADDRESS); + String mrHistWebAppAddress2 = mrCluster.getConfig().get( + JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS); + + assertEquals("Address before restart: " + rmAddress1 + + " is different from new address: " + rmAddress2, rmAddress1, + rmAddress2); + assertEquals("Address before restart: " + rmAdminAddress1 + + " is different from new address: " + rmAdminAddress2, + rmAdminAddress1, rmAdminAddress2); + assertEquals("Address before restart: " + rmSchedAddress1 + + " is different from new address: " + rmSchedAddress2, + rmSchedAddress1, rmSchedAddress2); + assertEquals("Address before restart: " + rmRstrackerAddress1 + + " is different from new address: " + rmRstrackerAddress2, + rmRstrackerAddress1, rmRstrackerAddress2); + assertEquals("Address before restart: " + rmWebAppAddress1 + + " is different from new address: " + rmWebAppAddress2, + rmWebAppAddress1, rmWebAppAddress2); + assertEquals("Address before restart: " + mrHistAddress1 + + " is different from new address: " + mrHistAddress2, mrHistAddress1, + mrHistAddress2); + assertEquals("Address before restart: " + mrHistWebAppAddress1 + + " is different from new address: " + mrHistWebAppAddress2, + mrHistWebAppAddress1, mrHistWebAppAddress2); + + } + @Test public void testJob() throws Exception { final Job job = createJob(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestJobOutputCommitter.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestJobOutputCommitter.java index 41f82e212e9..49b59cacfe4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestJobOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestJobOutputCommitter.java @@ -45,7 +45,7 @@ public class TestJobOutputCommitter extends HadoopTestCase { private static String TEST_ROOT_DIR = new File(System.getProperty( "test.build.data", "/tmp") - + "/" + "test-job-cleanup").toString(); + + "/" + "test-job-output-committer").toString(); private static final String CUSTOM_CLEANUP_FILE_NAME = "_custom_cleanup"; private static final String ABORT_KILLED_FILE_NAME = "_custom_abort_killed"; private static final String ABORT_FAILED_FILE_NAME = "_custom_abort_failed"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/token/TestDelegationTokenRenewal.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/token/TestDelegationTokenRenewal.java deleted file mode 100644 index c2e71e920b2..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/token/TestDelegationTokenRenewal.java +++ /dev/null @@ -1,330 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.mapreduce.security.token; - - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; -import org.apache.hadoop.security.token.delegation.DelegationKey; -import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.SecretManager.InvalidToken; -import org.apache.hadoop.security.token.TokenRenewer; -import org.apache.hadoop.util.StringUtils; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; - -/** - * unit test - - * tests addition/deletion/cancelation of renewals of delegation tokens - * - */ -@Ignore -public class TestDelegationTokenRenewal { - private static final Log LOG = - LogFactory.getLog(TestDelegationTokenRenewal.class); - - private static final Text KIND = - new Text("TestDelegationTokenRenewal.Token"); - - public static class Renewer extends TokenRenewer { - private static int counter = 0; - private static Token lastRenewed = null; - private static Token tokenToRenewIn2Sec = null; - - @Override - public boolean handleKind(Text kind) { - return KIND.equals(kind); - } - - @Override - public boolean isManaged(Token token) throws IOException { - return true; - } - - @Override - public long renew(Token t, Configuration conf) throws IOException { - MyToken token = (MyToken)t; - if(token.isCanceled()) { - throw new InvalidToken("token has been canceled"); - } - lastRenewed = token; - counter ++; - LOG.info("Called MYDFS.renewdelegationtoken " + token + - ";this dfs=" + this.hashCode() + ";c=" + counter); - if(tokenToRenewIn2Sec == token) { - // this token first renewal in 2 seconds - LOG.info("RENEW in 2 seconds"); - tokenToRenewIn2Sec=null; - return 2*1000 + System.currentTimeMillis(); - } else { - return 86400*1000 + System.currentTimeMillis(); - } - } - - @Override - public void cancel(Token t, Configuration conf) { - MyToken token = (MyToken)t; - LOG.info("Cancel token " + token); - token.cancelToken(); - } - - } - - private static Configuration conf; - - @BeforeClass - public static void setUp() throws Exception { - conf = new Configuration(); - - // create a fake FileSystem (MyFS) and assosiate it - // with "hdfs" schema. - URI uri = new URI(DelegationTokenRenewal.SCHEME+"://localhost:0"); - System.out.println("scheme is : " + uri.getScheme()); - conf.setClass("fs." 
+ uri.getScheme() + ".impl", MyFS.class, DistributedFileSystem.class); - FileSystem.setDefaultUri(conf, uri); - LOG.info("filesystem uri = " + FileSystem.getDefaultUri(conf).toString()); - } - - private static class MyDelegationTokenSecretManager extends DelegationTokenSecretManager { - - public MyDelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval, FSNamesystem namesystem) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, - delegationTokenRenewInterval, delegationTokenRemoverScanInterval, - namesystem); - } - - @Override //DelegationTokenSecretManager - public void logUpdateMasterKey(DelegationKey key) throws IOException { - return; - } - } - - /** - * add some extra functionality for testing - * 1. toString(); - * 2. cancel() and isCanceled() - */ - private static class MyToken extends Token { - public String status = "GOOD"; - public static final String CANCELED = "CANCELED"; - - public MyToken(DelegationTokenIdentifier dtId1, - MyDelegationTokenSecretManager sm) { - super(dtId1, sm); - setKind(KIND); - status = "GOOD"; - } - - public boolean isCanceled() {return status.equals(CANCELED);} - - public void cancelToken() {this.status=CANCELED;} - - public String toString() { - StringBuilder sb = new StringBuilder(1024); - - sb.append("id="); - String id = StringUtils.byteToHexString(this.getIdentifier()); - int idLen = id.length(); - sb.append(id.substring(idLen-6)); - sb.append(";k="); - sb.append(this.getKind()); - sb.append(";s="); - sb.append(this.getService()); - return sb.toString(); - } - } - - /** - * fake FileSystem - * overwrites three methods - * 1. getDelegationToken() - generates a token - * 2. renewDelegataionToken - counts number of calls, and remembers - * most recently renewed token. - * 3. cancelToken -cancels token (subsequent renew will cause IllegalToken - * exception - */ - static class MyFS extends DistributedFileSystem { - - public MyFS() {} - public void close() {} - @Override - public void initialize(URI uri, Configuration conf) throws IOException {} - - @Override - public MyToken getDelegationToken(Text renewer) throws IOException { - MyToken result = createTokens(renewer); - LOG.info("Called MYDFS.getdelegationtoken " + result); - return result; - } - - } - - /** - * auxilary - create token - * @param renewer - * @return - * @throws IOException - */ - static MyToken createTokens(Text renewer) - throws IOException { - Text user1= new Text("user1"); - - MyDelegationTokenSecretManager sm = new MyDelegationTokenSecretManager( - DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT, - DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT, - DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT, - 3600000, null); - sm.startThreads(); - - DelegationTokenIdentifier dtId1 = - new DelegationTokenIdentifier(user1, renewer, user1); - - MyToken token1 = new MyToken(dtId1, sm); - - - token1.setService(new Text("localhost:0")); - return token1; - } - - - /** - * Basic idea of the test: - * 1. create tokens. - * 2. Mark one of them to be renewed in 2 seconds (istead of - * 24 hourse) - * 3. register them for renewal - * 4. sleep for 3 seconds - * 5. count number of renewals (should 3 initial ones + one extra) - * 6. register another token for 2 seconds - * 7. cancel it immediately - * 8. Sleep and check that the 2 seconds renew didn't happen - * (totally 5 reneals) - * 9. 
check cancelation - * @throws IOException - * @throws URISyntaxException - */ - @Test - public void testDTRenewal () throws Exception { - MyFS dfs = (MyFS)FileSystem.get(conf); - LOG.info("dfs="+(Object)dfs.hashCode() + ";conf="+conf.hashCode()); - // Test 1. - add three tokens - make sure exactly one get's renewed - - // get the delegation tokens - MyToken token1, token2, token3; - token1 = dfs.getDelegationToken(new Text("user1")); - token2 = dfs.getDelegationToken(new Text("user2")); - token3 = dfs.getDelegationToken(new Text("user3")); - - //to cause this one to be set for renew in 2 secs - Renewer.tokenToRenewIn2Sec = token1; - LOG.info("token="+token1+" should be renewed for 2 secs"); - - // two distinct Namenodes - String nn1 = DelegationTokenRenewal.SCHEME + "://host1:0"; - String nn2 = DelegationTokenRenewal.SCHEME + "://host2:0"; - String nn3 = DelegationTokenRenewal.SCHEME + "://host3:0"; - - Credentials ts = new Credentials(); - - // register the token for renewal - ts.addToken(new Text(nn1), token1); - ts.addToken(new Text(nn2), token2); - ts.addToken(new Text(nn3), token3); - - // register the tokens for renewal - DelegationTokenRenewal.registerDelegationTokensForRenewal( - new JobID("job1", 1), ts, conf); - // first 3 initial renewals + 1 real - int numberOfExpectedRenewals = 3+1; - - int attempts = 10; - while(attempts-- > 0) { - try { - Thread.sleep(3*1000); // sleep 3 seconds, so it has time to renew - } catch (InterruptedException e) {} - - // since we cannot guarantee timely execution - let's give few chances - if(Renewer.counter==numberOfExpectedRenewals) - break; - } - - assertEquals("renew wasn't called as many times as expected(4):", - numberOfExpectedRenewals, Renewer.counter); - assertEquals("most recently renewed token mismatch", Renewer.lastRenewed, - token1); - - // Test 2. - // add another token ( that expires in 2 secs). Then remove it, before - // time is up. 
- // Wait for 3 secs , and make sure no renews were called - ts = new Credentials(); - MyToken token4 = dfs.getDelegationToken(new Text("user4")); - - //to cause this one to be set for renew in 2 secs - Renewer.tokenToRenewIn2Sec = token4; - LOG.info("token="+token4+" should be renewed for 2 secs"); - - String nn4 = DelegationTokenRenewal.SCHEME + "://host4:0"; - ts.addToken(new Text(nn4), token4); - - - JobID jid2 = new JobID("job2",1); - DelegationTokenRenewal.registerDelegationTokensForRenewal(jid2, ts, conf); - DelegationTokenRenewal.removeDelegationTokenRenewalForJob(jid2); - numberOfExpectedRenewals = Renewer.counter; // number of renewals so far - try { - Thread.sleep(6*1000); // sleep 6 seconds, so it has time to renew - } catch (InterruptedException e) {} - System.out.println("Counter = " + Renewer.counter + ";t="+ - Renewer.lastRenewed); - - // counter and the token should stil be the old ones - assertEquals("renew wasn't called as many times as expected", - numberOfExpectedRenewals, Renewer.counter); - - // also renewing of the cancelled token should fail - try { - token4.renew(conf); - fail("Renew of canceled token didn't fail"); - } catch (InvalidToken ite) { - //expected - } - } -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/util/TestProcfsBasedProcessTree.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/util/TestProcfsBasedProcessTree.java index 54e13025873..a80ee67be52 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/util/TestProcfsBasedProcessTree.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/util/TestProcfsBasedProcessTree.java @@ -151,7 +151,7 @@ public class TestProcfsBasedProcessTree extends TestCase { ProcfsBasedProcessTree p = new ProcfsBasedProcessTree(pid, ProcessTree.isSetsidAvailable, ProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL); - p = p.getProcessTree(); // initialize + p.updateProcessTree(); // initialize LOG.info("ProcessTree: " + p.toString()); File leaf = new File(lowestDescendant); @@ -164,7 +164,7 @@ public class TestProcfsBasedProcessTree extends TestCase { } } - p = p.getProcessTree(); // reconstruct + p.updateProcessTree(); // reconstruct LOG.info("ProcessTree: " + p.toString()); // Get the process-tree dump @@ -203,7 +203,7 @@ public class TestProcfsBasedProcessTree extends TestCase { } // ProcessTree is gone now. Any further calls should be sane. - p = p.getProcessTree(); + p.updateProcessTree(); assertFalse("ProcessTree must have been gone", p.isAlive()); assertTrue("Cumulative vmem for the gone-process is " + p.getCumulativeVmem() + " . It should be zero.", p @@ -336,7 +336,7 @@ public class TestProcfsBasedProcessTree extends TestCase { new ProcfsBasedProcessTree("100", true, 100L, procfsRootDir.getAbsolutePath()); // build the process tree. - processTree.getProcessTree(); + processTree.updateProcessTree(); // verify cumulative memory assertEquals("Cumulative virtual memory does not match", 600000L, @@ -362,7 +362,7 @@ public class TestProcfsBasedProcessTree extends TestCase { writeStatFiles(procfsRootDir, pids, procInfos); // build the process tree. 
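+    // The tree is now refreshed in place and then queried through its
+    // getters, e.g.
+    //   processTree.updateProcessTree();
+    //   long cumulativeVmem = processTree.getCumulativeVmem();
+    // rather than by reassigning the result of getProcessTree().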
- processTree.getProcessTree(); + processTree.updateProcessTree(); // verify cumulative cpu time again cumuCpuTime = ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS > 0 ? @@ -409,7 +409,7 @@ public class TestProcfsBasedProcessTree extends TestCase { new ProcfsBasedProcessTree("100", true, 100L, procfsRootDir.getAbsolutePath()); // build the process tree. - processTree.getProcessTree(); + processTree.updateProcessTree(); // verify cumulative memory assertEquals("Cumulative memory does not match", @@ -425,7 +425,7 @@ public class TestProcfsBasedProcessTree extends TestCase { writeStatFiles(procfsRootDir, newPids, newProcInfos); // check memory includes the new process. - processTree.getProcessTree(); + processTree.updateProcessTree(); assertEquals("Cumulative vmem does not include new process", 1200000L, processTree.getCumulativeVmem()); long cumuRssMem = ProcfsBasedProcessTree.PAGE_SIZE > 0 ? @@ -451,7 +451,7 @@ public class TestProcfsBasedProcessTree extends TestCase { writeStatFiles(procfsRootDir, newPids, newProcInfos); // refresh process tree - processTree.getProcessTree(); + processTree.updateProcessTree(); // processes older than 2 iterations should be same as before. assertEquals("Cumulative vmem shouldn't have included new processes", @@ -555,7 +555,7 @@ public class TestProcfsBasedProcessTree extends TestCase { new ProcfsBasedProcessTree("100", true, 100L, procfsRootDir .getAbsolutePath()); // build the process tree. - processTree.getProcessTree(); + processTree.updateProcessTree(); // Get the process-tree dump String processTreeDump = processTree.getProcessTreeDump(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index dacf995b4d8..4d7ad123d80 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml @@ -194,5 +194,6 @@ hadoop-mapreduce-client-app hadoop-mapreduce-client-jobclient hadoop-mapreduce-client-hs + hadoop-mapreduce-client-hs-plugins diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index da976f635ef..2cfe2e8c456 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -51,6 +51,8 @@ + + diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index 7076b94547c..ec8d72e22ae 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -306,7 +306,13 @@ public class StreamJob implements Tool { throw new IllegalArgumentException(e); } } - config_.set("tmpfiles", config_.get("tmpfiles", "") + fileList); + String tmpFiles = config_.get("tmpfiles", ""); + if (tmpFiles.isEmpty()) { + tmpFiles = fileList.toString(); + } else { + tmpFiles = tmpFiles + "," + fileList; + } + config_.set("tmpfiles", tmpFiles); validate(packageFiles_); } diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6ba341a385c..991f78d9fbc 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -6,13 +6,13 @@ Trunk - Unreleased NEW FEATURES + YARN-3. Add support for CPU isolation/monitoring of containers. + (adferguson via tucu) + IMPROVEMENTS YARN-84. Use Builder to build RPC server. (Brandon Li via suresh) - YARN-57. Allow process-tree based resource calculation et al. 
to be - pluggable to support it on multiple platforms. (Radim Kolar via acmurthy) - OPTIMAZATIONS BUG FIXES @@ -25,8 +25,17 @@ Release 2.0.3-alpha - Unreleased YARN-145. Add a Web UI to the fair share scheduler. (Sandy Ryza via tomwhite) + YARN-230. RM Restart phase 1 - includes support for saving/restarting all + applications on an RM bounce. (Bikas Saha via acmurthy) + IMPROVEMENTS + YARN-223. Update process tree instead of getting new process trees. + (Radim Kolar via llu) + + YARN-57. Allow process-tree based resource calculation et al. to be + pluggable to support it on multiple platforms. (Radim Kolar via acmurthy) + YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via vinodkv) @@ -73,6 +82,9 @@ Release 2.0.3-alpha - Unreleased YARN-129. Simplify classpath construction for mini YARN tests. (tomwhite) + YARN-254. Update fair scheduler web UI for hierarchical queues. + (sandyr via tucu) + OPTIMIZATIONS BUG FIXES @@ -123,6 +135,26 @@ Release 2.0.3-alpha - Unreleased YARN-72. NM should handle cleaning up containers when it shuts down. (Sandy Ryza via tomwhite) + YARN-267. Fix fair scheduler web UI. (Sandy Ryza via tomwhite) + + YARN-264. y.s.rm.DelegationTokenRenewer attempts to renew token even + after removing an app. (kkambatl via tucu) + + YARN-271. Fair scheduler hits IllegalStateException trying to reserve + different apps on same node. (Sandy Ryza via tomwhite) + + YARN-272. Fair scheduler log messages try to print objects without + overridden toString methods. (sandyr via tucu) + + YARN-278. Fair scheduler maxRunningApps config causes no apps to make + progress. (sandyr via tucu) + + YARN-282. Fair scheduler web UI double counts Apps Submitted. + (sandyr via tucu) + + YARN-283. Fair scheduler fails to get queue info without root prefix. + (sandyr via tucu) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES @@ -184,6 +216,10 @@ Release 0.23.6 - UNRELEASED IMPROVEMENTS + YARN-285. Added a temporary plugin interface for RM to be able to redirect + to JobHistory server for apps that it no longer tracks. (Derek Dagit via + vinodkv) + OPTIMIZATIONS BUG FIXES @@ -206,6 +242,12 @@ Release 0.23.6 - UNRELEASED YARN-258. RM web page UI shows Invalid Date for start and finish times (Ravi Prakash via jlowe) + YARN-266. RM and JHS Web UIs are blank because AppsBlock is not escaping + string properly (Ravi Prakash via jlowe) + + YARN-280. RM does not reject app submission with invalid tokens + (Daryn Sharp via tgraves) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 082df546305..e00bc4dde7b 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -237,4 +237,11 @@ + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptStateData.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptStateData.java new file mode 100644 index 00000000000..d1dbda0dc51 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptStateData.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; + +/* + * Contains the state data that needs to be persisted for an ApplicationAttempt + */ +@Public +@Unstable +public interface ApplicationAttemptStateData { + + /** + * The ApplicationAttemptId for the application attempt + * @return ApplicationAttemptId for the application attempt + */ + @Public + @Unstable + public ApplicationAttemptId getAttemptId(); + + public void setAttemptId(ApplicationAttemptId attemptId); + + /* + * The master container running the application attempt + * @return Container that hosts the attempt + */ + @Public + @Unstable + public Container getMasterContainer(); + + public void setMasterContainer(Container container); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationStateData.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationStateData.java new file mode 100644 index 00000000000..9b1e14a3c00 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationStateData.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; + +/** + * Contains all the state data that needs to be stored persistently + * for an Application + */ +@Public +@Unstable +public interface ApplicationStateData { + + /** + * The time at which the application was received by the Resource Manager + * @return submitTime + */ + @Public + @Unstable + public long getSubmitTime(); + + @Public + @Unstable + public void setSubmitTime(long submitTime); + + /** + * The {@link ApplicationSubmissionContext} for the application + * {@link ApplicationId} can be obtained from the this + * @return ApplicationSubmissionContext + */ + @Public + @Unstable + public ApplicationSubmissionContext getApplicationSubmissionContext(); + + @Public + @Unstable + public void setApplicationSubmissionContext( + ApplicationSubmissionContext context); + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationAttemptStateDataPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationAttemptStateDataPBImpl.java new file mode 100644 index 00000000000..fa0a596eb53 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationAttemptStateDataPBImpl.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records.impl.pb; + +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptStateData; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ProtoBase; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptStateDataProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptStateDataProtoOrBuilder; + +public class ApplicationAttemptStateDataPBImpl +extends ProtoBase +implements ApplicationAttemptStateData { + + ApplicationAttemptStateDataProto proto = + ApplicationAttemptStateDataProto.getDefaultInstance(); + ApplicationAttemptStateDataProto.Builder builder = null; + boolean viaProto = false; + + private ApplicationAttemptId attemptId = null; + private Container masterContainer = null; + + public ApplicationAttemptStateDataPBImpl() { + builder = ApplicationAttemptStateDataProto.newBuilder(); + } + + public ApplicationAttemptStateDataPBImpl( + ApplicationAttemptStateDataProto proto) { + this.proto = proto; + viaProto = true; + } + + public ApplicationAttemptStateDataProto getProto() { + mergeLocalToProto(); + proto = viaProto ? 
proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToBuilder() { + if (this.attemptId != null) { + builder.setAttemptId(((ApplicationAttemptIdPBImpl)attemptId).getProto()); + } + if(this.masterContainer != null) { + builder.setMasterContainer(((ContainerPBImpl)masterContainer).getProto()); + } + } + + private void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = ApplicationAttemptStateDataProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public ApplicationAttemptId getAttemptId() { + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder; + if(attemptId != null) { + return attemptId; + } + if (!p.hasAttemptId()) { + return null; + } + attemptId = new ApplicationAttemptIdPBImpl(p.getAttemptId()); + return attemptId; + } + + @Override + public void setAttemptId(ApplicationAttemptId attemptId) { + maybeInitBuilder(); + if (attemptId == null) { + builder.clearAttemptId(); + } + this.attemptId = attemptId; + } + + @Override + public Container getMasterContainer() { + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder; + if(masterContainer != null) { + return masterContainer; + } + if (!p.hasMasterContainer()) { + return null; + } + masterContainer = new ContainerPBImpl(p.getMasterContainer()); + return masterContainer; + } + + @Override + public void setMasterContainer(Container container) { + maybeInitBuilder(); + if (container == null) { + builder.clearMasterContainer(); + } + this.masterContainer = container; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationStateDataPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationStateDataPBImpl.java new file mode 100644 index 00000000000..dced42397aa --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationStateDataPBImpl.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.records.impl.pb; + +import org.apache.hadoop.yarn.api.records.ApplicationStateData; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.ProtoBase; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStateDataProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStateDataProtoOrBuilder; + +public class ApplicationStateDataPBImpl +extends ProtoBase +implements ApplicationStateData { + + ApplicationStateDataProto proto = + ApplicationStateDataProto.getDefaultInstance(); + ApplicationStateDataProto.Builder builder = null; + boolean viaProto = false; + + private ApplicationSubmissionContext applicationSubmissionContext = null; + + public ApplicationStateDataPBImpl() { + builder = ApplicationStateDataProto.newBuilder(); + } + + public ApplicationStateDataPBImpl( + ApplicationStateDataProto proto) { + this.proto = proto; + viaProto = true; + } + + public ApplicationStateDataProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToBuilder() { + if (this.applicationSubmissionContext != null) { + builder.setApplicationSubmissionContext( + ((ApplicationSubmissionContextPBImpl)applicationSubmissionContext) + .getProto()); + } + } + + private void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = ApplicationStateDataProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public long getSubmitTime() { + ApplicationStateDataProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasSubmitTime()) { + return -1; + } + return (p.getSubmitTime()); + } + + @Override + public void setSubmitTime(long submitTime) { + maybeInitBuilder(); + builder.setSubmitTime(submitTime); + } + + @Override + public ApplicationSubmissionContext getApplicationSubmissionContext() { + ApplicationStateDataProtoOrBuilder p = viaProto ? proto : builder; + if(applicationSubmissionContext != null) { + return applicationSubmissionContext; + } + if (!p.hasApplicationSubmissionContext()) { + return null; + } + applicationSubmissionContext = + new ApplicationSubmissionContextPBImpl( + p.getApplicationSubmissionContext()); + return applicationSubmissionContext; + } + + @Override + public void setApplicationSubmissionContext( + ApplicationSubmissionContext context) { + maybeInitBuilder(); + if (context == null) { + builder.clearApplicationSubmissionContext(); + } + this.applicationSubmissionContext = context; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java index fa3763a7968..ef566834739 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java @@ -210,7 +210,6 @@ implements ApplicationSubmissionContext { @Override public boolean getUnmanagedAM() { ApplicationSubmissionContextProtoOrBuilder p = viaProto ? 
proto : builder; - //There is a default so cancelTokens should never be null return p.getUnmanagedAm(); } @@ -219,7 +218,7 @@ implements ApplicationSubmissionContext { maybeInitBuilder(); builder.setUnmanagedAm(value); } - + @Override public boolean getCancelTokensWhenComplete() { ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/PriorityPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/PriorityPBImpl.java index 9a3f9bb2c95..3c67d66d3af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/PriorityPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/PriorityPBImpl.java @@ -61,5 +61,10 @@ public class PriorityPBImpl extends Priority { maybeInitBuilder(); builder.setPriority((priority)); } + + @Override + public String toString() { + return Integer.valueOf(getPriority()).toString(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 55c0d78e7dc..175e134c83a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -329,3 +329,15 @@ message StringBytesMapProto { optional bytes value = 2; } +//////////////////////////////////////////////////////////////////////// +////// From recovery//////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +message ApplicationStateDataProto { + optional int64 submit_time = 1; + optional ApplicationSubmissionContextProto application_submission_context = 2; +} + +message ApplicationAttemptStateDataProto { + optional ApplicationAttemptIdProto attemptId = 1; + optional ContainerProto master_container = 2; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java index 2cb69730a63..990368e0b62 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java @@ -28,8 +28,6 @@ import org.apache.hadoop.tools.GetGroupsTestBase; import org.apache.hadoop.util.Tool; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.service.Service.STATE; import org.junit.AfterClass; import org.junit.Before; @@ -46,8 +44,7 @@ public class TestGetGroups extends GetGroupsTestBase { @BeforeClass public static void setUpResourceManager() throws IOException, InterruptedException { conf = new YarnConfiguration(); - RMStateStore store = StoreFactory.getStore(conf); - resourceManager = new ResourceManager(store) { + resourceManager = new ResourceManager() { @Override protected void doSecureLogin() throws 
IOException { }; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnClient.java index eb57e4194ba..3d7f1201f5e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnClient.java @@ -34,7 +34,7 @@ public class TestYarnClient { @Test public void testClientStop() { Configuration conf = new Configuration(); - ResourceManager rm = new ResourceManager(null); + ResourceManager rm = new ResourceManager(); rm.init(conf); rm.start(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 920362671ae..2c2311d5458 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -225,10 +225,12 @@ public class YarnConfiguration extends Configuration { public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = 7*24*60*60*1000; // 7 days + public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled"; + public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false; /** The class to use as the persistent store.*/ public static final String RM_STORE = RM_PREFIX + "store.class"; - + /** The maximum number of completed applications RM keeps. */ public static final String RM_MAX_COMPLETED_APPLICATIONS = RM_PREFIX + "max-completed-applications"; @@ -398,6 +400,9 @@ public class YarnConfiguration extends Configuration { public static final String YARN_LOG_SERVER_URL = YARN_PREFIX + "log.server.url"; + + public static final String YARN_TRACKING_URL_GENERATOR = + YARN_PREFIX + "tracking.url.generator"; /** Amount of memory in GB that can be allocated for containers.*/ public static final String NM_PMEM_MB = NM_PREFIX + "resource.memory-mb"; @@ -481,6 +486,24 @@ public class YarnConfiguration extends Configuration { public static final String NM_LINUX_CONTAINER_GROUP = NM_PREFIX + "linux-container-executor.group"; + /** The type of resource enforcement to use with the + * linux container executor. 
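+   * A cgroups-based resources handler (added by YARN-3 for CPU isolation and
+   * monitoring of containers) can be plugged in through this key; the value
+   * is the fully qualified class name of the handler.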
+ */ + public static final String NM_LINUX_CONTAINER_RESOURCES_HANDLER = + NM_PREFIX + "linux-container-executor.resources-handler.class"; + + /** The path the linux container executor should use for cgroups */ + public static final String NM_LINUX_CONTAINER_CGROUPS_HIERARCHY = + NM_PREFIX + "linux-container-executor.cgroups.hierarchy"; + + /** Whether the linux container executor should mount cgroups if not found */ + public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT = + NM_PREFIX + "linux-container-executor.cgroups.mount"; + + /** Where the linux container executor should mount cgroups if not found */ + public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH = + NM_PREFIX + "linux-container-executor.cgroups.mount-path"; + /** T-file compression types used to compress aggregated logs.*/ public static final String NM_LOG_AGG_COMPRESSION_TYPE = NM_PREFIX + "log-aggregation.compression-type"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java index 21c66c2ea3d..56a81cde673 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java @@ -91,7 +91,6 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin { private float cpuUsage = UNAVAILABLE; private long sampleTime = UNAVAILABLE; private long lastSampleTime = UNAVAILABLE; - private ResourceCalculatorProcessTree pTree = null; boolean readMemInfoFile = false; boolean readCpuInfoFile = false; @@ -109,8 +108,6 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin { procfsCpuFile = PROCFS_CPUINFO; procfsStatFile = PROCFS_STAT; jiffyLengthInMillis = ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS; - String pid = System.getenv().get("JVM_PID"); - pTree = new ProcfsBasedProcessTree(pid); } /** @@ -129,8 +126,6 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin { this.procfsCpuFile = procfsCpuFile; this.procfsStatFile = procfsStatFile; this.jiffyLengthInMillis = jiffyLengthInMillis; - String pid = System.getenv().get("JVM_PID"); - pTree = new ProcfsBasedProcessTree(pid); } /** @@ -400,13 +395,4 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin { } System.out.println("CPU usage % : " + plugin.getCpuUsage()); } - - @Override - public ProcResourceValues getProcResourceValues() { - pTree = pTree.getProcessTree(); - long cpuTime = pTree.getCumulativeCpuTime(); - long pMem = pTree.getCumulativeRssmem(); - long vMem = pTree.getCumulativeVmem(); - return new ProcResourceValues(cpuTime, pMem, vMem); - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java index 158dcd5adcd..7123042d8c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java @@ -140,13 +140,12 @@ public class ProcfsBasedProcessTree extends 
ResourceCalculatorProcessTree { } /** - * Get the process-tree with latest state. If the root-process is not alive, - * an empty tree will be returned. + * Update process-tree with latest state. If the root-process is not alive, + * tree will be empty. * - * @return the process-tree with latest state. */ @Override - public ResourceCalculatorProcessTree getProcessTree() { + public void updateProcessTree() { if (!pid.equals(deadPid)) { // Get the list of processes List processList = getProcessList(); @@ -172,7 +171,7 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree { } if (me == null) { - return this; + return; } // Add each process to its parent. @@ -214,7 +213,6 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree { LOG.debug(this.toString()); } } - return this; } /** Verify that the given process id is same as its process group id. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java index b7594460631..2e438124943 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java @@ -90,48 +90,6 @@ public abstract class ResourceCalculatorPlugin extends Configured { */ public abstract float getCpuUsage(); - /** - * Obtain resource status used by current process tree. - */ - @InterfaceAudience.Private - @InterfaceStability.Unstable - public abstract ProcResourceValues getProcResourceValues(); - - public static class ProcResourceValues { - private final long cumulativeCpuTime; - private final long physicalMemorySize; - private final long virtualMemorySize; - public ProcResourceValues(long cumulativeCpuTime, long physicalMemorySize, - long virtualMemorySize) { - this.cumulativeCpuTime = cumulativeCpuTime; - this.physicalMemorySize = physicalMemorySize; - this.virtualMemorySize = virtualMemorySize; - } - /** - * Obtain the physical memory size used by current process tree. - * @return physical memory size in bytes. - */ - public long getPhysicalMemorySize() { - return physicalMemorySize; - } - - /** - * Obtain the virtual memory size used by a current process tree. - * @return virtual memory size in bytes. - */ - public long getVirtualMemorySize() { - return virtualMemorySize; - } - - /** - * Obtain the cumulative CPU time used by a current process tree. - * @return cumulative CPU time in milliseconds - */ - public long getCumulativeCpuTime() { - return cumulativeCpuTime; - } - } - /** * Create the ResourceCalculatorPlugin from the class name and configure it. 
If * class name is null, this method will try and return a memory calculator diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java index e5b4e87a2b3..2ecc1ce2513 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java @@ -43,16 +43,14 @@ public abstract class ResourceCalculatorProcessTree extends Configured { } /** - * Get the process-tree with latest state. If the root-process is not alive, - * an empty tree will be returned. + * Update the process-tree with latest state. * * Each call to this function should increment the age of the running * processes that already exist in the process tree. Age is used other API's * of the interface. * - * @return the process-tree with latest state. */ - public abstract ResourceCalculatorProcessTree getProcessTree(); + public abstract void updateProcessTree(); /** * Get a dump of the process-tree. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TrackingUriPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TrackingUriPlugin.java new file mode 100644 index 00000000000..c3def07fd77 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TrackingUriPlugin.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.yarn.api.records.ApplicationId; + +/** + * Plugin to derive a tracking URL from a Yarn Application ID + * + */ +@InterfaceAudience.LimitedPrivate({"MapReduce"}) +@InterfaceStability.Unstable +public abstract class TrackingUriPlugin extends Configured { + + /** + * Given an application ID, return a tracking URI. 
+ * @param id the ID for which a URI is returned + * @return the tracking URI + * @throws URISyntaxException + */ + public abstract URI getTrackingUri(ApplicationId id) + throws URISyntaxException; + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 2b494172a7e..43f8eec56a4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -204,6 +204,13 @@ 8192 + + Enable RM to recover state after starting. If true, then + yarn.resourcemanager.store.class must be specified + yarn.resourcemanager.recovery.enabled + false + + The class to use as the persistent store. yarn.resourcemanager.store.class @@ -477,6 +484,39 @@ yarn.nodemanager.linux-container-executor.path + + The class which should help the LCE handle resources. + yarn.nodemanager.linux-container-executor.resources-handler.class + org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler + + + + + The cgroups hierarchy under which to place YARN proccesses (cannot contain commas). + If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have + been pre-configured), then this cgroups hierarchy must already exist and be writable by the + NodeManager user, otherwise the NodeManager may fail. + Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler. + yarn.nodemanager.linux-container-executor.cgroups.hierarchy + /hadoop-yarn + + + + Whether the LCE should attempt to mount cgroups if not found. + Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler. + yarn.nodemanager.linux-container-executor.cgroups.mount + false + + + + Where the LCE should attempt to mount cgroups if not found. Common locations + include /sys/fs/cgroup and /cgroup; the default location can vary depending on the Linux + distribution in use. This path must exist before the NodeManager is launched. + Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and + yarn.nodemanager.linux-container-executor.cgroups.mount is true. + yarn.nodemanager.linux-container-executor.cgroups.mount-path + + T-file compression types used to compress aggregated logs. 
yarn.nodemanager.log-aggregation.compression-type diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java index 3ebd6c6f671..528e03e4ead 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java @@ -161,7 +161,7 @@ public class TestProcfsBasedProcessTree { String pid = getRogueTaskPID(); LOG.info("Root process pid: " + pid); ProcfsBasedProcessTree p = createProcessTree(pid); - p.getProcessTree(); // initialize + p.updateProcessTree(); // initialize LOG.info("ProcessTree: " + p.toString()); File leaf = new File(lowestDescendant); @@ -174,7 +174,7 @@ public class TestProcfsBasedProcessTree { } } - p.getProcessTree(); // reconstruct + p.updateProcessTree(); // reconstruct LOG.info("ProcessTree: " + p.toString()); // Get the process-tree dump @@ -213,7 +213,7 @@ public class TestProcfsBasedProcessTree { } // ProcessTree is gone now. Any further calls should be sane. - p.getProcessTree(); + p.updateProcessTree(); Assert.assertFalse("ProcessTree must have been gone", isAlive(pid)); Assert.assertTrue("Cumulative vmem for the gone-process is " + p.getCumulativeVmem() + " . It should be zero.", p @@ -358,7 +358,7 @@ public class TestProcfsBasedProcessTree { ProcfsBasedProcessTree processTree = createProcessTree("100", procfsRootDir.getAbsolutePath()); // build the process tree. - processTree.getProcessTree(); + processTree.updateProcessTree(); // verify cumulative memory Assert.assertEquals("Cumulative virtual memory does not match", 600000L, @@ -384,7 +384,7 @@ public class TestProcfsBasedProcessTree { writeStatFiles(procfsRootDir, pids, procInfos); // build the process tree. - processTree.getProcessTree(); + processTree.updateProcessTree(); // verify cumulative cpu time again cumuCpuTime = ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS > 0 ? @@ -431,7 +431,7 @@ public class TestProcfsBasedProcessTree { ProcfsBasedProcessTree processTree = createProcessTree("100", procfsRootDir.getAbsolutePath()); // build the process tree. - processTree.getProcessTree(); + processTree.updateProcessTree(); // verify cumulative memory Assert.assertEquals("Cumulative memory does not match", @@ -447,7 +447,7 @@ public class TestProcfsBasedProcessTree { writeStatFiles(procfsRootDir, newPids, newProcInfos); // check memory includes the new process. - processTree.getProcessTree(); + processTree.updateProcessTree(); Assert.assertEquals("Cumulative vmem does not include new process", 1200000L, processTree.getCumulativeVmem()); long cumuRssMem = ProcfsBasedProcessTree.PAGE_SIZE > 0 ? @@ -473,7 +473,7 @@ public class TestProcfsBasedProcessTree { writeStatFiles(procfsRootDir, newPids, newProcInfos); // refresh process tree - processTree.getProcessTree(); + processTree.updateProcessTree(); // processes older than 2 iterations should be same as before. Assert.assertEquals("Cumulative vmem shouldn't have included new processes", @@ -577,7 +577,7 @@ public class TestProcfsBasedProcessTree { ProcfsBasedProcessTree processTree = createProcessTree( "100", procfsRootDir.getAbsolutePath()); // build the process tree. 
- processTree.getProcessTree(); + processTree.updateProcessTree(); // Get the process-tree dump String processTreeDump = processTree.getProcessTreeDump(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java index 68b20c97984..32ceb2378f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java @@ -34,8 +34,7 @@ public class TestResourceCalculatorProcessTree { super(pid); } - public ResourceCalculatorProcessTree getProcessTree() { - return this; + public void updateProcessTree() { } public String getProcessTreeDump() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index c18a0c93dd9..e1c86eb7141 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.hadoop.util.StringUtils; @@ -38,6 +39,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; +import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler; +import org.apache.hadoop.yarn.server.nodemanager.util.LCEResourcesHandler; import org.apache.hadoop.yarn.util.ConverterUtils; public class LinuxContainerExecutor extends ContainerExecutor { @@ -46,11 +49,18 @@ public class LinuxContainerExecutor extends ContainerExecutor { .getLog(LinuxContainerExecutor.class); private String containerExecutorExe; + private LCEResourcesHandler resourcesHandler; + @Override public void setConf(Configuration conf) { super.setConf(conf); containerExecutorExe = getContainerExecutorExecutablePath(conf); + + resourcesHandler = ReflectionUtils.newInstance( + conf.getClass(YarnConfiguration.NM_LINUX_CONTAINER_RESOURCES_HANDLER, + DefaultLCEResourcesHandler.class, LCEResourcesHandler.class), conf); + resourcesHandler.setConf(conf); } /** @@ -81,7 +91,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { UNABLE_TO_EXECUTE_CONTAINER_SCRIPT(7), INVALID_CONTAINER_PID(9), INVALID_CONTAINER_EXEC_PERMISSIONS(22), - INVALID_CONFIG_FILE(24); + INVALID_CONFIG_FILE(24), 
+ WRITE_CGROUP_FAILED(27); private final int value; ResultCode(int value) { @@ -124,6 +135,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { throw new IOException("Linux container executor not configured properly" + " (error=" + exitCode + ")", e); } + + resourcesHandler.init(this); } @Override @@ -188,6 +201,11 @@ public class LinuxContainerExecutor extends ContainerExecutor { ContainerId containerId = container.getContainerID(); String containerIdStr = ConverterUtils.toString(containerId); + + resourcesHandler.preExecute(containerId, + container.getLaunchContext().getResource()); + String resourcesOptions = resourcesHandler.getResourcesOption( + containerId); ShellCommandExecutor shExec = null; @@ -202,7 +220,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { nmPrivateTokensPath.toUri().getPath().toString(), pidFilePath.toString(), StringUtils.join(",", localDirs), - StringUtils.join(",", logDirs))); + StringUtils.join(",", logDirs), + resourcesOptions)); String[] commandArray = command.toArray(new String[command.size()]); shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd container.getLaunchContext().getEnvironment()); // sanitized env @@ -241,7 +260,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { } return exitCode; } finally { - ; // + resourcesHandler.postExecute(containerId); } if (LOG.isDebugEnabled()) { LOG.debug("Output from LinuxContainerExecutor's launchContainer follows:"); @@ -316,4 +335,27 @@ public class LinuxContainerExecutor extends ContainerExecutor { } } } + + public void mountCgroups(List cgroupKVs, String hierarchy) + throws IOException { + List command = new ArrayList( + Arrays.asList(containerExecutorExe, "--mount-cgroups", hierarchy)); + command.addAll(cgroupKVs); + + String[] commandArray = command.toArray(new String[command.size()]); + ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray); + + if (LOG.isDebugEnabled()) { + LOG.debug("mountCgroups: " + Arrays.toString(commandArray)); + } + + try { + shExec.execute(); + } catch (IOException e) { + int ret_code = shExec.getExitCode(); + logOutput(shExec.getOutput()); + throw new IOException("Problem mounting cgroups " + cgroupKVs + + "; exit code = " + ret_code, e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index dd1487a39dc..62d6afc557d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -396,9 +396,7 @@ public class ContainersMonitorImpl extends AbstractService implements LOG.debug("Constructing ProcessTree for : PID = " + pId + " ContainerId = " + containerId); ResourceCalculatorProcessTree pTree = ptInfo.getProcessTree(); - pTree = pTree.getProcessTree(); // get the updated process-tree - ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of - // updated state + pTree.updateProcessTree(); // update process-tree 
long currentVmemUsage = pTree.getCumulativeVmem(); long currentPmemUsage = pTree.getCumulativeRssmem(); // as processes begin with an age 1, we want to see if there diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java new file mode 100644 index 00000000000..53a01ebddec --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java @@ -0,0 +1,321 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; + +public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { + + final static Log LOG = LogFactory + .getLog(CgroupsLCEResourcesHandler.class); + + private Configuration conf; + private String cgroupPrefix; + private boolean cgroupMount; + private String cgroupMountPath; + + private boolean cpuWeightEnabled = true; + + private final String MTAB_FILE = "/proc/mounts"; + private final String CGROUPS_FSTYPE = "cgroup"; + private final String CONTROLLER_CPU = "cpu"; + private final int CPU_DEFAULT_WEIGHT = 1024; // set by kernel + private final Map controllerPaths; // Controller -> path + + public CgroupsLCEResourcesHandler() { + this.controllerPaths = new HashMap(); + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + + public synchronized void init(LinuxContainerExecutor lce) throws IOException { + + this.cgroupPrefix = conf.get(YarnConfiguration. + NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn"); + this.cgroupMount = conf.getBoolean(YarnConfiguration. 
+ NM_LINUX_CONTAINER_CGROUPS_MOUNT, false); + this.cgroupMountPath = conf.get(YarnConfiguration. + NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, null); + + // remove extra /'s at end or start of cgroupPrefix + if (cgroupPrefix.charAt(0) == '/') { + cgroupPrefix = cgroupPrefix.substring(1); + } + + int len = cgroupPrefix.length(); + if (cgroupPrefix.charAt(len - 1) == '/') { + cgroupPrefix = cgroupPrefix.substring(0, len - 1); + } + + // mount cgroups if requested + if (cgroupMount && cgroupMountPath != null) { + ArrayList cgroupKVs = new ArrayList(); + cgroupKVs.add(CONTROLLER_CPU + "=" + cgroupMountPath + "/" + + CONTROLLER_CPU); + lce.mountCgroups(cgroupKVs, cgroupPrefix); + } + + initializeControllerPaths(); + } + + + boolean isCpuWeightEnabled() { + return this.cpuWeightEnabled; + } + + /* + * Next four functions are for an individual cgroup. + */ + + private String pathForCgroup(String controller, String groupName) { + String controllerPath = controllerPaths.get(controller); + return controllerPath + "/" + cgroupPrefix + "/" + groupName; + } + + private void createCgroup(String controller, String groupName) + throws IOException { + String path = pathForCgroup(controller, groupName); + + if (LOG.isDebugEnabled()) { + LOG.debug("createCgroup: " + path); + } + + if (! new File(path).mkdir()) { + throw new IOException("Failed to create cgroup at " + path); + } + } + + private void updateCgroup(String controller, String groupName, String param, + String value) throws IOException { + FileWriter f = null; + String path = pathForCgroup(controller, groupName); + param = controller + "." + param; + + if (LOG.isDebugEnabled()) { + LOG.debug("updateCgroup: " + path + ": " + param + "=" + value); + } + + try { + f = new FileWriter(path + "/" + param, false); + f.write(value); + } catch (IOException e) { + throw new IOException("Unable to set " + param + "=" + value + + " for cgroup at: " + path, e); + } finally { + if (f != null) { + try { + f.close(); + } catch (IOException e) { + LOG.warn("Unable to close cgroup file: " + + path, e); + } + } + } + } + + private void deleteCgroup(String controller, String groupName) { + String path = pathForCgroup(controller, groupName); + + LOG.debug("deleteCgroup: " + path); + + if (! new File(path).delete()) { + LOG.warn("Unable to delete cgroup at: " + path); + } + } + + /* + * Next three functions operate on all the resources we are enforcing. + */ + + /* + * TODO: After YARN-2 is committed, we should call containerResource.getCpus() + * (or equivalent) to multiply the weight by the number of requested cpus. + */ + private void setupLimits(ContainerId containerId, + Resource containerResource) throws IOException { + String containerName = containerId.toString(); + + if (isCpuWeightEnabled()) { + createCgroup(CONTROLLER_CPU, containerName); + updateCgroup(CONTROLLER_CPU, containerName, "shares", + String.valueOf(CPU_DEFAULT_WEIGHT)); + } + } + + private void clearLimits(ContainerId containerId) { + String containerName = containerId.toString(); + + // Based on testing, ApplicationMaster executables don't terminate until + // a little after the container appears to have finished. Therefore, we + // wait a short bit for the cgroup to become empty before deleting it. 
+ if (containerId.getId() == 1) { + try { + Thread.sleep(500); + } catch (InterruptedException e) { + // not a problem, continue anyway + } + } + + if (isCpuWeightEnabled()) { + deleteCgroup(CONTROLLER_CPU, containerName); + } + } + + /* + * LCE Resources Handler interface + */ + + public void preExecute(ContainerId containerId, Resource containerResource) + throws IOException { + setupLimits(containerId, containerResource); + } + + public void postExecute(ContainerId containerId) { + clearLimits(containerId); + } + + public String getResourcesOption(ContainerId containerId) { + String containerName = containerId.toString(); + + StringBuilder sb = new StringBuilder("cgroups="); + + if (isCpuWeightEnabled()) { + sb.append(pathForCgroup(CONTROLLER_CPU, containerName) + "/cgroup.procs"); + sb.append(","); + } + + if (sb.charAt(sb.length() - 1) == ',') { + sb.deleteCharAt(sb.length() - 1); + } + + return sb.toString(); + } + + /* We are looking for entries of the form: + * none /cgroup/path/mem cgroup rw,memory 0 0 + * + * Use a simple pattern that splits on the five spaces, and + * grabs the 2, 3, and 4th fields. + */ + + private static final Pattern MTAB_FILE_FORMAT = Pattern.compile( + "^[^\\s]+\\s([^\\s]+)\\s([^\\s]+)\\s([^\\s]+)\\s[^\\s]+\\s[^\\s]+$"); + + /* + * Returns a map: path -> mount options + * for mounts with type "cgroup". Cgroup controllers will + * appear in the list of options for a path. + */ + private Map> parseMtab() throws IOException { + Map> ret = new HashMap>(); + BufferedReader in = null; + + try { + in = new BufferedReader(new FileReader(new File(MTAB_FILE))); + + for (String str = in.readLine(); str != null; + str = in.readLine()) { + Matcher m = MTAB_FILE_FORMAT.matcher(str); + boolean mat = m.find(); + if (mat) { + String path = m.group(1); + String type = m.group(2); + String options = m.group(3); + + if (type.equals(CGROUPS_FSTYPE)) { + List value = Arrays.asList(options.split(",")); + ret.put(path, value); + } + } + } + } catch (IOException e) { + throw new IOException("Error while reading " + MTAB_FILE, e); + } finally { + // Close the streams + try { + in.close(); + } catch (IOException e2) { + LOG.warn("Error closing the stream: " + MTAB_FILE, e2); + } + } + + return ret; + } + + private String findControllerInMtab(String controller, + Map> entries) { + for (Entry> e : entries.entrySet()) { + if (e.getValue().contains(controller)) + return e.getKey(); + } + + return null; + } + + private void initializeControllerPaths() throws IOException { + String controllerPath; + Map> parsedMtab = parseMtab(); + + // CPU + + controllerPath = findControllerInMtab(CONTROLLER_CPU, parsedMtab); + + if (controllerPath != null) { + File f = new File(controllerPath + "/" + this.cgroupPrefix); + + if (f.canWrite()) { + controllerPaths.put(CONTROLLER_CPU, controllerPath); + } else { + throw new IOException("Not able to enforce cpu weights; cannot write " + + "to cgroup at: " + controllerPath); + } + } else { + throw new IOException("Not able to enforce cpu weights; cannot find " + + "cgroup for cpu controller in " + MTAB_FILE); + } + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/DefaultLCEResourcesHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/DefaultLCEResourcesHandler.java new file mode 100644 index 00000000000..fcb166ffbf0 --- 
/dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/DefaultLCEResourcesHandler.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.util; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; + +public class DefaultLCEResourcesHandler implements LCEResourcesHandler { + + final static Log LOG = LogFactory + .getLog(DefaultLCEResourcesHandler.class); + + private Configuration conf; + + public DefaultLCEResourcesHandler() { + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + + public void init(LinuxContainerExecutor lce) { + } + + /* + * LCE Resources Handler interface + */ + + public void preExecute(ContainerId containerId, Resource containerResource) { + } + + public void postExecute(ContainerId containerId) { + } + + public String getResourcesOption(ContainerId containerId) { + return "cgroups=none"; + } + + +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/LCEResourcesHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/LCEResourcesHandler.java new file mode 100644 index 00000000000..34f7f31c327 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/LCEResourcesHandler.java @@ -0,0 +1,49 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.nodemanager.util; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; + +public interface LCEResourcesHandler extends Configurable { + + void init(LinuxContainerExecutor lce) throws IOException; + + /** + * Called by the LinuxContainerExecutor before launching the executable + * inside the container. + * @param containerId the id of the container being launched + * @param containerResource the node resources the container will be using + */ + void preExecute(ContainerId containerId, Resource containerResource) + throws IOException; + + /** + * Called by the LinuxContainerExecutor after the executable inside the + * container has exited (successfully or not). + * @param containerId the id of the container which was launched + */ + void postExecute(ContainerId containerId); + + String getResourcesOption(ContainerId containerId); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c index eb85eb2f18e..f393c97f9fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c @@ -308,7 +308,7 @@ char ** extract_values(char *value) { tempTok = strtok_r(NULL, ",", &tempstr); } } - if (size > 0) { + if (toPass != NULL) { toPass[size] = NULL; } return toPass; @@ -323,3 +323,52 @@ void free_values(char** values) { free(values); } } + +/** + * If str is a string of the form key=val, find 'key' + */ +int get_kv_key(const char *input, char *out, size_t out_len) { + + if (input == NULL) + return -EINVAL; + + char *split = strchr(input, '='); + + if (split == NULL) + return -EINVAL; + + int key_len = split - input; + + if (out_len < (key_len + 1) || out == NULL) + return -ENAMETOOLONG; + + memcpy(out, input, key_len); + out[key_len] = '\0'; + + return 0; +} + +/** + * If str is a string of the form key=val, find 'val' + */ +int get_kv_value(const char *input, char *out, size_t out_len) { + + if (input == NULL) + return -EINVAL; + + char *split = strchr(input, '='); + + if (split == NULL) + return -EINVAL; + + split++; // advance past '=' to the value + int val_len = (input + strlen(input)) - split; + + if (out_len < (val_len + 1) || out == NULL) + return -ENAMETOOLONG; + + memcpy(out, split, val_len); + out[val_len] = '\0'; + + return 0; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h index fb9529f0dc7..133e67b8c5e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h @@ -16,6 +16,8 @@ * limitations under the License. */ +#include + /** * Ensure that the configuration file and all of the containing directories * are only writable by root. Otherwise, an attacker can change the @@ -50,3 +52,28 @@ void free_values(char** values); //method to free allocated configuration void free_configurations(); +/** + * If str is a string of the form key=val, find 'key' + * + * @param input The input string + * @param out Where to put the output string. + * @param out_len The length of the output buffer. + * + * @return -ENAMETOOLONG if out_len is not long enough; + * -EINVAL if there is no equals sign in the input; + * 0 on success + */ +int get_kv_key(const char *input, char *out, size_t out_len); + +/** + * If str is a string of the form key=val, find 'val' + * + * @param input The input string + * @param out Where to put the output string. + * @param out_len The length of the output buffer. + * + * @return -ENAMETOOLONG if out_len is not long enough; + * -EINVAL if there is no equals sign in the input; + * 0 on success + */ +int get_kv_value(const char *input, char *out, size_t out_len); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index d08a1d62bf8..af443770f85 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -31,6 +31,7 @@ #include #include #include +#include static const int DEFAULT_MIN_USERID = 1000; @@ -150,6 +151,44 @@ static int change_effective_user(uid_t user, gid_t group) { return 0; } +/** + * Write the pid of the current process to the cgroup file. + * cgroup_file: Path to cgroup file where pid needs to be written to. + */ +static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) { + uid_t user = geteuid(); + gid_t group = getegid(); + if (change_effective_user(0, 0) != 0) { + return -1; + } + + // open + int cgroup_fd = open(cgroup_file, O_WRONLY | O_APPEND, 0); + if (cgroup_fd == -1) { + fprintf(LOGFILE, "Can't open file %s as node manager - %s\n", cgroup_file, + strerror(errno)); + return -1; + } + + // write pid + char pid_buf[21]; + snprintf(pid_buf, sizeof(pid_buf), "%d", pid); + ssize_t written = write(cgroup_fd, pid_buf, strlen(pid_buf)); + close(cgroup_fd); + if (written == -1) { + fprintf(LOGFILE, "Failed to write pid to file %s - %s\n", + cgroup_file, strerror(errno)); + return -1; + } + + // Revert back to the calling user. + if (change_effective_user(user, group)) { + return -1; + } + + return 0; +} + /** * Write the pid of the current process into the pid file. 
* pid_file: Path to pid file where pid needs to be written to @@ -810,7 +849,8 @@ int launch_container_as_user(const char *user, const char *app_id, const char *container_id, const char *work_dir, const char *script_name, const char *cred_file, const char* pid_file, char* const* local_dirs, - char* const* log_dirs) { + char* const* log_dirs, const char *resources_key, + char* const* resources_values) { int exit_code = -1; char *script_file_dest = NULL; char *cred_file_dest = NULL; @@ -849,7 +889,22 @@ int launch_container_as_user(const char *user, const char *app_id, || write_pid_to_file_as_nm(pid_file, pid) != 0) { exit_code = WRITE_PIDFILE_FAILED; goto cleanup; - } + } + + // cgroups-based resource enforcement + if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { + + // write pid to cgroups + char* const* cgroup_ptr; + for (cgroup_ptr = resources_values; cgroup_ptr != NULL && + *cgroup_ptr != NULL; ++cgroup_ptr) { + if (strcmp(*cgroup_ptr, "none") != 0 && + write_pid_to_cgroup_as_root(*cgroup_ptr, pid) != 0) { + exit_code = WRITE_CGROUP_FAILED; + goto cleanup; + } + } + } // give up root privs if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) { @@ -1108,4 +1163,73 @@ int delete_as_user(const char *user, return ret; } +void chown_dir_contents(const char *dir_path, uid_t uid, gid_t gid) { + DIR *dp; + struct dirent *ep; + + char *path_tmp = malloc(strlen(dir_path) + NAME_MAX + 2); + if (path_tmp == NULL) { + return; + } + + char *buf = stpncpy(path_tmp, dir_path, strlen(dir_path)); + *buf++ = '/'; + + dp = opendir(dir_path); + if (dp != NULL) { + while (ep = readdir(dp)) { + stpncpy(buf, ep->d_name, strlen(ep->d_name)); + buf[strlen(ep->d_name)] = '\0'; + change_owner(path_tmp, uid, gid); + } + closedir(dp); + } + + free(path_tmp); +} + +/** + * Mount a cgroup controller at the requested mount point and create + * a hierarchy for the Hadoop NodeManager to manage. 
+ * pair: a key-value pair of the form "controller=mount-path" + * hierarchy: the top directory of the hierarchy for the NM + */ +int mount_cgroup(const char *pair, const char *hierarchy) { + char *controller = malloc(strlen(pair)); + char *mount_path = malloc(strlen(pair)); + char hier_path[PATH_MAX]; + int result = 0; + + if (get_kv_key(pair, controller, strlen(pair)) < 0 || + get_kv_value(pair, mount_path, strlen(pair)) < 0) { + fprintf(LOGFILE, "Failed to mount cgroup controller; invalid option: %s\n", + pair); + result = -1; + } else { + if (mount("none", mount_path, "cgroup", 0, controller) == 0) { + char *buf = stpncpy(hier_path, mount_path, strlen(mount_path)); + *buf++ = '/'; + snprintf(buf, PATH_MAX - (buf - hier_path), "%s", hierarchy); + + // create hierarchy as 0750 and chown to Hadoop NM user + const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP; + if (mkdirs(hier_path, perms) == 0) { + change_owner(hier_path, nm_uid, nm_gid); + chown_dir_contents(hier_path, nm_uid, nm_gid); + } + } else { + fprintf(LOGFILE, "Failed to mount cgroup controller %s at %s - %s\n", + controller, mount_path, strerror(errno)); + // if controller is already mounted, don't stop trying to mount others + if (errno != EBUSY) { + result = -1; + } + } + } + + free(controller); + free(mount_path); + + return result; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h index ac8b07dac71..ec5a3742785 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h @@ -53,7 +53,8 @@ enum errorcodes { // PREPARE_JOB_LOGS_FAILED (NOT USED) 23 INVALID_CONFIG_FILE = 24, SETSID_OPER_FAILED = 25, - WRITE_PIDFILE_FAILED = 26 + WRITE_PIDFILE_FAILED = 26, + WRITE_CGROUP_FAILED = 27 }; #define NM_GROUP_KEY "yarn.nodemanager.linux-container-executor.group" @@ -111,13 +112,16 @@ int initialize_app(const char *user, const char *app_id, * @param pid_file file where pid of process should be written to * @param local_dirs nodemanager-local-directories to be used * @param log_dirs nodemanager-log-directories to be used + * @param resources_key type of resource enforcement (none, cgroups) + * @param resources_value values needed to apply resource enforcement * @return -1 or errorcode enum value on error (should never return on success). */ int launch_container_as_user(const char * user, const char *app_id, const char *container_id, const char *work_dir, const char *script_name, const char *cred_file, const char *pid_file, char* const* local_dirs, - char* const* log_dirs); + char* const* log_dirs, const char *resources_key, + char* const* resources_value); /** * Function used to signal a container launched by the user. 
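Illustrative aside (not part of the patch): the resources argument that launch_container_as_user() now receives is a single key=value pair. The key names the enforcement mechanism ("cgroups") and the value is a comma-separated list of cgroup.procs files into which the launcher writes the child pid; DefaultLCEResourcesHandler passes the literal pair "cgroups=none", which the launcher skips. The sketch below only mirrors that contract; the cgroup path is a made-up example, and the real string is produced per container by CgroupsLCEResourcesHandler.getResourcesOption() on the Java side.

/* Sketch of the key=value contract handled by get_kv_key()/get_kv_value()
 * and extract_values(); not code from this patch. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void) {
  /* Example only; the real value is built per container by the Java side. */
  const char *resources =
      "cgroups=/sys/fs/cgroup/cpu/hadoop-yarn/container_example/cgroup.procs";

  const char *split = strchr(resources, '=');
  if (split == NULL) {
    fprintf(stderr, "not a key=value pair: %s\n", resources);
    return 1;
  }

  size_t key_len = (size_t)(split - resources);
  char *key = malloc(key_len + 1);             /* "cgroups" */
  memcpy(key, resources, key_len);
  key[key_len] = '\0';

  char *value = strdup(split + 1);             /* comma-separated paths */
  char *save = NULL;
  for (char *tok = strtok_r(value, ",", &save); tok != NULL;
       tok = strtok_r(NULL, ",", &save)) {
    if (strcmp(tok, "none") != 0) {
      printf("%s: write container pid to %s\n", key, tok);
    }
  }

  free(key);
  free(value);
  return 0;
}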
@@ -196,3 +200,5 @@ int initialize_user(const char *user, char* const* local_dirs); int create_directory_for_user(const char* path); int change_user(uid_t user, gid_t group); + +int mount_cgroup(const char *pair, const char *hierarchy); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c index d6ce5aa7061..f0245d81dc1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c @@ -45,6 +45,9 @@ void display_usage(FILE *stream) { fprintf(stream, "Usage: container-executor --checksetup\n"); + fprintf(stream, + "Usage: container-executor --mount-cgroups "\ + "hierarchy controller=path...\n"); fprintf(stream, "Usage: container-executor user command command-args\n"); fprintf(stream, "Commands:\n"); @@ -52,7 +55,7 @@ void display_usage(FILE *stream) { "nm-local-dirs nm-log-dirs cmd app...\n", INITIALIZE_CONTAINER); fprintf(stream, " launch container: %2d appid containerid workdir "\ - "container-script tokens pidfile nm-local-dirs nm-log-dirs\n", + "container-script tokens pidfile nm-local-dirs nm-log-dirs resources\n", LAUNCH_CONTAINER); fprintf(stream, " signal container: %2d container-pid signal\n", SIGNAL_CONTAINER); @@ -63,14 +66,21 @@ void display_usage(FILE *stream) { int main(int argc, char **argv) { int invalid_args = 0; int do_check_setup = 0; + int do_mount_cgroups = 0; LOGFILE = stdout; ERRORFILE = stderr; + if (argc > 1) { + if (strcmp("--mount-cgroups", argv[1]) == 0) { + do_mount_cgroups = 1; + } + } + // Minimum number of arguments required to run // the std. 
container-executor commands is 4 // 4 args not needed for checksetup option - if (argc < 4) { + if (argc < 4 && !do_mount_cgroups) { invalid_args = 1; if (argc == 2) { const char *arg1 = argv[1]; @@ -103,6 +113,7 @@ int main(int argc, char **argv) { char *orig_conf_file = HADOOP_CONF_DIR "/" CONF_FILENAME; char *conf_file = resolve_config_path(orig_conf_file, argv[0]); char *local_dirs, *log_dirs; + char *resources, *resources_key, *resources_value; if (conf_file == NULL) { fprintf(ERRORFILE, "Configuration file %s not found.\n", orig_conf_file); @@ -145,6 +156,18 @@ int main(int argc, char **argv) { return 0; } + if (do_mount_cgroups) { + optind++; + char *hierarchy = argv[optind++]; + int result = 0; + + while (optind < argc && result == 0) { + result = mount_cgroup(argv[optind++], hierarchy); + } + + return result; + } + //checks done for user name if (argv[optind] == NULL) { fprintf(ERRORFILE, "Invalid user name.\n"); @@ -180,8 +203,8 @@ int main(int argc, char **argv) { extract_values(log_dirs), argv + optind); break; case LAUNCH_CONTAINER: - if (argc != 11) { - fprintf(ERRORFILE, "Too few arguments (%d vs 11) for launch container\n", + if (argc != 12) { + fprintf(ERRORFILE, "Wrong number of arguments (%d vs 12) for launch container\n", argc); fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; @@ -194,10 +217,26 @@ int main(int argc, char **argv) { pid_file = argv[optind++]; local_dirs = argv[optind++];// good local dirs as a comma separated list log_dirs = argv[optind++];// good log dirs as a comma separated list + resources = argv[optind++];// key,value pair describing resources + char *resources_key = malloc(strlen(resources)); + char *resources_value = malloc(strlen(resources)); + if (get_kv_key(resources, resources_key, strlen(resources)) < 0 || + get_kv_value(resources, resources_value, strlen(resources)) < 0) { + fprintf(ERRORFILE, "Invalid arguments for cgroups resources: %s", + resources); + fflush(ERRORFILE); + free(resources_key); + free(resources_value); + return INVALID_ARGUMENT_NUMBER; + } + char** resources_values = extract_values(resources_value); exit_code = launch_container_as_user(user_detail->pw_name, app_id, container_id, current_dir, script_file, cred_file, pid_file, extract_values(local_dirs), - extract_values(log_dirs)); + extract_values(log_dirs), resources_key, + resources_values); + free(resources_key); + free(resources_value); break; case SIGNAL_CONTAINER: if (argc != 5) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c index 6d7e154a72f..2563fa68eb3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c @@ -39,6 +39,7 @@ static char* username = NULL; static char* local_dirs = NULL; static char* log_dirs = NULL; +static char* resources = NULL; /** * Run the command using the effective user id. 
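Illustrative aside (not part of the patch): with the extra resources operand, a launch-container invocation of the setuid binary now carries twelve arguments including argv[0], and cgroup mounting is a separate --mount-cgroups invocation. The sketch below only spells out that positional layout; every path and id in it is invented for illustration, and the numeric command code is an assumption (the real value comes from the launcher's command enum).

/* Sketch of the two invocation shapes referenced above; not code from this
 * patch, and all ids/paths are invented for illustration. */
#include <stdio.h>

int main(void) {
  const char *launch_argv[] = {
    "container-executor",                 /* argv[0] */
    "nobody",                             /* user the container runs as */
    "1",                                  /* launch-container command code (assumed) */
    "application_example_0001",           /* application id */
    "container_example_0001_01_000002",   /* container id */
    "/tmp/nm-local/workdir",              /* container work dir */
    "/tmp/nm-local/launch_container.sh",  /* container script */
    "/tmp/nm-local/container_tokens",     /* credentials file */
    "/tmp/nm-local/container.pid",        /* pid file */
    "/tmp/nm-local",                      /* comma-separated NM local dirs */
    "/tmp/nm-logs",                       /* comma-separated NM log dirs */
    "cgroups=none"                        /* new trailing resources pair */
  };
  printf("launch argc = %d\n",
         (int)(sizeof(launch_argv) / sizeof(launch_argv[0])));  /* prints 12 */

  /* Separate form used when the NM asks the executor to mount cgroups: */
  printf("container-executor --mount-cgroups hadoop-yarn "
         "cpu=/sys/fs/cgroup/cpu\n");
  return 0;
}

In the mount form, assuming a mount-path of /sys/fs/cgroup, the controller=path pair follows the "cpu=<mount-path>/cpu" shape built by CgroupsLCEResourcesHandler, and the hierarchy operand is yarn.nodemanager.linux-container-executor.cgroups.hierarchy with leading and trailing slashes stripped.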
@@ -610,9 +611,17 @@ void test_run_container() { strerror(errno)); exit(1); } else if (child == 0) { + char *key = malloc(strlen(resources)); + char *value = malloc(strlen(resources)); + if (get_kv_key(resources, key, strlen(resources)) < 0 || + get_kv_value(resources, key, strlen(resources)) < 0) { + printf("FAIL: resources failed - %s\n"); + exit(1); + } if (launch_container_as_user(username, "app_4", "container_1", container_dir, script_name, TEST_ROOT "/creds.txt", pid_file, - extract_values(local_dirs), extract_values(log_dirs)) != 0) { + extract_values(local_dirs), extract_values(log_dirs), + key, extract_values(value)) != 0) { printf("FAIL: failed in child\n"); exit(42); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 46ee4aa0c54..dfaf0460c4f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -126,7 +126,7 @@ public class TestLinuxContainerExecutorWithMocks { assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId, workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(), StringUtils.join(",", dirsHandler.getLocalDirs()), - StringUtils.join(",", dirsHandler.getLogDirs())), + StringUtils.join(",", dirsHandler.getLogDirs()), "cgroups=none"), readMockParams()); } @@ -211,7 +211,8 @@ public class TestLinuxContainerExecutorWithMocks { assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId, workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(), StringUtils.join(",", dirsHandler.getLocalDirs()), - StringUtils.join(",", dirsHandler.getLogDirs())), readMockParams()); + StringUtils.join(",", dirsHandler.getLogDirs()), + "cgroups=none"), readMockParams()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index dc21db30e83..16db4a7fd91 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -134,7 +134,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest { ProcfsBasedProcessTree pTree = new ProcfsBasedProcessTree( "100", procfsRootDir.getAbsolutePath()); - pTree.getProcessTree(); + pTree.updateProcessTree(); assertTrue("tree rooted at 100 should be over limit " + "after first iteration.", test.isProcessTreeOverLimit(pTree, "dummyId", limit)); @@ -142,13 +142,13 @@ public class TestContainersMonitor extends 
BaseContainerManagerTest { // the tree rooted at 200 is initially below limit. pTree = new ProcfsBasedProcessTree("200", procfsRootDir.getAbsolutePath()); - pTree.getProcessTree(); + pTree.updateProcessTree(); assertFalse("tree rooted at 200 shouldn't be over limit " + "after one iteration.", test.isProcessTreeOverLimit(pTree, "dummyId", limit)); // second iteration - now the tree has been over limit twice, // hence it should be declared over limit. - pTree.getProcessTree(); + pTree.updateProcessTree(); assertTrue( "tree rooted at 200 should be over limit after 2 iterations", test.isProcessTreeOverLimit(pTree, "dummyId", limit)); @@ -156,12 +156,12 @@ public class TestContainersMonitor extends BaseContainerManagerTest { // the tree rooted at 600 is never over limit. pTree = new ProcfsBasedProcessTree("600", procfsRootDir.getAbsolutePath()); - pTree.getProcessTree(); + pTree.updateProcessTree(); assertFalse("tree rooted at 600 should never be over limit.", test.isProcessTreeOverLimit(pTree, "dummyId", limit)); // another iteration does not make any difference. - pTree.getProcessTree(); + pTree.updateProcessTree(); assertFalse("tree rooted at 600 should never be over limit.", test.isProcessTreeOverLimit(pTree, "dummyId", limit)); } finally { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 96ee551e205..d5f8ac201ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -263,6 +263,8 @@ public class ApplicationMasterService extends AbstractService implements } else if (request.getResponseId() + 1 < lastResponse.getResponseId()) { LOG.error("Invalid responseid from appAttemptId " + appAttemptId); // Oh damn! Sending reboot isn't enough. RM state is corrupted. TODO: + // Reboot is not useful since after AM reboots, it will send register and + // get an exception. Might as well throw an exception here. 
allocateResponse.setAMResponse(reboot); return allocateResponse; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index ec29a4792ba..e8bd5d03114 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -37,6 +37,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.yarn.api.ClientRMProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; @@ -75,6 +76,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.RMDelegationTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -254,6 +256,20 @@ public class ClientRMService extends AbstractService implements // So call handle directly and do not send an event. 
rmAppManager.handle(new RMAppManagerSubmitEvent(submissionContext, System .currentTimeMillis())); + + // If recovery is enabled then store the application information in a + // blocking call so make sure that RM has stored the information needed + // to restart the AM after RM restart without further client communication + RMStateStore stateStore = rmContext.getStateStore(); + LOG.info("Storing Application with id " + applicationId); + try { + stateStore.storeApplication(rmContext.getRMApps().get(applicationId)); + } catch (Exception e) { + // For HA this exception needs to be handled by giving up + // master status if we got fenced + LOG.error("Failed to store application:" + applicationId, e); + ExitUtil.terminate(1, e); + } LOG.info("Application with id " + applicationId.getId() + " submitted by user " + user); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 44753928fe1..8b5e55aa92b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.io.IOException; import java.nio.ByteBuffer; import java.util.LinkedList; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -36,6 +37,10 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.security.client.ClientTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -48,7 +53,8 @@ import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; /** * This class manages the list of applications for the resource manager. */ -public class RMAppManager implements EventHandler { +public class RMAppManager implements EventHandler, + Recoverable { private static final Log LOG = LogFactory.getLog(RMAppManager.class); @@ -173,6 +179,10 @@ public class RMAppManager implements EventHandler { completedApps.add(applicationId); writeAuditLog(applicationId); + + // application completely done. 
Remove from state + RMStateStore store = rmContext.getStateStore(); + store.removeApplication(rmContext.getRMApps().get(applicationId)); } } @@ -306,6 +316,37 @@ public class RMAppManager implements EventHandler { } return credentials; } + + @Override + public void recover(RMState state) throws Exception { + RMStateStore store = rmContext.getStateStore(); + assert store != null; + // recover applications + Map appStates = state.getApplicationState(); + LOG.info("Recovering " + appStates.size() + " applications"); + for(ApplicationState appState : appStates.values()) { + // re-submit the application + // this is going to send an app start event but since the async dispatcher + // has not started that event will be queued until we have completed re + // populating the state + if(appState.getApplicationSubmissionContext().getUnmanagedAM()) { + // do not recover unmanaged applications since current recovery + // mechanism of restarting attempts does not work for them. + // This will need to be changed in work preserving recovery in which + // RM will re-connect with the running AM's instead of restarting them + LOG.info("Not recovering unmanaged application " + appState.getAppId()); + store.removeApplication(appState); + } else { + LOG.info("Recovering application " + appState.getAppId()); + submitApplication(appState.getApplicationSubmissionContext(), + appState.getSubmitTime()); + // re-populate attempt information in application + RMAppImpl appImpl = (RMAppImpl) rmContext.getRMApps().get( + appState.getAppId()); + appImpl.recover(state); + } + } + } @Override public void handle(RMAppManagerEvent event) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index ccb3deb382f..b48767001bd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; @@ -38,6 +39,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSe public interface RMContext { Dispatcher getDispatcher(); + + RMStateStore getStateStore(); ConcurrentMap getRMApps(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 632b6c3453d..840d129dc4e 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -23,7 +23,10 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; @@ -33,6 +36,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSec import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import com.google.common.annotations.VisibleForTesting; + public class RMContextImpl implements RMContext { private final Dispatcher rmDispatcher; @@ -48,6 +53,7 @@ public class RMContextImpl implements RMContext { private AMLivelinessMonitor amLivelinessMonitor; private AMLivelinessMonitor amFinishingMonitor; + private RMStateStore stateStore = null; private ContainerAllocationExpirer containerAllocationExpirer; private final DelegationTokenRenewer tokenRenewer; private final ApplicationTokenSecretManager appTokenSecretManager; @@ -55,6 +61,7 @@ public class RMContextImpl implements RMContext { private final ClientToAMTokenSecretManagerInRM clientToAMTokenSecretManager; public RMContextImpl(Dispatcher rmDispatcher, + RMStateStore store, ContainerAllocationExpirer containerAllocationExpirer, AMLivelinessMonitor amLivelinessMonitor, AMLivelinessMonitor amFinishingMonitor, @@ -63,6 +70,7 @@ public class RMContextImpl implements RMContext { RMContainerTokenSecretManager containerTokenSecretManager, ClientToAMTokenSecretManagerInRM clientTokenSecretManager) { this.rmDispatcher = rmDispatcher; + this.stateStore = store; this.containerAllocationExpirer = containerAllocationExpirer; this.amLivelinessMonitor = amLivelinessMonitor; this.amFinishingMonitor = amFinishingMonitor; @@ -71,11 +79,39 @@ public class RMContextImpl implements RMContext { this.containerTokenSecretManager = containerTokenSecretManager; this.clientToAMTokenSecretManager = clientTokenSecretManager; } + + @VisibleForTesting + // helper constructor for tests + public RMContextImpl(Dispatcher rmDispatcher, + ContainerAllocationExpirer containerAllocationExpirer, + AMLivelinessMonitor amLivelinessMonitor, + AMLivelinessMonitor amFinishingMonitor, + DelegationTokenRenewer tokenRenewer, + ApplicationTokenSecretManager appTokenSecretManager, + RMContainerTokenSecretManager containerTokenSecretManager, + ClientToAMTokenSecretManagerInRM clientTokenSecretManager) { + this(rmDispatcher, null, containerAllocationExpirer, amLivelinessMonitor, + amFinishingMonitor, tokenRenewer, appTokenSecretManager, + containerTokenSecretManager, clientTokenSecretManager); + RMStateStore nullStore = new NullRMStateStore(); + nullStore.setDispatcher(rmDispatcher); + try { + 
nullStore.init(new YarnConfiguration()); + setStateStore(nullStore); + } catch (Exception e) { + assert false; + } + } @Override public Dispatcher getDispatcher() { return this.rmDispatcher; } + + @Override + public RMStateStore getStateStore() { + return stateStore; + } @Override public ConcurrentMap getRMApps() { @@ -126,4 +162,9 @@ public class RMContextImpl implements RMContext { public ClientToAMTokenSecretManagerInRM getClientToAMTokenSecretManager() { return this.clientToAMTokenSecretManager; } + + @VisibleForTesting + public void setStateStore(RMStateStore store) { + stateStore = store; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index c7d2b26b699..e196770837e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; @@ -45,10 +46,11 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.RMDelegationTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -80,6 +82,8 @@ import org.apache.hadoop.yarn.webapp.WebApp; import org.apache.hadoop.yarn.webapp.WebApps; import org.apache.hadoop.yarn.webapp.WebApps.Builder; +import com.google.common.annotations.VisibleForTesting; + /** * The ResourceManager is the main class that is a set of components. * "I am the ResourceManager. All your resources are belong to us..." 
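The RMContextImpl changes above install a NullRMStateStore whenever no real store is supplied (for example in the test-only constructor), so callers can use the store unconditionally. A tiny sketch of that null-object wiring, with hypothetical names, is shown below; it is an illustration of the pattern, not the project's API.

    // Null-object pattern: a do-nothing store stands in when recovery is disabled,
    // so store calls never need a null check.
    interface StateStoreSketch {
        void storeApplication(String appId) throws Exception;
        void removeApplication(String appId) throws Exception;
    }

    class NullStateStoreSketch implements StateStoreSketch {
        @Override public void storeApplication(String appId) { /* intentionally a no-op */ }
        @Override public void removeApplication(String appId) { /* intentionally a no-op */ }
    }

    class ContextSketch {
        private final StateStoreSketch store;
        ContextSketch(StateStoreSketch store) {
            // fall back to the no-op store instead of keeping a null reference
            this.store = (store != null) ? store : new NullStateStoreSketch();
        }
        StateStoreSketch getStateStore() { return store; }
    }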
@@ -119,14 +123,13 @@ public class ResourceManager extends CompositeService implements Recoverable { protected RMDelegationTokenSecretManager rmDTSecretManager; private WebApp webApp; protected RMContext rmContext; - private final RMStateStore store; protected ResourceTrackerService resourceTracker; + private boolean recoveryEnabled; private Configuration conf; - - public ResourceManager(RMStateStore store) { + + public ResourceManager() { super("ResourceManager"); - this.store = store; } public RMContext getRMContext() { @@ -160,12 +163,34 @@ public class ResourceManager extends CompositeService implements Recoverable { this.containerTokenSecretManager = createContainerTokenSecretManager(conf); + boolean isRecoveryEnabled = conf.getBoolean( + YarnConfiguration.RECOVERY_ENABLED, + YarnConfiguration.DEFAULT_RM_RECOVERY_ENABLED); + + RMStateStore rmStore = null; + if(isRecoveryEnabled) { + recoveryEnabled = true; + rmStore = RMStateStoreFactory.getStore(conf); + } else { + recoveryEnabled = false; + rmStore = new NullRMStateStore(); + } + try { + rmStore.init(conf); + rmStore.setDispatcher(rmDispatcher); + } catch (Exception e) { + // the Exception from stateStore.init() needs to be handled for + // HA and we need to give up master status if we got fenced + LOG.error("Failed to init state store", e); + ExitUtil.terminate(1, e); + } + this.rmContext = - new RMContextImpl(this.rmDispatcher, + new RMContextImpl(this.rmDispatcher, rmStore, this.containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor, tokenRenewer, this.appTokenSecretManager, this.containerTokenSecretManager, this.clientToAMSecretManager); - + // Register event handler for NodesListManager this.nodesListManager = new NodesListManager(this.rmContext); this.rmDispatcher.register(NodesListManagerEventType.class, @@ -226,9 +251,15 @@ public class ResourceManager extends CompositeService implements Recoverable { addService(applicationMasterLauncher); new RMNMInfo(this.rmContext, this.scheduler); - + super.init(conf); } + + @VisibleForTesting + protected void setRMStateStore(RMStateStore rmStore) { + rmStore.setDispatcher(rmDispatcher); + ((RMContextImpl) rmContext).setStateStore(rmStore); + } protected RMContainerTokenSecretManager createContainerTokenSecretManager( Configuration conf) { @@ -502,6 +533,19 @@ public class ResourceManager extends CompositeService implements Recoverable { this.appTokenSecretManager.start(); this.containerTokenSecretManager.start(); + if(recoveryEnabled) { + try { + RMStateStore rmStore = rmContext.getStateStore(); + RMState state = rmStore.loadState(); + recover(state); + } catch (Exception e) { + // the Exception from loadState() needs to be handled for + // HA and we need to give up master status if we got fenced + LOG.error("Failed to load/recover state", e); + ExitUtil.terminate(1, e); + } + } + startWepApp(); DefaultMetricsSystem.initialize("ResourceManager"); JvmMetrics.initSingleton("ResourceManager", null); @@ -555,6 +599,13 @@ public class ResourceManager extends CompositeService implements Recoverable { DefaultMetricsSystem.shutdown(); + RMStateStore store = rmContext.getStateStore(); + try { + store.close(); + } catch (Exception e) { + LOG.error("Error closing store.", e); + } + super.stop(); } @@ -643,6 +694,8 @@ public class ResourceManager extends CompositeService implements Recoverable { @Override public void recover(RMState state) throws Exception { + // recover applications + rmAppManager.recover(state); } public static void main(String argv[]) { @@ -650,13 +703,11 @@ public 
class ResourceManager extends CompositeService implements Recoverable { StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG); try { Configuration conf = new YarnConfiguration(); - RMStateStore store = StoreFactory.getStore(conf); - ResourceManager resourceManager = new ResourceManager(store); + ResourceManager resourceManager = new ResourceManager(); ShutdownHookManager.get().addShutdownHook( new CompositeServiceShutdownHook(resourceManager), SHUTDOWN_HOOK_PRIORITY); resourceManager.init(conf); - //resourceManager.recover(store.restore()); resourceManager.start(); } catch (Throwable t) { LOG.fatal("Error starting ResourceManager", t); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java deleted file mode 100644 index d1d0ee6d33b..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java +++ /dev/null @@ -1,22 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -package org.apache.hadoop.yarn.server.resourcemanager.recovery; - -public class FileRMStateStore implements RMStateStore { - -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java new file mode 100644 index 00000000000..c5d59378401 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptStateDataPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationStateDataPBImpl; +import org.apache.hadoop.yarn.util.ConverterUtils; + +import com.google.common.annotations.VisibleForTesting; + +@Private +@Unstable +public class MemoryRMStateStore extends RMStateStore { + + RMState state = new RMState(); + + @VisibleForTesting + public RMState getState() { + return state; + } + + @Override + public synchronized RMState loadState() throws Exception { + // return a copy of the state to allow for modification of the real state + RMState returnState = new RMState(); + returnState.appState.putAll(state.appState); + return returnState; + } + + @Override + public synchronized void initInternal(Configuration conf) { + } + + @Override + protected synchronized void closeInternal() throws Exception { + } + + @Override + public void storeApplicationState(String appId, + ApplicationStateDataPBImpl appStateData) + throws Exception { + ApplicationState appState = new ApplicationState( + appStateData.getSubmitTime(), + appStateData.getApplicationSubmissionContext()); + state.appState.put(appState.getAppId(), appState); + } + + @Override + public synchronized void storeApplicationAttemptState(String attemptIdStr, + ApplicationAttemptStateDataPBImpl attemptStateData) + throws Exception { + ApplicationAttemptId attemptId = ConverterUtils + .toApplicationAttemptId(attemptIdStr); + ApplicationAttemptState attemptState = new ApplicationAttemptState( + attemptId, attemptStateData.getMasterContainer()); + + ApplicationState appState = state.getApplicationState().get( + attemptState.getAttemptId().getApplicationId()); + assert appState != null; + + appState.attempts.put(attemptState.getAttemptId(), attemptState); + } + + @Override + public synchronized void removeApplicationState(ApplicationState appState) + throws Exception { + ApplicationId appId = appState.getAppId(); + ApplicationState removed = state.appState.remove(appId); + assert removed != null; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java new file mode 100644 index 00000000000..6b614606c27 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptStateDataPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationStateDataPBImpl; + +public class NullRMStateStore extends RMStateStore { + + @Override + protected void initInternal(Configuration conf) throws Exception { + // Do nothing + } + + @Override + protected void closeInternal() throws Exception { + // Do nothing + } + + @Override + public RMState loadState() throws Exception { + return null; + } + + @Override + protected void storeApplicationState(String appId, + ApplicationStateDataPBImpl appStateData) throws Exception { + // Do nothing + } + + @Override + protected void storeApplicationAttemptState(String attemptId, + ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { + // Do nothing + } + + @Override + protected void removeApplicationState(ApplicationState appState) + throws Exception { + // Do nothing + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 4e1e41e7813..674a779cc2b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -15,10 +15,313 @@ * See the License for the specific language governing permissions and * limitations under the License. 
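MemoryRMStateStore.loadState() above deliberately hands back a copy of its map so that recovery can walk the snapshot while the live store keeps accepting store/remove calls. A compact sketch of that "load returns a snapshot" idea, using hypothetical names and plain collections, follows.

    import java.util.HashMap;
    import java.util.Map;

    // In-memory store sketch: load() returns a shallow copy, so later mutations of the
    // live map are not visible to the recovery code iterating over the snapshot.
    class InMemoryStoreSketch {
        private final Map<String, String> liveState = new HashMap<>(); // appId -> serialized state

        synchronized void put(String appId, String state) { liveState.put(appId, state); }
        synchronized void remove(String appId) { liveState.remove(appId); }

        /** Snapshot for recovery; changes made to the store afterwards are not reflected. */
        synchronized Map<String, String> load() {
            return new HashMap<>(liveState);
        }
    }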
*/ + package org.apache.hadoop.yarn.server.resourcemanager.recovery; -public interface RMStateStore { - public interface RMState { +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptStateDataPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationStateDataPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent; + +@Private +@Unstable +/** + * Base class to implement storage of ResourceManager state. + * Takes care of asynchronous notifications and interfacing with YARN objects. + * Real store implementations need to derive from it and implement blocking + * store and load methods to actually store and load the state. + */ +public abstract class RMStateStore { + public static final Log LOG = LogFactory.getLog(RMStateStore.class); + + /** + * State of an application attempt + */ + public static class ApplicationAttemptState { + final ApplicationAttemptId attemptId; + final Container masterContainer; + + public ApplicationAttemptState(ApplicationAttemptId attemptId, + Container masterContainer) { + this.attemptId = attemptId; + this.masterContainer = masterContainer; + } + + public Container getMasterContainer() { + return masterContainer; + } + public ApplicationAttemptId getAttemptId() { + return attemptId; + } + } + + /** + * State of an application application + */ + public static class ApplicationState { + final ApplicationSubmissionContext context; + final long submitTime; + Map attempts = + new HashMap(); + + ApplicationState(long submitTime, ApplicationSubmissionContext context) { + this.submitTime = submitTime; + this.context = context; + } + + public ApplicationId getAppId() { + return context.getApplicationId(); + } + public long getSubmitTime() { + return submitTime; + } + public int getAttemptCount() { + return attempts.size(); + } + public ApplicationSubmissionContext getApplicationSubmissionContext() { + return context; + } + public ApplicationAttemptState getAttempt(ApplicationAttemptId attemptId) { + return attempts.get(attemptId); + } + } + + /** + * State of the ResourceManager + */ + public static class RMState { + Map appState = + new HashMap(); + + public Map getApplicationState() { + return appState; + } + } + + private Dispatcher rmDispatcher; + + /** + * Dispatcher used to send state operation completion events to + * ResourceManager services + */ + public void setDispatcher(Dispatcher dispatcher) { + this.rmDispatcher = dispatcher; + } + + AsyncDispatcher dispatcher; + + public synchronized void init(Configuration conf) throws Exception{ + // 
create async handler + dispatcher = new AsyncDispatcher(); + dispatcher.init(conf); + dispatcher.register(RMStateStoreEventType.class, + new ForwardingEventHandler()); + dispatcher.start(); + + initInternal(conf); + } + + /** + * Derived classes initialize themselves using this method. + * The base class is initialized and the event dispatcher is ready to use at + * this point + */ + protected abstract void initInternal(Configuration conf) throws Exception; + + public synchronized void close() throws Exception { + closeInternal(); + dispatcher.stop(); + } + + /** + * Derived classes close themselves using this method. + * The base class will be closed and the event dispatcher will be shutdown + * after this + */ + protected abstract void closeInternal() throws Exception; + + /** + * Blocking API + * The derived class must recover state from the store and return a new + * RMState object populated with that state + * This must not be called on the dispatcher thread + */ + public abstract RMState loadState() throws Exception; + + /** + * Blocking API + * ResourceManager services use this to store the application's state + * This must not be called on the dispatcher thread + */ + public synchronized void storeApplication(RMApp app) throws Exception { + ApplicationSubmissionContext context = app + .getApplicationSubmissionContext(); + assert context instanceof ApplicationSubmissionContextPBImpl; + + ApplicationStateDataPBImpl appStateData = new ApplicationStateDataPBImpl(); + appStateData.setSubmitTime(app.getSubmitTime()); + appStateData.setApplicationSubmissionContext(context); + + LOG.info("Storing info for app: " + context.getApplicationId()); + storeApplicationState(app.getApplicationId().toString(), appStateData); + } + + /** + * Blocking API + * Derived classes must implement this method to store the state of an + * application. + */ + protected abstract void storeApplicationState(String appId, + ApplicationStateDataPBImpl appStateData) + throws Exception; + + @SuppressWarnings("unchecked") + /** + * Non-blocking API + * ResourceManager services call this to store state on an application attempt + * This does not block the dispatcher threads + * RMAppAttemptStoredEvent will be sent on completion to notify the RMAppAttempt + */ + public synchronized void storeApplicationAttempt(RMAppAttempt appAttempt) { + ApplicationAttemptState attemptState = new ApplicationAttemptState( + appAttempt.getAppAttemptId(), appAttempt.getMasterContainer()); + dispatcher.getEventHandler().handle( + new RMStateStoreAppAttemptEvent(attemptState)); + } + + /** + * Blocking API + * Derived classes must implement this method to store the state of an + * application attempt + */ + protected abstract void storeApplicationAttemptState(String attemptId, + ApplicationAttemptStateDataPBImpl attemptStateData) + throws Exception; + + + /** + * Non-blocking API + * ResourceManager services call this to remove an application from the state + * store + * This does not block the dispatcher threads + * There is no notification of completion for this operation. 
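The RMStateStore javadoc above describes the split the base class enforces: blocking load/store primitives implemented by concrete stores, a private async dispatcher that keeps those writes off the caller's thread, and a completion notification sent back afterwards. The following is a minimal, self-contained sketch of that threading pattern only, with hypothetical names and a plain executor standing in for the dispatcher; it is not the YARN class itself.

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.function.BiConsumer;

    // Non-blocking facade over a blocking write: callers enqueue work and are told the
    // outcome via a callback, much like the ATTEMPT_SAVED notification above.
    abstract class AsyncStoreSketch {
        private final ExecutorService dispatcher = Executors.newSingleThreadExecutor();

        /** Non-blocking: returns immediately; whenDone receives the attempt id and any failure. */
        public void saveAttempt(String attemptId, byte[] data,
                                BiConsumer<String, Exception> whenDone) {
            dispatcher.execute(() -> {
                Exception failure = null;
                try {
                    writeAttemptBlocking(attemptId, data);   // may hit disk, ZooKeeper, etc.
                } catch (Exception e) {
                    failure = e;
                } finally {
                    whenDone.accept(attemptId, failure);     // completion notification
                }
            });
        }

        /** Blocking write that a concrete store (memory, filesystem, ZK, ...) implements. */
        protected abstract void writeAttemptBlocking(String attemptId, byte[] data) throws Exception;

        public void close() { dispatcher.shutdown(); }
    }

The single-threaded executor also preserves the ordering of store operations per store instance, which is the property the dispatcher gives the real class.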
+ */ + public synchronized void removeApplication(RMApp app) { + ApplicationState appState = new ApplicationState( + app.getSubmitTime(), app.getApplicationSubmissionContext()); + for(RMAppAttempt appAttempt : app.getAppAttempts().values()) { + ApplicationAttemptState attemptState = new ApplicationAttemptState( + appAttempt.getAppAttemptId(), appAttempt.getMasterContainer()); + appState.attempts.put(attemptState.getAttemptId(), attemptState); + } + + removeApplication(appState); + } + + @SuppressWarnings("unchecked") + /** + * Non-Blocking API + */ + public synchronized void removeApplication(ApplicationState appState) { + dispatcher.getEventHandler().handle(new RMStateStoreRemoveAppEvent(appState)); + } + + /** + * Blocking API + * Derived classes must implement this method to remove the state of an + * application and its attempts + */ + protected abstract void removeApplicationState(ApplicationState appState) + throws Exception; + + // Dispatcher related code + + private synchronized void handleStoreEvent(RMStateStoreEvent event) { + switch(event.getType()) { + case STORE_APP_ATTEMPT: + { + ApplicationAttemptState attemptState = + ((RMStateStoreAppAttemptEvent) event).getAppAttemptState(); + Exception storedException = null; + ApplicationAttemptStateDataPBImpl attemptStateData = + new ApplicationAttemptStateDataPBImpl(); + attemptStateData.setAttemptId(attemptState.getAttemptId()); + attemptStateData.setMasterContainer(attemptState.getMasterContainer()); + + LOG.info("Storing info for attempt: " + attemptState.getAttemptId()); + try { + storeApplicationAttemptState(attemptState.getAttemptId().toString(), + attemptStateData); + } catch (Exception e) { + LOG.error("Error storing appAttempt: " + + attemptState.getAttemptId(), e); + storedException = e; + } finally { + notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(), + storedException); + } + } + break; + case REMOVE_APP: + { + ApplicationState appState = + ((RMStateStoreRemoveAppEvent) event).getAppState(); + ApplicationId appId = appState.getAppId(); + + LOG.info("Removing info for app: " + appId); + try { + removeApplicationState(appState); + } catch (Exception e) { + LOG.error("Error removing app: " + appId, e); + } + } + break; + default: + LOG.error("Unknown RMStateStoreEvent type: " + event.getType()); + } + } + + @SuppressWarnings("unchecked") + /** + * In (@link storeApplicationAttempt}, derived class can call this method to + * notify the application attempt about operation completion + * @param appAttempt attempt that has been saved + */ + private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId, + Exception storedException) { + rmDispatcher.getEventHandler().handle( + new RMAppAttemptStoredEvent(attemptId, storedException)); + } + + /** + * EventHandler implementation which forward events to the FSRMStateStore + * This hides the EventHandle methods of the store from its public interface + */ + private final class ForwardingEventHandler + implements EventHandler { + + @Override + public void handle(RMStateStoreEvent event) { + handleStoreEvent(event); + } } + } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreAppAttemptEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreAppAttemptEvent.java new file mode 
100644 index 00000000000..c4a04bc5771 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreAppAttemptEvent.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState; + +public class RMStateStoreAppAttemptEvent extends RMStateStoreEvent { + ApplicationAttemptState attemptState; + + public RMStateStoreAppAttemptEvent(ApplicationAttemptState attemptState) { + super(RMStateStoreEventType.STORE_APP_ATTEMPT); + this.attemptState = attemptState; + } + + public ApplicationAttemptState getAppAttemptState() { + return attemptState; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEvent.java new file mode 100644 index 00000000000..8e49a826e04 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEvent.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.yarn.event.AbstractEvent; + +public class RMStateStoreEvent extends AbstractEvent { + public RMStateStoreEvent(RMStateStoreEventType type) { + super(type); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java new file mode 100644 index 00000000000..22f155cbf26 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +public enum RMStateStoreEventType { + STORE_APP_ATTEMPT, + REMOVE_APP +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java similarity index 92% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java index b314989968f..f9e2869d997 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java @@ -21,12 +21,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; -public class StoreFactory { +public class RMStateStoreFactory { public static RMStateStore getStore(Configuration conf) { RMStateStore store = ReflectionUtils.newInstance( conf.getClass(YarnConfiguration.RM_STORE, - FileRMStateStore.class, RMStateStore.class), + MemoryRMStateStore.class, RMStateStore.class), conf); 
return store; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreRemoveAppEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreRemoveAppEvent.java new file mode 100644 index 00000000000..402feb96ec9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreRemoveAppEvent.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; + +public class RMStateStoreRemoveAppEvent extends RMStateStoreEvent { + ApplicationState appState; + + RMStateStoreRemoveAppEvent(ApplicationState appState) { + super(RMStateStoreEventType.REMOVE_APP); + this.appState = appState; + } + + public ApplicationState getAppState() { + return appState; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java index 5e5a07efb2e..cbbeb3d01d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
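The renamed RMStateStoreFactory above picks the store implementation reflectively from configuration, defaulting to the in-memory store. A hedged sketch of that reflective selection is given below; the config key and class names are invented for illustration and are not the real YARN keys.

    // Pluggable store selection: load the class named in configuration, or a default.
    interface PluggableStoreSketch { void open(); }

    class MemoryStoreSketch implements PluggableStoreSketch {
        @Override public void open() { /* nothing to open for an in-memory store */ }
    }

    class StoreFactorySketch {
        /** Instantiate the store class named in conf, defaulting to the in-memory one. */
        static PluggableStoreSketch newStore(java.util.Properties conf) throws Exception {
            String className = conf.getProperty("sketch.rm.store.class",   // hypothetical key
                                                MemoryStoreSketch.class.getName());
            return (PluggableStoreSketch) Class.forName(className)
                                               .getDeclaredConstructor()
                                               .newInstance();
        }
    }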
*/ + @InterfaceAudience.Private @InterfaceStability.Unstable package org.apache.hadoop.yarn.server.resourcemanager.recovery; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index 2533d465dee..92bc2b600f9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -44,6 +44,12 @@ public interface RMApp extends EventHandler { * @return the {@link ApplicationId} for this {@link RMApp}. */ ApplicationId getApplicationId(); + + /** + * The application submission context for this {@link RMApp} + * @return the {@link ApplicationSubmissionContext} for this {@link RMApp} + */ + ApplicationSubmissionContext getApplicationSubmissionContext(); /** * The current state of the {@link RMApp}. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 0317a3c6022..70abe2fe319 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -50,6 +50,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNodeUpdateEvent.RMAppNodeUpdateType; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; @@ -66,7 +69,7 @@ import org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; -public class RMAppImpl implements RMApp { +public class RMAppImpl implements RMApp, Recoverable { private static final Log LOG = LogFactory.getLog(RMAppImpl.class); private static final String UNAVAILABLE = "N/A"; @@ -243,6 +246,11 @@ public class RMAppImpl implements RMApp { public ApplicationId getApplicationId() { return this.applicationId; } + + @Override + public ApplicationSubmissionContext getApplicationSubmissionContext() { + return this.submissionContext; + } @Override public FinalApplicationStatus getFinalApplicationStatus() { @@ -512,9 
+520,22 @@ public class RMAppImpl implements RMApp { this.writeLock.unlock(); } } + + @Override + public void recover(RMState state) { + ApplicationState appState = state.getApplicationState().get(getApplicationId()); + LOG.info("Recovering app: " + getApplicationId() + " with " + + + appState.getAttemptCount() + " attempts"); + for(int i=0; i= app.maxRetries) { retryApp = false; msg = "Application " + app.getApplicationId() + " failed " + app.maxRetries + " times due to " + failedEvent.getDiagnostics() @@ -655,7 +678,7 @@ public class RMAppImpl implements RMApp { } if (retryApp) { - app.createNewAttempt(); + app.createNewAttempt(true); return initialState; } else { LOG.info(msg); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java index 99287a3a55f..bd96e2b9f5f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java @@ -39,9 +39,15 @@ public enum RMAppAttemptEventType { CONTAINER_ACQUIRED, CONTAINER_ALLOCATED, CONTAINER_FINISHED, + + // Source: RMStateStore + ATTEMPT_SAVED, // Source: Scheduler APP_REJECTED, APP_ACCEPTED, + + // Source: RMAttemptImpl.recover + RECOVER } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index da8a6ed6719..95a19541311 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -38,6 +38,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; @@ -57,6 +58,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; +import 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -69,6 +75,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; @@ -85,7 +92,7 @@ import org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.util.BuilderUtils; @SuppressWarnings({"unchecked", "rawtypes"}) -public class RMAppAttemptImpl implements RMAppAttempt { +public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private static final Log LOG = LogFactory.getLog(RMAppAttemptImpl.class); @@ -153,12 +160,15 @@ public class RMAppAttemptImpl implements RMAppAttempt { .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.FAILED, RMAppAttemptEventType.REGISTERED, new UnexpectedAMRegisteredTransition()) + .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.RECOVERED, + RMAppAttemptEventType.RECOVER) // Transitions from SUBMITTED state .addTransition(RMAppAttemptState.SUBMITTED, RMAppAttemptState.FAILED, RMAppAttemptEventType.APP_REJECTED, new AppRejectedTransition()) .addTransition(RMAppAttemptState.SUBMITTED, - EnumSet.of(RMAppAttemptState.LAUNCHED, RMAppAttemptState.SCHEDULED), + EnumSet.of(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, + RMAppAttemptState.SCHEDULED), RMAppAttemptEventType.APP_ACCEPTED, new ScheduleTransition()) .addTransition(RMAppAttemptState.SUBMITTED, RMAppAttemptState.KILLED, @@ -170,12 +180,42 @@ public class RMAppAttemptImpl implements RMAppAttempt { // Transitions from SCHEDULED State .addTransition(RMAppAttemptState.SCHEDULED, - RMAppAttemptState.ALLOCATED, + RMAppAttemptState.ALLOCATED_SAVING, RMAppAttemptEventType.CONTAINER_ALLOCATED, new AMContainerAllocatedTransition()) .addTransition(RMAppAttemptState.SCHEDULED, RMAppAttemptState.KILLED, RMAppAttemptEventType.KILL, new BaseFinalTransition(RMAppAttemptState.KILLED)) + + // Transitions from ALLOCATED_SAVING State + .addTransition(RMAppAttemptState.ALLOCATED_SAVING, + RMAppAttemptState.ALLOCATED, + RMAppAttemptEventType.ATTEMPT_SAVED, new AttemptStoredTransition()) + .addTransition(RMAppAttemptState.ALLOCATED_SAVING, + RMAppAttemptState.ALLOCATED_SAVING, + RMAppAttemptEventType.CONTAINER_ACQUIRED, + new ContainerAcquiredTransition()) + // App could be killed by the client. So need to handle this. 
+ .addTransition(RMAppAttemptState.ALLOCATED_SAVING, + RMAppAttemptState.KILLED, + RMAppAttemptEventType.KILL, + new BaseFinalTransition(RMAppAttemptState.KILLED)) + + // Transitions from LAUNCHED_UNMANAGED_SAVING State + .addTransition(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, + RMAppAttemptState.LAUNCHED, + RMAppAttemptEventType.ATTEMPT_SAVED, + new UnmanagedAMAttemptSavedTransition()) + // attempt should not try to register in this state + .addTransition(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, + RMAppAttemptState.FAILED, + RMAppAttemptEventType.REGISTERED, + new UnexpectedAMRegisteredTransition()) + // App could be killed by the client. So need to handle this. + .addTransition(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, + RMAppAttemptState.KILLED, + RMAppAttemptEventType.KILL, + new BaseFinalTransition(RMAppAttemptState.KILLED)) // Transitions from ALLOCATED State .addTransition(RMAppAttemptState.ALLOCATED, @@ -279,11 +319,30 @@ public class RMAppAttemptImpl implements RMAppAttempt { RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.REGISTERED, RMAppAttemptEventType.CONTAINER_ALLOCATED, + RMAppAttemptEventType.ATTEMPT_SAVED, + RMAppAttemptEventType.CONTAINER_FINISHED, + RMAppAttemptEventType.UNREGISTERED, + RMAppAttemptEventType.KILL, + RMAppAttemptEventType.STATUS_UPDATE)) + + // Transitions from RECOVERED State + .addTransition( + RMAppAttemptState.RECOVERED, + RMAppAttemptState.RECOVERED, + EnumSet.of(RMAppAttemptEventType.START, + RMAppAttemptEventType.APP_ACCEPTED, + RMAppAttemptEventType.APP_REJECTED, + RMAppAttemptEventType.EXPIRE, + RMAppAttemptEventType.LAUNCHED, + RMAppAttemptEventType.LAUNCH_FAILED, + RMAppAttemptEventType.REGISTERED, + RMAppAttemptEventType.CONTAINER_ALLOCATED, + RMAppAttemptEventType.CONTAINER_ACQUIRED, + RMAppAttemptEventType.ATTEMPT_SAVED, RMAppAttemptEventType.CONTAINER_FINISHED, RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.KILL, RMAppAttemptEventType.STATUS_UPDATE)) - .installTopology(); public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, @@ -318,7 +377,7 @@ public class RMAppAttemptImpl implements RMAppAttempt { @Override public ApplicationSubmissionContext getSubmissionContext() { return this.submissionContext; - } + } @Override public FinalApplicationStatus getFinalApplicationStatus() { @@ -494,6 +553,10 @@ public class RMAppAttemptImpl implements RMAppAttempt { } } + private void setMasterContainer(Container container) { + masterContainer = container; + } + @Override public void handle(RMAppAttemptEvent event) { @@ -561,6 +624,21 @@ public class RMAppAttemptImpl implements RMAppAttempt { } } + @Override + public void recover(RMState state) { + ApplicationState appState = + state.getApplicationState().get(getAppAttemptId().getApplicationId()); + ApplicationAttemptState attemptState = appState.getAttempt(getAppAttemptId()); + assert attemptState != null; + setMasterContainer(attemptState.getMasterContainer()); + LOG.info("Recovered attempt: AppId: " + getAppAttemptId().getApplicationId() + + " AttemptId: " + getAppAttemptId() + + " MasterContainer: " + masterContainer); + setDiagnostics("Attempt recovered after RM restart"); + handle(new RMAppAttemptEvent(getAppAttemptId(), + RMAppAttemptEventType.RECOVER)); + } + private static class BaseTransition implements SingleArcTransition { @@ -625,13 +703,12 @@ public class RMAppAttemptImpl implements RMAppAttempt { @Override public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { - - // Send the acceptance to the app - 
appAttempt.eventHandler.handle(new RMAppEvent(event - .getApplicationAttemptId().getApplicationId(), - RMAppEventType.APP_ACCEPTED)); - if (!appAttempt.submissionContext.getUnmanagedAM()) { + // Send the acceptance to the app + appAttempt.eventHandler.handle(new RMAppEvent(event + .getApplicationAttemptId().getApplicationId(), + RMAppEventType.APP_ACCEPTED)); + // Request a container for the AM. ResourceRequest request = BuilderUtils.newResourceRequest( AM_CONTAINER_PRIORITY, "*", appAttempt.submissionContext @@ -647,35 +724,42 @@ public class RMAppAttemptImpl implements RMAppAttempt { return RMAppAttemptState.SCHEDULED; } else { // RM not allocating container. AM is self launched. - // Directly go to LAUNCHED state - // Register with AMLivelinessMonitor - appAttempt.rmContext.getAMLivelinessMonitor().register( - appAttempt.applicationAttemptId); - return RMAppAttemptState.LAUNCHED; + RMStateStore store = appAttempt.rmContext.getStateStore(); + // save state and then go to LAUNCHED state + appAttempt.storeAttempt(store); + return RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING; } } } - private static final class AMContainerAllocatedTransition extends BaseTransition { + private static final class AMContainerAllocatedTransition + extends BaseTransition { @Override public void transition(RMAppAttemptImpl appAttempt, - RMAppAttemptEvent event) { - + RMAppAttemptEvent event) { // Acquire the AM container from the scheduler. Allocation amContainerAllocation = appAttempt.scheduler.allocate( appAttempt.applicationAttemptId, EMPTY_CONTAINER_REQUEST_LIST, EMPTY_CONTAINER_RELEASE_LIST); // Set the masterContainer - appAttempt.masterContainer = amContainerAllocation.getContainers().get( - 0); + appAttempt.setMasterContainer(amContainerAllocation.getContainers().get( + 0)); - // Send event to launch the AM Container - appAttempt.eventHandler.handle(new AMLauncherEvent( - AMLauncherEventType.LAUNCH, appAttempt)); + RMStateStore store = appAttempt.rmContext.getStateStore(); + appAttempt.storeAttempt(store); } } - + + private static final class AttemptStoredTransition extends BaseTransition { + @Override + public void transition(RMAppAttemptImpl appAttempt, + RMAppAttemptEvent event) { + appAttempt.checkAttemptStoreError(event); + appAttempt.launchAttempt(); + } + } + private static class BaseFinalTransition extends BaseTransition { private final RMAppAttemptState finalAttemptState; @@ -736,17 +820,34 @@ public class RMAppAttemptImpl implements RMAppAttempt { } } - private static final class AMLaunchedTransition extends BaseTransition { + private static class AMLaunchedTransition extends BaseTransition { @Override public void transition(RMAppAttemptImpl appAttempt, - RMAppAttemptEvent event) { - + RMAppAttemptEvent event) { // Register with AMLivelinessMonitor - appAttempt.rmContext.getAMLivelinessMonitor().register( - appAttempt.applicationAttemptId); - + appAttempt.attemptLaunched(); } } + + private static final class UnmanagedAMAttemptSavedTransition + extends AMLaunchedTransition { + @Override + public void transition(RMAppAttemptImpl appAttempt, + RMAppAttemptEvent event) { + appAttempt.checkAttemptStoreError(event); + // Send the acceptance to the app + // Ideally this should have been done when the scheduler accepted the app. + // But its here because until the attempt is saved the client should not + // launch the unmanaged AM. Client waits for the app status to be accepted + // before doing so. 
So we have to delay the accepted state until we have + // completed storing the attempt + appAttempt.eventHandler.handle(new RMAppEvent(event + .getApplicationAttemptId().getApplicationId(), + RMAppEventType.APP_ACCEPTED)); + + super.transition(appAttempt, event); + } + } private static final class LaunchFailedTransition extends BaseFinalTransition { @@ -1040,4 +1141,37 @@ public class RMAppAttemptImpl implements RMAppAttempt { this.readLock.unlock(); } } + + private void launchAttempt(){ + // Send event to launch the AM Container + eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this)); + } + + private void attemptLaunched() { + // Register with AMLivelinessMonitor + rmContext.getAMLivelinessMonitor().register(getAppAttemptId()); + } + + private void checkAttemptStoreError(RMAppAttemptEvent event) { + RMAppAttemptStoredEvent storeEvent = (RMAppAttemptStoredEvent) event; + if(storeEvent.getStoredException() != null) + { + // This needs to be handled for HA and give up master status if we got + // fenced + LOG.error("Failed to store attempt: " + getAppAttemptId(), + storeEvent.getStoredException()); + ExitUtil.terminate(1, storeEvent.getStoredException()); + } + } + + private void storeAttempt(RMStateStore store) { + // store attempt data in a non-blocking manner to prevent dispatcher + // thread starvation and wait for state to be saved + LOG.info("Storing attempt: AppId: " + + getAppAttemptId().getApplicationId() + + " AttemptId: " + + getAppAttemptId() + + " MasterContainer: " + masterContainer); + store.storeApplicationAttempt(this); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptState.java index b3eaa02cde2..3eb13edbeef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptState.java @@ -19,6 +19,6 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; public enum RMAppAttemptState { - NEW, SUBMITTED, SCHEDULED, ALLOCATED, LAUNCHED, FAILED, RUNNING, - FINISHING, FINISHED, KILLED, + NEW, SUBMITTED, SCHEDULED, ALLOCATED, LAUNCHED, FAILED, RUNNING, FINISHING, + FINISHED, KILLED, ALLOCATED_SAVING, LAUNCHED_UNMANAGED_SAVING, RECOVERED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptStoredEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptStoredEvent.java new file mode 100644 index 00000000000..8d9ba359247 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptStoredEvent.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event; + +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; + +public class RMAppAttemptStoredEvent extends RMAppAttemptEvent { + + final Exception storedException; + + public RMAppAttemptStoredEvent(ApplicationAttemptId appAttemptId, + Exception storedException) { + super(appAttemptId, RMAppAttemptEventType.ATTEMPT_SAVED); + this.storedException = storedException; + } + + public Exception getStoredException() { + return storedException; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index de66f583e2a..69ac6012eb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -198,6 +198,11 @@ public class RMContainerImpl implements RMContainer { return reservedPriority; } + @Override + public String toString() { + return containerId.toString(); + } + @Override public void handle(RMContainerEvent event) { LOG.debug("Processing " + event.getContainerId() + " of type " + event.getType()); @@ -221,7 +226,7 @@ public class RMContainerImpl implements RMContainer { writeLock.unlock(); } } - + private static class BaseTransition implements SingleArcTransition { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index 8b3e13420b2..1b466a66bde 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -177,7 +177,9 @@ public class FSLeafQueue extends FSQueue { 
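// The FSLeafQueue hunk below stops offering the node to applications that are not currently
// runnable: because the loop returns on the first application it assigns to, the queue now hands
// the node to the first *runnable* app in comparator order instead of always returning on the
// very first entry. A minimal standalone sketch of that "first runnable wins" selection, using
// invented App/isRunnable/assign names (the real code works on AppSchedulable and Resource):

import java.util.List;

final class FirstRunnableSketch {
  interface App {
    boolean isRunnable();
    int assign();                    // stand-in for assignContainer(node, reserved)
  }

  /** Returns the assignment of the first runnable app, or 0 if none can run right now. */
  static int assignToFirstRunnable(List<App> sortedApps) {
    for (App app : sortedApps) {
      if (app.isRunnable()) {
        return app.assign();         // first runnable app in sorted order wins
      }
    }
    return 0;                        // analogous to returning Resources.none()
  }
}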
Collections.sort(appScheds, comparator); for (AppSchedulable sched: appScheds) { - return sched.assignContainer(node, reserved); + if (sched.getRunnable()) { + return sched.assignContainer(node, reserved); + } } return Resources.none(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index f0b9949dc85..3f86f57daf4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -514,7 +514,6 @@ public class FairScheduler implements ResourceScheduler { queue.addApp(schedulerApp); queue.getMetrics().submitApp(user, applicationAttemptId.getAttemptId()); - rootMetrics.submitApp(user, applicationAttemptId.getAttemptId()); applications.put(applicationAttemptId, schedulerApp); @@ -777,7 +776,8 @@ public class FairScheduler implements ResourceScheduler { boolean assignedContainer = false; for (FSLeafQueue sched : scheds) { Resource assigned = sched.assignContainer(node, false); - if (Resources.greaterThan(assigned, Resources.none())) { + if (Resources.greaterThan(assigned, Resources.none()) || + node.getReservedContainer() != null) { eventLog.log("ASSIGN", nm.getHostName(), assigned); assignedContainers++; assignedContainer = true; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java index 5da1d4fd5bf..8f27531ad25 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java @@ -227,6 +227,9 @@ public class QueueManager { * Return whether a queue exists already. */ public boolean exists(String name) { + if (!name.startsWith(ROOT_QUEUE + ".") && !name.equals(ROOT_QUEUE)) { + name = ROOT_QUEUE + "." 
+ name; + } synchronized (queues) { return queues.containsKey(name); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/DelegationTokenRenewer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/DelegationTokenRenewer.java index 23eb72ed9b7..9232190ba3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/DelegationTokenRenewer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/DelegationTokenRenewer.java @@ -276,21 +276,26 @@ public class DelegationTokenRenewer extends AbstractService { Collection > tokens = ts.getAllTokens(); long now = System.currentTimeMillis(); + // find tokens for renewal, but don't add timers until we know + // all renewable tokens are valid + Set dtrs = new HashSet(); for(Token token : tokens) { // first renew happens immediately if (token.isManaged()) { DelegationTokenToRenew dtr = new DelegationTokenToRenew(applicationId, token, getConfig(), now, shouldCancelAtEnd); - - addTokenToList(dtr); - - setTimerForTokenRenewal(dtr, true); - if (LOG.isDebugEnabled()) { - LOG.debug("Registering token for renewal for:" + - " service = " + token.getService() + - " for appId = " + applicationId); - } + renewToken(dtr); + dtrs.add(dtr); + } + } + for (DelegationTokenToRenew dtr : dtrs) { + addTokenToList(dtr); + setTimerForTokenRenewal(dtr); + if (LOG.isDebugEnabled()) { + LOG.debug("Registering token for renewal for:" + + " service = " + dtr.token.getService() + + " for appId = " + applicationId); } } } @@ -301,54 +306,49 @@ public class DelegationTokenRenewer extends AbstractService { */ private class RenewalTimerTask extends TimerTask { private DelegationTokenToRenew dttr; + private boolean cancelled = false; RenewalTimerTask(DelegationTokenToRenew t) { dttr = t; } @Override - public void run() { + public synchronized void run() { + if (cancelled) { + return; + } + Token token = dttr.token; try { - // need to use doAs so that http can find the kerberos tgt - dttr.expirationDate = UserGroupInformation.getLoginUser() - .doAs(new PrivilegedExceptionAction(){ - - @Override - public Long run() throws Exception { - return dttr.token.renew(dttr.conf); - } - }); - + renewToken(dttr); if (LOG.isDebugEnabled()) { LOG.debug("Renewing delegation-token for:" + token.getService() + "; new expiration;" + dttr.expirationDate); } - setTimerForTokenRenewal(dttr, false);// set the next one + setTimerForTokenRenewal(dttr);// set the next one } catch (Exception e) { LOG.error("Exception renewing token" + token + ". 
Not rescheduled", e); removeFailedDelegationToken(dttr); } } + + @Override + public synchronized boolean cancel() { + cancelled = true; + return super.cancel(); + } } /** * set task to renew the token */ - private - void setTimerForTokenRenewal(DelegationTokenToRenew token, - boolean firstTime) throws IOException { + private void setTimerForTokenRenewal(DelegationTokenToRenew token) + throws IOException { // calculate timer time - long now = System.currentTimeMillis(); - long renewIn; - if(firstTime) { - renewIn = now; - } else { - long expiresIn = (token.expirationDate - now); - renewIn = now + expiresIn - expiresIn/10; // little bit before the expiration - } + long expiresIn = token.expirationDate - System.currentTimeMillis(); + long renewIn = token.expirationDate - expiresIn/10; // little bit before the expiration // need to create new task every time TimerTask tTask = new RenewalTimerTask(token); @@ -357,6 +357,24 @@ public class DelegationTokenRenewer extends AbstractService { renewalTimer.schedule(token.timerTask, new Date(renewIn)); } + // renew a token + private void renewToken(final DelegationTokenToRenew dttr) + throws IOException { + // need to use doAs so that http can find the kerberos tgt + // NOTE: token renewers should be responsible for the correct UGI! + try { + dttr.expirationDate = UserGroupInformation.getLoginUser().doAs( + new PrivilegedExceptionAction(){ + @Override + public Long run() throws Exception { + return dttr.token.renew(dttr.conf); + } + }); + } catch (InterruptedException e) { + throw new IOException(e); + } + } + // cancel a token private void cancelToken(DelegationTokenToRenew t) { if(t.shouldCancelAtEnd) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java index e90edae89aa..6fd35ec12b8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java @@ -84,12 +84,12 @@ class AppsBlock extends HtmlBlock { appsTableData.append("[\"") .append(appInfo.getAppId()).append("\",\"") - .append(StringEscapeUtils.escapeHtml(appInfo.getUser())) - .append("\",\"") - .append(StringEscapeUtils.escapeHtml(appInfo.getName())) - .append("\",\"") - .append(StringEscapeUtils.escapeHtml(appInfo.getQueue())) - .append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + appInfo.getUser()))).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + appInfo.getName()))).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + appInfo.getQueue()))).append("\",\"") .append(appInfo.getStartTime()).append("\",\"") .append(appInfo.getFinishTime()).append("\",\"") .append(appInfo.getState()).append("\",\"") diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java index 9860e18dac3..7ce59cb74a1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java @@ -20,13 +20,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import static org.apache.hadoop.yarn.util.StringHelper.join; import static org.apache.hadoop.yarn.webapp.YarnWebParams.APP_STATE; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI._PROGRESSBAR; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI._PROGRESSBAR_VALUE; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR_VALUE; import java.util.Collection; import java.util.HashSet; import java.util.concurrent.ConcurrentMap; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -36,7 +37,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo; -import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; @@ -86,44 +86,52 @@ public class FairSchedulerAppsBlock extends HtmlBlock { reqAppStates.add(RMAppState.valueOf(stateString)); } } + StringBuilder appsTableData = new StringBuilder("[\n"); for (RMApp app : apps.values()) { if (reqAppStates != null && !reqAppStates.contains(app.getState())) { continue; } AppInfo appInfo = new AppInfo(app, true); String percent = String.format("%.1f", appInfo.getProgress()); - String startTime = Times.format(appInfo.getStartTime()); - String finishTime = Times.format(appInfo.getFinishTime()); ApplicationAttemptId attemptId = app.getCurrentAppAttempt().getAppAttemptId(); int fairShare = fsinfo.getAppFairShare(attemptId); + //AppID numerical value parsed by parseHadoopID in yarn.dt.plugins.js + appsTableData.append("[\"") + .append(appInfo.getAppId()).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + appInfo.getUser()))).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + appInfo.getName()))).append("\",\"") + .append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml( + appInfo.getQueue()))).append("\",\"") + .append(fairShare).append("\",\"") + .append(appInfo.getStartTime()).append("\",\"") + .append(appInfo.getFinishTime()).append("\",\"") + .append(appInfo.getState()).append("\",\"") + .append(appInfo.getFinalStatus()).append("\",\"") + // Progress bar + .append("
").append("
") + .append("\",\"") + .append(appInfo.getTrackingUI()).append("\"],\n"); - tbody. - tr(). - td(). - br().$title(appInfo.getAppIdNum())._(). // for sorting - a(url("app", appInfo.getAppId()), appInfo.getAppId())._(). - td(appInfo.getUser()). - td(appInfo.getName()). - td(appInfo.getQueue()). - td("" + fairShare). - td(). - br().$title(String.valueOf(appInfo.getStartTime()))._(). - _(startTime)._(). - td(). - br().$title(String.valueOf(appInfo.getFinishTime()))._(). - _(finishTime)._(). - td(appInfo.getState()). - td(appInfo.getFinalStatus()). - td(). - br().$title(percent)._(). // for sorting - div(_PROGRESSBAR). - $title(join(percent, '%')). // tooltip - div(_PROGRESSBAR_VALUE). - $style(join("width:", percent, '%'))._()._()._(). - td(). - a(!appInfo.isTrackingUrlReady()? - "#" : appInfo.getTrackingUrlPretty(), appInfo.getTrackingUI())._()._(); } + if(appsTableData.charAt(appsTableData.length() - 2) == ',') { + appsTableData.delete(appsTableData.length()-2, appsTableData.length()-1); + } + appsTableData.append("]"); + html.script().$type("text/javascript"). + _("var appsTableData=" + appsTableData)._(); + tbody._()._(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java index 3a560161402..b36fd9a4c2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java @@ -20,16 +20,18 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import static org.apache.hadoop.yarn.util.StringHelper.join; -import java.util.List; +import java.util.Collection; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerLeafQueueInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerQueueInfo; import org.apache.hadoop.yarn.webapp.ResponseInfo; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.LI; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.UL; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.view.InfoBlock; @@ -48,16 +50,15 @@ public class FairSchedulerPage extends RmView { @RequestScoped static class FSQInfo { - FairSchedulerInfo fsinfo; FairSchedulerQueueInfo qinfo; } - static class QueueInfoBlock extends HtmlBlock { - final FairSchedulerQueueInfo qinfo; + static class LeafQueueBlock extends HtmlBlock { + final FairSchedulerLeafQueueInfo qinfo; - @Inject QueueInfoBlock(ViewContext ctx, FSQInfo info) { + @Inject LeafQueueBlock(ViewContext ctx, FSQInfo info) { super(ctx); - qinfo = (FairSchedulerQueueInfo) info.qinfo; + qinfo = (FairSchedulerLeafQueueInfo)info.qinfo; } @Override @@ -81,6 +82,47 @@ 
public class FairSchedulerPage extends RmView { } } + static class QueueBlock extends HtmlBlock { + final FSQInfo fsqinfo; + + @Inject QueueBlock(FSQInfo info) { + fsqinfo = info; + } + + @Override + public void render(Block html) { + Collection subQueues = fsqinfo.qinfo.getChildQueues(); + UL ul = html.ul("#pq"); + for (FairSchedulerQueueInfo info : subQueues) { + float capacity = info.getMaxResourcesFraction(); + float fairShare = info.getFairShareFraction(); + float used = info.getUsedFraction(); + LI> li = ul. + li(). + a(_Q).$style(width(capacity * Q_MAX_WIDTH)). + $title(join("Fair Share:", percent(fairShare))). + span().$style(join(Q_GIVEN, ";font-size:1px;", width(fairShare/capacity))). + _('.')._(). + span().$style(join(width(used/capacity), + ";font-size:1px;left:0%;", used > fairShare ? Q_OVER : Q_UNDER)). + _('.')._(). + span(".q", info.getQueueName())._(). + span().$class("qstats").$style(left(Q_STATS_POS)). + _(join(percent(used), " used"))._(); + + fsqinfo.qinfo = info; + if (info instanceof FairSchedulerLeafQueueInfo) { + li.ul("#lq").li()._(LeafQueueBlock.class)._()._(); + } else { + li._(QueueBlock.class); + } + li._(); + } + + ul._(); + } + } + static class QueuesBlock extends HtmlBlock { final FairScheduler fs; final FSQInfo fsqinfo; @@ -89,8 +131,9 @@ public class FairSchedulerPage extends RmView { fs = (FairScheduler)rm.getResourceScheduler(); fsqinfo = info; } - - @Override public void render(Block html) { + + @Override + public void render(Block html) { html._(MetricsOverviewTable.class); UL>> ul = html. div("#cs-wrapper.ui-widget"). @@ -106,8 +149,8 @@ public class FairSchedulerPage extends RmView { span(".q", "default")._()._(); } else { FairSchedulerInfo sinfo = new FairSchedulerInfo(fs); - fsqinfo.fsinfo = sinfo; - fsqinfo.qinfo = null; + fsqinfo.qinfo = sinfo.getRootQueueInfo(); + float used = fsqinfo.qinfo.getUsedFraction(); ul. li().$style("margin-bottom: 1em"). @@ -120,29 +163,15 @@ public class FairSchedulerPage extends RmView { _("Used (over fair share)")._(). span().$class("qlegend ui-corner-all ui-state-default"). _("Max Capacity")._(). - _(); - - List subQueues = fsqinfo.fsinfo.getQueueInfos(); - for (FairSchedulerQueueInfo info : subQueues) { - fsqinfo.qinfo = info; - float capacity = info.getMaxResourcesFraction(); - float fairShare = info.getFairShareFraction(); - float used = info.getUsedFraction(); - ul. - li(). - a(_Q).$style(width(capacity * Q_MAX_WIDTH)). - $title(join("Fair Share:", percent(fairShare))). - span().$style(join(Q_GIVEN, ";font-size:1px;", width(fairShare/capacity))). - _('.')._(). - span().$style(join(width(used/capacity), - ";font-size:1px;left:0%;", used > fairShare ? Q_OVER : Q_UNDER)). - _('.')._(). - span(".q", info.getQueueName())._(). - span().$class("qstats").$style(left(Q_STATS_POS)). - _(join(percent(used), " used"))._(). - ul("#lq").li()._(QueueInfoBlock.class)._()._(). - _(); - } + _(). + li(). + a(_Q).$style(width(Q_MAX_WIDTH)). + span().$style(join(width(used), ";left:0%;", + used > 1 ? Q_OVER : Q_UNDER))._(".")._(). + span(".q", "root")._(). + span().$class("qstats").$style(left(Q_STATS_POS)). + _(join(percent(used), " used"))._(). + _(QueueBlock.class)._(); } ul._()._(). script().$type("text/javascript"). 
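// QueueBlock above renders the fair-scheduler queue hierarchy recursively: a parent queue
// re-renders QueueBlock for each child, while a leaf (FairSchedulerLeafQueueInfo) falls through
// to LeafQueueBlock. A minimal sketch of the same recursion over invented QueueInfoSketch /
// LeafQueueInfoSketch types (the real code walks FairSchedulerQueueInfo objects through Hamlet
// UL/LI blocks):

import java.util.Collections;
import java.util.List;

class QueueInfoSketch {
  final String name;
  final List<QueueInfoSketch> children;
  QueueInfoSketch(String name, List<QueueInfoSketch> children) {
    this.name = name;
    this.children = children;
  }
}

class LeafQueueInfoSketch extends QueueInfoSketch {
  LeafQueueInfoSketch(String name) {
    super(name, Collections.<QueueInfoSketch>emptyList());
  }
}

final class QueueTreeRenderer {
  /** Prints the queue tree, indenting children; leaves get the LeafQueueBlock-style treatment. */
  static void render(QueueInfoSketch queue, int depth) {
    char[] pad = new char[depth];
    java.util.Arrays.fill(pad, ' ');
    String indent = new String(pad);
    if (queue instanceof LeafQueueInfoSketch) {
      System.out.println(indent + queue.name + " (leaf)");   // LeafQueueBlock case
    } else {
      System.out.println(indent + queue.name);               // parent bar, then recurse
      for (QueueInfoSketch child : queue.children) {
        render(child, depth + 2);                            // QueueBlock recursing into children
      }
    }
  }
}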
@@ -159,13 +188,16 @@ public class FairSchedulerPage extends RmView { "#cs a { font-weight: normal; margin: 2px; position: relative }", "#cs a span { font-weight: normal; font-size: 80% }", "#cs-wrapper .ui-widget-header { padding: 0.2em 0.5em }", + ".qstats { font-weight: normal; font-size: 80%; position: absolute }", + ".qlegend { font-weight: normal; padding: 0 1em; margin: 1em }", "table.info tr th {width: 50%}")._(). // to center info table script("/static/jt/jquery.jstree.js"). script().$type("text/javascript"). _("$(function() {", " $('#cs a span').addClass('ui-corner-all').css('position', 'absolute');", " $('#cs').bind('loaded.jstree', function (e, data) {", - " data.inst.open_all(); }).", + " data.inst.open_node('#pq', true);", + " }).", " jstree({", " core: { animation: 188, html_titles: true },", " plugins: ['themeroller', 'html_data', 'ui'],", @@ -175,8 +207,9 @@ public class FairSchedulerPage extends RmView { " });", " $('#cs').bind('select_node.jstree', function(e, data) {", " var q = $('.q', data.rslt.obj).first().text();", - " if (q == 'root') q = '';", - " $('#apps').dataTable().fnFilter(q, 3);", + " if (q == 'root') q = '';", + " else q = '^' + q.substr(q.lastIndexOf('.') + 1) + '$';", + " $('#apps').dataTable().fnFilter(q, 3, true);", " });", " $('#cs').show();", "});")._(); @@ -197,4 +230,19 @@ public class FairSchedulerPage extends RmView { static String left(float f) { return String.format("left:%.1f%%", f * 100); } + + @Override + protected String getAppsTableColumnDefs() { + StringBuilder sb = new StringBuilder(); + return sb + .append("[\n") + .append("{'sType':'numeric', 'aTargets': [0]") + .append(", 'mRender': parseHadoopID }") + + .append("\n, {'sType':'numeric', 'aTargets': [5, 6]") + .append(", 'mRender': renderHadoopDate }") + + .append("\n, {'sType':'numeric', bSearchable:false, 'aTargets': [9]") + .append(", 'mRender': parseHadoopProgress }]").toString(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java index f9ad7825575..59c62ffbe6c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java @@ -66,7 +66,17 @@ public class RmView extends TwoColumnLayout { .append(", bDeferRender: true") .append(", bProcessing: true") - .append("\n, aoColumnDefs: [\n") + .append("\n, aoColumnDefs: ") + .append(getAppsTableColumnDefs()) + + // Sort by id upon page load + .append(", aaSorting: [[0, 'desc']]}").toString(); + } + + protected String getAppsTableColumnDefs() { + StringBuilder sb = new StringBuilder(); + return sb + .append("[\n") .append("{'sType':'numeric', 'aTargets': [0]") .append(", 'mRender': parseHadoopID }") @@ -74,9 +84,6 @@ public class RmView extends TwoColumnLayout { .append(", 'mRender': renderHadoopDate }") .append("\n, {'sType':'numeric', bSearchable:false, 'aTargets': [8]") - .append(", 'mRender': parseHadoopProgress }]") - - // Sort by id upon page load - .append(", aaSorting: [[0, 'desc']]}").toString(); + .append(", 'mRender': parseHadoopProgress }]").toString(); } 
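// The RmView change above extracts the apps-table column definitions into an overridable
// getAppsTableColumnDefs() hook; FairSchedulerPage overrides it because its table carries an
// extra fair-share column (index 4 in FairSchedulerAppsBlock), which pushes the progress column
// to index 9 instead of 8. A minimal sketch of that template-method shape, with invented
// ViewSketch/appsTableInit/columnDefs names:

class ViewSketch {
  /** Builds the dataTables init string; subclasses customise only the column definitions. */
  String appsTableInit() {
    return "{bDeferRender: true, aoColumnDefs: " + columnDefs()
        + ", aaSorting: [[0, 'desc']]}";
  }

  String columnDefs() {
    return "[{'aTargets': [8], 'mRender': parseHadoopProgress}]";
  }
}

class FairSchedulerViewSketch extends ViewSketch {
  @Override
  String columnDefs() {
    // The fair-share column pushes the progress column one slot to the right.
    return "[{'aTargets': [9], 'mRender': parseHadoopProgress}]";
  }
}

// Usage: new FairSchedulerViewSketch().appsTableInit() yields the subclass's column definitions
// embedded in the shared init string.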
} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java index 4fe19ca13d1..e1fac4a2cf4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java @@ -18,33 +18,23 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; public class FairSchedulerInfo { - private List queueInfos; private FairScheduler scheduler; public FairSchedulerInfo(FairScheduler fs) { scheduler = fs; - Collection queues = fs.getQueueManager().getLeafQueues(); - queueInfos = new ArrayList(); - for (FSLeafQueue queue : queues) { - queueInfos.add(new FairSchedulerQueueInfo(queue, fs)); - } - } - - public List getQueueInfos() { - return queueInfos; } public int getAppFairShare(ApplicationAttemptId appAttemptId) { return scheduler.getSchedulerApp(appAttemptId). getAppSchedulable().getFairShare().getMemory(); } + + public FairSchedulerQueueInfo getRootQueueInfo() { + return new FairSchedulerQueueInfo(scheduler.getQueueManager(). 
+ getRootQueue(), scheduler); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerLeafQueueInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerLeafQueueInfo.java new file mode 100644 index 00000000000..bee1cfd9866 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerLeafQueueInfo.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import java.util.Collection; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AppSchedulable; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue; + +public class FairSchedulerLeafQueueInfo extends FairSchedulerQueueInfo { + private int numPendingApps; + private int numActiveApps; + + public FairSchedulerLeafQueueInfo(FSLeafQueue queue, FairScheduler scheduler) { + super(queue, scheduler); + Collection apps = queue.getAppSchedulables(); + for (AppSchedulable app : apps) { + if (app.getApp().isPending()) { + numPendingApps++; + } else { + numActiveApps++; + } + } + } + + public int getNumActiveApplications() { + return numPendingApps; + } + + public int getNumPendingApplications() { + return numActiveApps; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java index 35749427d00..3cab1da33c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java @@ -18,19 +18,18 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import java.util.ArrayList; import java.util.Collection; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AppSchedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager; -public class FairSchedulerQueueInfo { - private int numPendingApps; - private int numActiveApps; - +public class FairSchedulerQueueInfo { private int fairShare; private int minShare; private int maxShare; @@ -48,16 +47,9 @@ public class FairSchedulerQueueInfo { private String queueName; - public FairSchedulerQueueInfo(FSLeafQueue queue, FairScheduler scheduler) { - Collection apps = queue.getAppSchedulables(); - for 
(AppSchedulable app : apps) { - if (app.getApp().isPending()) { - numPendingApps++; - } else { - numActiveApps++; - } - } - + private Collection childInfos; + + public FairSchedulerQueueInfo(FSQueue queue, FairScheduler scheduler) { QueueManager manager = scheduler.getQueueManager(); queueName = queue.getName(); @@ -81,6 +73,16 @@ public class FairSchedulerQueueInfo { fractionMinShare = (float)minShare / clusterMaxMem; maxApps = manager.getQueueMaxApps(queueName); + + Collection childQueues = queue.getChildQueues(); + childInfos = new ArrayList(); + for (FSQueue child : childQueues) { + if (child instanceof FSLeafQueue) { + childInfos.add(new FairSchedulerLeafQueueInfo((FSLeafQueue)child, scheduler)); + } else { + childInfos.add(new FairSchedulerQueueInfo(child, scheduler)); + } + } } /** @@ -96,15 +98,7 @@ public class FairSchedulerQueueInfo { public int getFairShare() { return fairShare; } - - public int getNumActiveApplications() { - return numPendingApps; - } - - public int getNumPendingApplications() { - return numActiveApps; - } - + public Resource getMinResources() { return minResources; } @@ -148,4 +142,8 @@ public class FairSchedulerQueueInfo { public float getMaxResourcesFraction() { return (float)maxShare / clusterMaxMem; } + + public Collection getChildQueues() { + return childInfos; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index b097ea37140..ace5efb1fa2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -47,7 +47,7 @@ public class MockAM { private volatile int responseId = 0; private final ApplicationAttemptId attemptId; private final RMContext context; - private final AMRMProtocol amRMProtocol; + private AMRMProtocol amRMProtocol; private final List requests = new ArrayList(); private final List releases = new ArrayList(); @@ -58,6 +58,10 @@ public class MockAM { this.amRMProtocol = amRMProtocol; this.attemptId = attemptId; } + + void setAMRMProtocol(AMRMProtocol amRMProtocol) { + this.amRMProtocol = amRMProtocol; + } public void waitForState(RMAppAttemptState finalState) throws Exception { RMApp app = context.getRMApps().get(attemptId.getApplicationId()); @@ -66,7 +70,8 @@ public class MockAM { while (!finalState.equals(attempt.getAppAttemptState()) && timeoutSecs++ < 20) { System.out - .println("AppAttempt State is : " + attempt.getAppAttemptState() + .println("AppAttempt : " + attemptId + " State is : " + + attempt.getAppAttemptState() + " Waiting for state : " + finalState); Thread.sleep(500); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java index ba999bfb2e0..8f66bdba485 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java @@ -46,7 +46,7 @@ public class MockNM { private int responseId; private NodeId nodeId; private final int memory; - private final ResourceTrackerService resourceTracker; + private ResourceTrackerService resourceTracker; private final int httpPort = 2; private MasterKey currentMasterKey; @@ -66,6 +66,10 @@ public class MockNM { public int getHttpPort() { return httpPort; } + + void setResourceTrackerService(ResourceTrackerService resourceTracker) { + this.resourceTracker = resourceTracker; + } public void containerStatus(Container container) throws Exception { Map> conts = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 11bf85d3f96..0bc3211a819 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -39,9 +39,10 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -63,10 +64,17 @@ public class MockRM extends ResourceManager { } public MockRM(Configuration conf) { - super(StoreFactory.getStore(conf)); + this(conf, null); + } + + public MockRM(Configuration conf, RMStateStore store) { + super(); init(conf instanceof YarnConfiguration ? 
conf : new YarnConfiguration(conf)); + if(store != null) { + setRMStateStore(store); + } Logger rootLogger = LogManager.getRootLogger(); - rootLogger.setLevel(Level.DEBUG); + rootLogger.setLevel(Level.DEBUG); } public void waitForState(ApplicationId appId, RMAppState finalState) @@ -75,7 +83,7 @@ public class MockRM extends ResourceManager { Assert.assertNotNull("app shouldn't be null", app); int timeoutSecs = 0; while (!finalState.equals(app.getState()) && timeoutSecs++ < 20) { - System.out.println("App State is : " + app.getState() + System.out.println("App : " + appId + " State is : " + app.getState() + " Waiting for state : " + finalState); Thread.sleep(500); } @@ -83,6 +91,24 @@ public class MockRM extends ResourceManager { Assert.assertEquals("App state is not correct (timedout)", finalState, app.getState()); } + + public void waitForState(ApplicationAttemptId attemptId, + RMAppAttemptState finalState) + throws Exception { + RMApp app = getRMContext().getRMApps().get(attemptId.getApplicationId()); + Assert.assertNotNull("app shouldn't be null", app); + RMAppAttempt attempt = app.getCurrentAppAttempt(); + int timeoutSecs = 0; + while (!finalState.equals(attempt.getAppAttemptState()) && timeoutSecs++ < 20) { + System.out.println("AppAttempt : " + attemptId + + " State is : " + attempt.getAppAttemptState() + + " Waiting for state : " + finalState); + Thread.sleep(500); + } + System.out.println("Attempt State is : " + attempt.getAppAttemptState()); + Assert.assertEquals("Attempt state is not correct (timedout)", finalState, + attempt.getAppAttemptState()); + } // get new application id public GetNewApplicationResponse getNewAppId() throws Exception { @@ -97,11 +123,16 @@ public class MockRM extends ResourceManager { // client public RMApp submitApp(int masterMemory, String name, String user) throws Exception { - return submitApp(masterMemory, name, user, null); + return submitApp(masterMemory, name, user, null, false); } - + public RMApp submitApp(int masterMemory, String name, String user, Map acls) throws Exception { + return submitApp(masterMemory, name, user, acls, false); + } + + public RMApp submitApp(int masterMemory, String name, String user, + Map acls, boolean unmanaged) throws Exception { ClientRMProtocol client = getClientRMService(); GetNewApplicationResponse resp = client.getNewApplication(Records .newRecord(GetNewApplicationRequest.class)); @@ -114,6 +145,9 @@ public class MockRM extends ResourceManager { sub.setApplicationId(appId); sub.setApplicationName(name); sub.setUser(user); + if(unmanaged) { + sub.setUnmanagedAM(true); + } ContainerLaunchContext clc = Records .newRecord(ContainerLaunchContext.class); Resource capability = Records.newRecord(Resource.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java index e0852c3ddd4..9ec2f13e2d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java @@ -51,7 +51,7 @@ import org.apache.hadoop.yarn.factories.RecordFactory; 
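// The MockRM/MockAM helpers above share one poll-and-assert idiom: sample the state every 500 ms,
// give up after roughly 20 attempts, and assert on the final state so a stuck transition fails
// the test instead of hanging it. A standalone sketch of that idiom (Condition and waitFor are
// invented names):

final class WaitForSketch {
  interface Condition {
    boolean holds();
  }

  /** Polls the condition every 500 ms, up to maxTries attempts; returns whether it ever held. */
  static boolean waitFor(Condition condition, int maxTries) throws InterruptedException {
    int tries = 0;
    while (!condition.holds() && tries++ < maxTries) {
      Thread.sleep(500);
    }
    return condition.holds();
  }
}

// A test would then assert on the result, roughly:
//   Assert.assertTrue("attempt never reached ALLOCATED",
//       WaitForSketch.waitFor(new WaitForSketch.Condition() {
//         public boolean holds() {
//           return attempt.getAppAttemptState() == RMAppAttemptState.ALLOCATED;
//         }
//       }, 20));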
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.service.Service.STATE; import org.apache.hadoop.yarn.util.BuilderUtils; @@ -85,7 +85,7 @@ public class TestApplicationACLs { @BeforeClass public static void setup() throws InterruptedException, IOException { - RMStateStore store = StoreFactory.getStore(conf); + RMStateStore store = RMStateStoreFactory.getStore(conf); conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true); AccessControlList adminACL = new AccessControlList(""); adminACL.addGroup(SUPER_GROUP); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java new file mode 100644 index 00000000000..d4f97380c3d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.yarn.api.records.AMResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeAction; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.util.BuilderUtils; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; + +public class TestRMRestart { + + @Test + public void testRMRestart() throws Exception { + Logger rootLogger = LogManager.getRootLogger(); + rootLogger.setLevel(Level.DEBUG); + ExitUtil.disableSystemExit(); + + YarnConfiguration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.RECOVERY_ENABLED, "true"); + conf.set(YarnConfiguration.RM_STORE, + "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore"); + conf.set(YarnConfiguration.RM_SCHEDULER, + "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler"); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + RMState rmState = memStore.getState(); + Map rmAppState = + rmState.getApplicationState(); + + + // PHASE 1: create state in an RM + + // start RM + MockRM rm1 = new MockRM(conf, memStore); + + // start like normal because state is empty + rm1.start(); + + MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService()); + MockNM nm2 = new MockNM("h2:5678", 15120, rm1.getResourceTrackerService()); + nm1.registerNode(); + nm2.registerNode(); // nm2 will not heartbeat with RM1 + + // create app that will not be saved because it will finish + RMApp app0 = rm1.submitApp(200); + RMAppAttempt attempt0 = app0.getCurrentAppAttempt(); + // spot check that app is saved + Assert.assertEquals(1, rmAppState.size()); + nm1.nodeHeartbeat(true); + MockAM am0 = rm1.sendAMLaunched(attempt0.getAppAttemptId()); + am0.registerAppAttempt(); + am0.unregisterAppAttempt(); + nm1.nodeHeartbeat(attempt0.getAppAttemptId(), 1, ContainerState.COMPLETE); + am0.waitForState(RMAppAttemptState.FINISHED); + rm1.waitForState(app0.getApplicationId(), RMAppState.FINISHED); + + // spot check that app is not saved anymore + Assert.assertEquals(0, rmAppState.size()); + + // create app that gets launched and does allocate before RM restart + RMApp app1 = 
rm1.submitApp(200); + // assert app1 info is saved + ApplicationState appState = rmAppState.get(app1.getApplicationId()); + Assert.assertNotNull(appState); + Assert.assertEquals(0, appState.getAttemptCount()); + Assert.assertEquals(appState.getApplicationSubmissionContext() + .getApplicationId(), app1.getApplicationSubmissionContext() + .getApplicationId()); + + //kick the scheduling to allocate AM container + nm1.nodeHeartbeat(true); + + // assert app1 attempt is saved + RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); + ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId(); + rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED); + Assert.assertEquals(1, appState.getAttemptCount()); + ApplicationAttemptState attemptState = + appState.getAttempt(attemptId1); + Assert.assertNotNull(attemptState); + Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), + attemptState.getMasterContainer().getId()); + + // launch the AM + MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId()); + am1.registerAppAttempt(); + + // AM request for containers + am1.allocate("h1" , 1000, 1, new ArrayList()); + // kick the scheduler + nm1.nodeHeartbeat(true); + List conts = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + while (conts.size() == 0) { + nm1.nodeHeartbeat(true); + conts.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(500); + } + + // create app that does not get launched by RM before RM restart + RMApp app2 = rm1.submitApp(200); + + // assert app2 info is saved + appState = rmAppState.get(app2.getApplicationId()); + Assert.assertNotNull(appState); + Assert.assertEquals(0, appState.getAttemptCount()); + Assert.assertEquals(appState.getApplicationSubmissionContext() + .getApplicationId(), app2.getApplicationSubmissionContext() + .getApplicationId()); + + // create unmanaged app + RMApp appUnmanaged = rm1.submitApp(200, "", "", null, true); + ApplicationAttemptId unmanagedAttemptId = + appUnmanaged.getCurrentAppAttempt().getAppAttemptId(); + // assert appUnmanaged info is saved + ApplicationId unmanagedAppId = appUnmanaged.getApplicationId(); + appState = rmAppState.get(unmanagedAppId); + Assert.assertNotNull(appState); + // wait for attempt to reach LAUNCHED state + rm1.waitForState(unmanagedAttemptId, RMAppAttemptState.LAUNCHED); + rm1.waitForState(unmanagedAppId, RMAppState.ACCEPTED); + // assert unmanaged attempt info is saved + Assert.assertEquals(1, appState.getAttemptCount()); + Assert.assertEquals(appState.getApplicationSubmissionContext() + .getApplicationId(), appUnmanaged.getApplicationSubmissionContext() + .getApplicationId()); + + + // PHASE 2: create new RM and start from old state + + // create new RM to represent restart and recover state + MockRM rm2 = new MockRM(conf, memStore); + + // start new RM + rm2.start(); + + // change NM to point to new RM + nm1.setResourceTrackerService(rm2.getResourceTrackerService()); + nm2.setResourceTrackerService(rm2.getResourceTrackerService()); + + // verify load of old state + // only 2 apps are loaded since unmanaged app is not loaded back since it + // cannot be restarted by the RM this will change with work preserving RM + // restart in which AMs/NMs are not rebooted + Assert.assertEquals(2, rm2.getRMContext().getRMApps().size()); + + // verify correct number of attempts and other data + RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId()); + Assert.assertNotNull(loadedApp1); + //Assert.assertEquals(1, 
loadedApp1.getAppAttempts().size()); + Assert.assertEquals(app1.getApplicationSubmissionContext() + .getApplicationId(), loadedApp1.getApplicationSubmissionContext() + .getApplicationId()); + + RMApp loadedApp2 = rm2.getRMContext().getRMApps().get(app2.getApplicationId()); + Assert.assertNotNull(loadedApp2); + //Assert.assertEquals(0, loadedApp2.getAppAttempts().size()); + Assert.assertEquals(app2.getApplicationSubmissionContext() + .getApplicationId(), loadedApp2.getApplicationSubmissionContext() + .getApplicationId()); + + // verify state machine kicked into expected states + rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED); + rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED); + + // verify new attempts created + Assert.assertEquals(2, loadedApp1.getAppAttempts().size()); + Assert.assertEquals(1, loadedApp2.getAppAttempts().size()); + + // verify old AM is not accepted + // change running AM to talk to new RM + am1.setAMRMProtocol(rm2.getApplicationMasterService()); + AMResponse amResponse = am1.allocate(new ArrayList(), + new ArrayList()); + Assert.assertTrue(amResponse.getReboot()); + + // NM should be rebooted on heartbeat, even first heartbeat for nm2 + HeartbeatResponse hbResponse = nm1.nodeHeartbeat(true); + Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction()); + hbResponse = nm2.nodeHeartbeat(true); + Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction()); + + // new NM to represent NM re-register + nm1 = rm2.registerNode("h1:1234", 15120); + nm2 = rm2.registerNode("h2:5678", 15120); + + // verify no more reboot response sent + hbResponse = nm1.nodeHeartbeat(true); + Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction()); + hbResponse = nm2.nodeHeartbeat(true); + Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction()); + + // assert app1 attempt is saved + attempt1 = loadedApp1.getCurrentAppAttempt(); + attemptId1 = attempt1.getAppAttemptId(); + rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED); + appState = rmAppState.get(loadedApp1.getApplicationId()); + attemptState = appState.getAttempt(attemptId1); + Assert.assertNotNull(attemptState); + Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), + attemptState.getMasterContainer().getId()); + + // Nodes on which the AM's run + MockNM am1Node = nm1; + if(attemptState.getMasterContainer().getNodeId().toString().contains("h2")){ + am1Node = nm2; + } + + // assert app2 attempt is saved + RMAppAttempt attempt2 = loadedApp2.getCurrentAppAttempt(); + ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId(); + rm2.waitForState(attemptId2, RMAppAttemptState.ALLOCATED); + appState = rmAppState.get(loadedApp2.getApplicationId()); + attemptState = appState.getAttempt(attemptId2); + Assert.assertNotNull(attemptState); + Assert.assertEquals(BuilderUtils.newContainerId(attemptId2, 1), + attemptState.getMasterContainer().getId()); + + MockNM am2Node = nm1; + if(attemptState.getMasterContainer().getNodeId().toString().contains("h2")){ + am2Node = nm2; + } + + // start the AM's + am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId()); + am1.registerAppAttempt(); + + MockAM am2 = rm2.sendAMLaunched(attempt2.getAppAttemptId()); + am2.registerAppAttempt(); + + //request for containers + am1.allocate("h1" , 1000, 3, new ArrayList()); + am2.allocate("h2" , 1000, 1, new ArrayList()); + + // verify container allocate continues to work + nm1.nodeHeartbeat(true); + nm2.nodeHeartbeat(true); + conts = am1.allocate(new ArrayList(), + new 
ArrayList()).getAllocatedContainers(); + while (conts.size() == 0) { + nm1.nodeHeartbeat(true); + nm2.nodeHeartbeat(true); + conts.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(500); + } + + // finish the AM's + am1.unregisterAppAttempt(); + am1Node.nodeHeartbeat(attempt1.getAppAttemptId(), 1, ContainerState.COMPLETE); + am1.waitForState(RMAppAttemptState.FINISHED); + + am2.unregisterAppAttempt(); + am2Node.nodeHeartbeat(attempt2.getAppAttemptId(), 1, ContainerState.COMPLETE); + am2.waitForState(RMAppAttemptState.FINISHED); + + // stop RM's + rm2.stop(); + rm1.stop(); + + // completed apps should be removed + Assert.assertEquals(0, rmAppState.size()); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java index 9ae5cbae0f7..65743c36eff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java @@ -31,8 +31,6 @@ import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.junit.After; @@ -47,8 +45,7 @@ public class TestResourceManager { @Before public void setUp() throws Exception { Configuration conf = new YarnConfiguration(); - RMStateStore store = StoreFactory.getStore(conf); - resourceManager = new ResourceManager(store); + resourceManager = new ResourceManager(); resourceManager.init(conf); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index 2b9c8485c56..088eca9a2b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationMaster; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationStatus; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import 
org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; @@ -152,6 +153,11 @@ public abstract class MockAsm extends MockApps { throw new UnsupportedOperationException("Not supported yet."); } + @Override + public ApplicationSubmissionContext getApplicationSubmissionContext() { + throw new UnsupportedOperationException("Not supported yet."); + } + @Override public String getName() { throw new UnsupportedOperationException("Not supported yet."); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java index f866694c1b5..d1d45d09f3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java @@ -35,7 +35,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.junit.After; import org.junit.Before; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java index 296ca7388d0..add00db378e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java @@ -44,7 +44,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index 2b1b892e189..142cc7e9e1e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -24,9 +24,11 @@ import java.util.LinkedHashMap; import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -67,6 +69,11 @@ public class MockRMApp implements RMApp { public ApplicationId getApplicationId() { return id; } + + @Override + public ApplicationSubmissionContext getApplicationSubmissionContext() { + return new ApplicationSubmissionContextPBImpl(); + } @Override public RMAppState getState() { @@ -118,7 +125,9 @@ public class MockRMApp implements RMApp { public Map getAppAttempts() { Map attempts = new LinkedHashMap(); - attempts.put(attempt.getAppAttemptId(), attempt); + if(attempt != null) { + attempts.put(attempt.getAppAttemptId(), attempt); + } return attempts; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 5c766c55cd4..f944744f2d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; @@ -65,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt 
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; @@ -167,6 +169,9 @@ public class TestRMAppAttemptTransitions { new RMContainerTokenSecretManager(conf), new ClientToAMTokenSecretManagerInRM()); + RMStateStore store = mock(RMStateStore.class); + ((RMContextImpl) rmContext).setStateStore(store); + scheduler = mock(YarnScheduler.class); masterService = mock(ApplicationMasterService.class); applicationMasterLauncher = mock(ApplicationMasterLauncher.class); @@ -295,6 +300,14 @@ public class TestRMAppAttemptTransitions { assertEquals(0, applicationAttempt.getRanNodes().size()); assertNull(applicationAttempt.getFinalApplicationStatus()); } + + /** + * {@link RMAppAttemptState#RECOVERED} + */ + private void testAppAttemptRecoveredState() { + assertEquals(RMAppAttemptState.RECOVERED, + applicationAttempt.getAppAttemptState()); + } /** * {@link RMAppAttemptState#SCHEDULED} @@ -438,6 +451,15 @@ public class TestRMAppAttemptTransitions { new RMAppAttemptEvent( applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.APP_ACCEPTED)); + + if(unmanagedAM){ + assertEquals(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, + applicationAttempt.getAppAttemptState()); + applicationAttempt.handle( + new RMAppAttemptStoredEvent( + applicationAttempt.getAppAttemptId(), null)); + } + testAppAttemptScheduledState(); } @@ -463,6 +485,12 @@ public class TestRMAppAttemptTransitions { applicationAttempt.getAppAttemptId(), container)); + assertEquals(RMAppAttemptState.ALLOCATED_SAVING, + applicationAttempt.getAppAttemptState()); + applicationAttempt.handle( + new RMAppAttemptStoredEvent( + applicationAttempt.getAppAttemptId(), null)); + testAppAttemptAllocatedState(container); return container; @@ -554,6 +582,15 @@ public class TestRMAppAttemptTransitions { testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); } + @Test + public void testNewToRecovered() { + applicationAttempt.handle( + new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.RECOVER)); + testAppAttemptRecoveredState(); + } + @Test public void testSubmittedToFailed() { submitApplicationAttempt(); @@ -604,7 +641,7 @@ public class TestRMAppAttemptTransitions { diagnostics)); testAppAttemptFailedState(amContainer, diagnostics); } - + @Test public void testRunningToFailed() { Container amContainer = allocateApplicationAttempt(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index b4cc6b38377..04b365111ea 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -27,7 +27,6 @@ import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; @@ -40,8 +39,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.Task; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; @@ -76,8 +73,7 @@ public class TestCapacityScheduler { @Before public void setUp() throws Exception { - RMStateStore store = StoreFactory.getStore(new Configuration()); - resourceManager = new ResourceManager(store); + resourceManager = new ResourceManager(); CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); setupQueueConfiguration(csConf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java index cbad1564ffb..38ca88f23ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java @@ -29,8 +29,6 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.junit.Before; @@ -47,8 +45,7 @@ public class TestFSLeafQueue { Configuration conf = createConfiguration(); // All tests assume only one assignment per node update conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false"); - RMStateStore store = StoreFactory.getStore(conf); - ResourceManager resourceManager = new ResourceManager(store); + ResourceManager resourceManager = new ResourceManager(); resourceManager.init(conf); 
((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 56d247ff3bb..e43897ae15f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -51,8 +51,6 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -102,8 +100,7 @@ public class TestFairScheduler { Configuration conf = createConfiguration(); // All tests assume only one assignment per node update conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false"); - RMStateStore store = StoreFactory.getStore(conf); - resourceManager = new ResourceManager(store); + resourceManager = new ResourceManager(); resourceManager.init(conf); ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); @@ -164,6 +161,13 @@ public class TestFairScheduler { scheduler.allocate(id, ask, new ArrayList()); return id; } + + private void createSchedulingRequestExistingApplication(int memory, int priority, ApplicationAttemptId attId) { + List ask = new ArrayList(); + ResourceRequest request = createResourceRequest(memory, "*", priority, 1); + ask.add(request); + scheduler.allocate(attId, ask, new ArrayList()); + } // TESTS @@ -1101,4 +1105,86 @@ public class TestFairScheduler { assertTrue(Resources.equals( Resources.createResource(1536), scheduler.resToPreempt(schedD, clock.getTime()))); } + + @Test + public void testMultipleContainersWaitingForReservation() { + // Add a node + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Request full capacity of node + createSchedulingRequest(1024, "queue1", "user1", 1); + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1, + new ArrayList(), new ArrayList()); + scheduler.handle(updateEvent); + + ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue2", "user2", 1); + ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue3", "user3", 1); + + scheduler.update(); + scheduler.handle(updateEvent); + + // One container should get reservation and the other should get nothing + 
assertEquals(1024, + scheduler.applications.get(attId1).getCurrentReservation().getMemory()); + assertEquals(0, + scheduler.applications.get(attId2).getCurrentReservation().getMemory()); + } + + @Test + public void testUserMaxRunningApps() throws Exception { + // Set max running apps + Configuration conf = createConfiguration(); + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println(""); + out.println("1"); + out.println(""); + out.println(""); + out.close(); + + QueueManager queueManager = scheduler.getQueueManager(); + queueManager.initialize(); + + // Add a node + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(8192)); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Request for app 1 + ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue1", + "user1", 1); + + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1, + new ArrayList(), new ArrayList()); + scheduler.handle(updateEvent); + + // App 1 should be running + assertEquals(1, scheduler.applications.get(attId1).getLiveContainers().size()); + + ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1", + "user1", 1); + + scheduler.update(); + scheduler.handle(updateEvent); + + // App 2 should not be running + assertEquals(0, scheduler.applications.get(attId2).getLiveContainers().size()); + + // Request another container for app 1 + createSchedulingRequestExistingApplication(1024, 1, attId1); + + scheduler.update(); + scheduler.handle(updateEvent); + + // Request should be fulfilled + assertEquals(2, scheduler.applications.get(attId1).getLiveContainers().size()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java index 78f0145edf1..db777954ca7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java @@ -27,8 +27,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.junit.After; import org.junit.Before; @@ -50,8 +48,7 @@ public class TestFairSchedulerEventLog { // All tests assume only one assignment per node update conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false"); - RMStateStore store = StoreFactory.getStore(conf); - resourceManager = new ResourceManager(store); + resourceManager = new 
ResourceManager(); resourceManager.init(conf); ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 53251191190..b18a28957e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -38,8 +38,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.Task; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -59,8 +57,7 @@ public class TestFifoScheduler { @Before public void setUp() throws Exception { - RMStateStore store = StoreFactory.getStore(new Configuration()); - resourceManager = new ResourceManager(store); + resourceManager = new ResourceManager(); Configuration conf = new Configuration(); conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, ResourceScheduler.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java index 1c3614e46df..ad127a9264d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestDelegationTokenRenewer.java @@ -357,6 +357,27 @@ public class TestDelegationTokenRenewer { } } + @Test + public void testInvalidDTWithAddApplication() throws Exception { + MyFS dfs = (MyFS)FileSystem.get(conf); + LOG.info("dfs="+(Object)dfs.hashCode() + ";conf="+conf.hashCode()); + + MyToken token = dfs.getDelegationToken(new Text("user1")); + token.cancelToken(); + + Credentials ts = new Credentials(); + ts.addToken(token.getKind(), token); + + // register the tokens for renewal + ApplicationId appId = BuilderUtils.newApplicationId(0, 0); + try { + delegationTokenRenewer.addApplication(appId, ts, true); + fail("App submission with a cancelled token should have failed"); + } catch (InvalidToken e) { + // expected + } + } + 
   /**
    * Basic idea of the test:
    * 1. register a token for 2 seconds with no cancel at the end
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
index 1c4f2cfb72b..1bb4dea0dde 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
@@ -48,8 +48,6 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.CompositeService;
 
@@ -154,8 +152,7 @@ public class MiniYARNCluster extends CompositeService {
         getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS,
                         MiniYARNCluster.getHostname() + ":0");
       }
-      RMStateStore store = StoreFactory.getStore(getConfig());
-      resourceManager = new ResourceManager(store) {
+      resourceManager = new ResourceManager() {
         @Override
         protected void doSecureLogin() throws IOException {
           // Don't try to login using keytab in the testcase.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestRMNMSecretKeys.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestRMNMSecretKeys.java
index 9b6024ce3c0..7bd1ff2032d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestRMNMSecretKeys.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestRMNMSecretKeys.java
@@ -47,7 +47,7 @@ public class TestRMNMSecretKeys {
     // intervene
     final DrainDispatcher dispatcher = new DrainDispatcher();
 
-    ResourceManager rm = new ResourceManager(null) {
+    ResourceManager rm = new ResourceManager() {
       @Override
       protected void doSecureLogin() throws IOException {
         // Do nothing.
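
The test and MiniYARNCluster changes in this patch all drop the explicit RMStateStore/StoreFactory wiring and construct the ResourceManager with its no-argument constructor; the RM now derives its state store from configuration instead. A rough sketch of that setup follows, using only names that already appear in the patch (YarnConfiguration.RECOVERY_ENABLED, YarnConfiguration.RM_STORE, MemoryRMStateStore); the class RMRecoverySetupSketch itself is invented for illustration and is not part of the patch:

    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;

    // Sketch only: recovery is driven purely by configuration after this patch.
    public class RMRecoverySetupSketch {
      public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
        conf.set(YarnConfiguration.RM_STORE,
            "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");

        // No RMStateStore constructor argument any more, matching the test changes above;
        // the RM creates the configured store internally.
        ResourceManager rm = new ResourceManager();
        rm.init(conf);
        rm.start();
      }
    }

TestRMRestart above takes the same configuration path, just with a pre-initialized MemoryRMStateStore handed to MockRM so the test can inspect the saved application and attempt state directly.
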
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java index 7545fc0d6e3..aee2cc81569 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java @@ -24,11 +24,13 @@ import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.util.TrackingUriPlugin; public class ProxyUriUtils { @SuppressWarnings("unused") @@ -143,4 +145,28 @@ public class ProxyUriUtils { throws URISyntaxException { return new URI(HttpConfig.getSchemePrefix() + noSchemeUrl); } + + /** + * Returns the first valid tracking link, if any, from the given id from the + * given list of plug-ins, if any. + * + * @param id the id of the application for which the tracking link is desired + * @param trackingUriPlugins list of plugins from which to get the tracking link + * @return the desired link if possible, otherwise null + * @throws URISyntaxException + */ + public static URI getUriFromTrackingPlugins(ApplicationId id, + List trackingUriPlugins) + throws URISyntaxException { + URI toRet = null; + for(TrackingUriPlugin plugin : trackingUriPlugins) + { + toRet = plugin.getTrackingUri(id); + if (toRet != null) + { + return toRet; + } + } + return null; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java index 93f6d0d2525..3e43351b4ba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java @@ -30,6 +30,7 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.Enumeration; import java.util.HashSet; +import java.util.List; import javax.servlet.http.Cookie; import javax.servlet.http.HttpServlet; @@ -48,8 +49,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.StringHelper; +import org.apache.hadoop.yarn.util.TrackingUriPlugin; import org.apache.hadoop.yarn.webapp.MimeType; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; @@ -61,8 +64,9 @@ public class WebAppProxyServlet extends HttpServlet { "Accept-Language", "Accept-Charset")); public static final String 
PROXY_USER_COOKIE_NAME = "proxy-user"; - - + + private final List trackingUriPlugins; + private static class _ implements Hamlet._ { //Empty } @@ -77,6 +81,18 @@ public class WebAppProxyServlet extends HttpServlet { } } + /** + * Default constructor + */ + public WebAppProxyServlet() + { + super(); + YarnConfiguration conf = new YarnConfiguration(); + this.trackingUriPlugins = + conf.getInstances(YarnConfiguration.YARN_TRACKING_URL_GENERATOR, + TrackingUriPlugin.class); + } + /** * Output 404 with appropriate message. * @param resp the http response. @@ -253,6 +269,16 @@ public class WebAppProxyServlet extends HttpServlet { if(applicationReport == null) { LOG.warn(req.getRemoteUser()+" Attempting to access "+id+ " that was not found"); + + URI toFetch = + ProxyUriUtils + .getUriFromTrackingPlugins(id, this.trackingUriPlugins); + if (toFetch != null) + { + resp.sendRedirect(resp.encodeRedirectURL(toFetch.toString())); + return; + } + notFound(resp, "Application "+appId+" could not be found, " + "please try the history server"); return; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java index 16ee7bededd..ef3c4a72557 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java @@ -21,11 +21,16 @@ package org.apache.hadoop.yarn.server.webproxy; import static org.junit.Assert.*; import java.net.URI; +import java.net.URISyntaxException; +import java.util.List; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.util.BuilderUtils; +import org.apache.hadoop.yarn.util.TrackingUriPlugin; import org.junit.Test; +import com.google.common.collect.Lists; + public class TestProxyUriUtils { @Test public void testGetPathApplicationId() { @@ -83,4 +88,36 @@ public class TestProxyUriUtils { URI result = ProxyUriUtils.getProxyUri(originalUri, proxyUri, id); assertEquals(expected, result); } + + @Test + public void testGetProxyUriFromPluginsReturnsNullIfNoPlugins() + throws URISyntaxException { + ApplicationId id = BuilderUtils.newApplicationId(6384623l, 5); + List list = + Lists.newArrayListWithExpectedSize(0); + assertNull(ProxyUriUtils.getUriFromTrackingPlugins(id, list)); + } + + @Test + public void testGetProxyUriFromPluginsReturnsValidUriWhenAble() + throws URISyntaxException { + ApplicationId id = BuilderUtils.newApplicationId(6384623l, 5); + List list = + Lists.newArrayListWithExpectedSize(2); + // Insert a plugin that returns null. + list.add(new TrackingUriPlugin() { + public URI getTrackingUri(ApplicationId id) throws URISyntaxException { + return null; + } + }); + // Insert a plugin that returns a valid URI. + list.add(new TrackingUriPlugin() { + public URI getTrackingUri(ApplicationId id) throws URISyntaxException { + return new URI("http://history.server.net/"); + } + }); + URI result = ProxyUriUtils.getUriFromTrackingPlugins(id, list); + assertNotNull(result); + + } }
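
The web-proxy changes above add a plugin hook: ProxyUriUtils.getUriFromTrackingPlugins returns the first non-null URI offered by a list of TrackingUriPlugin instances, and WebAppProxyServlet loads that list with conf.getInstances(YarnConfiguration.YARN_TRACKING_URL_GENERATOR, TrackingUriPlugin.class) so that requests for applications the RM no longer knows about can be redirected before falling back to the "not found" page. As a rough illustration of what such a plugin could look like (the class name and history-server URL below are invented for the example; only the TrackingUriPlugin base class and the getTrackingUri(ApplicationId) signature are taken from the tests above):

    import java.net.URI;
    import java.net.URISyntaxException;

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.util.TrackingUriPlugin;

    // Hypothetical plugin; not part of this patch.
    public class HistoryServerTrackingUriPlugin extends TrackingUriPlugin {
      @Override
      public URI getTrackingUri(ApplicationId id) throws URISyntaxException {
        // Send requests for unknown applications to a (made-up) history server address.
        return new URI("http://history.server.net/app/" + id);
      }
    }

Registered under YarnConfiguration.YARN_TRACKING_URL_GENERATOR, a plugin like this would be consulted in list order by the servlet's new default-constructor path shown in the diff, exactly as TestProxyUriUtils exercises with its two anonymous plugins.
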