Merging r1523402 through r1523803 from trunk to branch HDFS-2832

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2832@1523804 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Arpit Agarwal 2013-09-16 20:59:02 +00:00
commit 3e3a454bd2
49 changed files with 580 additions and 33 deletions

View File

@ -408,6 +408,9 @@ Release 2.1.1-beta - UNRELEASED
HADOOP-9945. HAServiceState should have a state for stopped services.
(Karthik Kambatla via atm)
HADOOP-9962. in order to avoid dependency divergence within Hadoop itself
lets enable DependencyConvergence. (rvs via tucu)
OPTIMIZATIONS
BUG FIXES
@ -468,6 +471,11 @@ Release 2.1.1-beta - UNRELEASED
HADOOP-9350. Hadoop not building against Java7 on OSX
(Robert Kanter via stevel)
HADOOP-9935. set junit dependency to test scope. (André Kelpe via cnauroth)
HADOOP-9961. versions of a few transitive dependencies diverged between hadoop
subprojects. (rvs via tucu)
Release 2.1.0-beta - 2013-08-22
INCOMPATIBLE CHANGES

View File

@ -250,7 +250,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.4</version>
</dependency>
</dependencies>

View File

@ -929,6 +929,8 @@ KVNO Timestamp Principal
*-------------------------+-------------------------+------------------------+
| <<<banned.users>>> | hfds,yarn,mapred,bin | Banned users. |
*-------------------------+-------------------------+------------------------+
| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
*-------------------------+-------------------------+------------------------+
| <<<min.user.id>>> | 1000 | Prevent other super-users. |
*-------------------------+-------------------------+------------------------+

View File

@ -86,13 +86,11 @@
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
<version>3.6.2.Final</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
</dependencies>

View File

@ -49,7 +49,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
<version>3.6.2.Final</version>
<scope>compile</scope>
</dependency>
<dependency>

View File

@ -36,7 +36,25 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<hadoop.common.build.dir>${basedir}/../../../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
</properties>
<dependencyManagement>
<dependencies>
<!-- This is a really old version of netty, that gets privatized
via shading and hence it is not managed via a parent pom -->
<dependency>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
<version>3.2.4.Final</version>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>

View File

@ -190,6 +190,8 @@ Release 2.1.1-beta - UNRELEASED
but just before ClientService to avoid race conditions during RM restart.
(Jian He via vinodkv)
MAPREDUCE-5379. Include token tracking ids in jobconf. (kkambatl via tucu)
OPTIMIZATIONS
MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race
@ -265,6 +267,9 @@ Release 2.1.1-beta - UNRELEASED
MAPREDUCE-5164. mapred job and queue commands omit HADOOP_CLIENT_OPTS
(Nemon Lou via devaraj)
MAPREDUCE-5493. Cleanup in-memory & on-disk segments to prevent leak on
shuffle completion. (jlowe via acmurthy)
Release 2.1.0-beta - 2013-08-22
INCOMPATIBLE CHANGES

View File

@ -19,6 +19,7 @@
option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "MRClientProtocol";
option java_generic_services = true;
package hadoop.mapreduce;
import "Security.proto";
import "mr_service_protos.proto";

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.mapreduce.v2.proto";
option java_outer_classname = "MRProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.mapreduce;
import "yarn_protos.proto";
@ -29,7 +30,7 @@ enum TaskTypeProto {
}
message JobIdProto {
optional ApplicationIdProto app_id = 1;
optional hadoop.yarn.ApplicationIdProto app_id = 1;
optional int32 id = 2;
}
@ -115,7 +116,7 @@ message TaskAttemptReportProto {
optional string node_manager_host = 12;
optional int32 node_manager_port = 13;
optional int32 node_manager_http_port = 14;
optional ContainerIdProto container_id = 15;
optional hadoop.yarn.ContainerIdProto container_id = 15;
}
enum JobStateProto {
@ -148,9 +149,9 @@ message JobReportProto {
}
message AMInfoProto {
optional ApplicationAttemptIdProto application_attempt_id = 1;
optional hadoop.yarn.ApplicationAttemptIdProto application_attempt_id = 1;
optional int64 start_time = 2;
optional ContainerIdProto container_id = 3;
optional hadoop.yarn.ContainerIdProto container_id = 3;
optional string node_manager_host = 4;
optional int32 node_manager_port = 5;
optional int32 node_manager_http_port = 6;

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.mapreduce.v2.proto";
option java_outer_classname = "MRServiceProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.mapreduce;
import "Security.proto";
import "mr_protos.proto";

View File

@ -24,6 +24,7 @@
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
@ -56,6 +57,7 @@
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AccessControlList;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.ReflectionUtils;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
@ -405,6 +407,19 @@ JobStatus submitJobInternal(Job job, Cluster cluster)
// different job.
TokenCache.cleanUpTokenReferral(conf);
if (conf.getBoolean(
MRJobConfig.JOB_TOKEN_TRACKING_IDS_ENABLED,
MRJobConfig.DEFAULT_JOB_TOKEN_TRACKING_IDS_ENABLED)) {
// Add HDFS tracking ids
ArrayList<String> trackingIds = new ArrayList<String>();
for (Token<? extends TokenIdentifier> t :
job.getCredentials().getAllTokens()) {
trackingIds.add(t.decodeIdentifier().getTrackingId());
}
conf.setStrings(MRJobConfig.JOB_TOKEN_TRACKING_IDS,
trackingIds.toArray(new String[trackingIds.size()]));
}
// Write job file to submit dir
writeConf(conf, submitJobFile);

View File

@ -313,6 +313,13 @@ public interface MRJobConfig {
public static final String MAPREDUCE_JOB_CREDENTIALS_BINARY =
"mapreduce.job.credentials.binary";
/* Configs for tracking ids of tokens used by a job */
public static final String JOB_TOKEN_TRACKING_IDS_ENABLED =
"mapreduce.job.token.tracking.ids.enabled";
public static final boolean DEFAULT_JOB_TOKEN_TRACKING_IDS_ENABLED = false;
public static final String JOB_TOKEN_TRACKING_IDS =
"mapreduce.job.token.tracking.ids";
public static final String JOB_SUBMITHOST =
"mapreduce.job.submithostname";
public static final String JOB_SUBMITHOSTADDR =

View File

@ -355,8 +355,11 @@ public RawKeyValueIterator close() throws Throwable {
List<InMemoryMapOutput<K, V>> memory =
new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
inMemoryMergedMapOutputs.clear();
memory.addAll(inMemoryMapOutputs);
inMemoryMapOutputs.clear();
List<CompressAwarePath> disk = new ArrayList<CompressAwarePath>(onDiskMapOutputs);
onDiskMapOutputs.clear();
return finalMerge(jobConf, rfs, memory, disk);
}

View File

@ -748,6 +748,23 @@
</description>
</property>
<property>
<name>mapreduce.job.token.tracking.ids.enabled</name>
<value>false</value>
<description>Whether to write tracking ids of tokens to
job-conf. When true, the configuration property
"mapreduce.job.token.tracking.ids" is set to the token-tracking-ids of
the job</description>
</property>
<property>
<name>mapreduce.job.token.tracking.ids</name>
<value></value>
<description>When mapreduce.job.token.tracking.ids.enabled is
set to true, this is set by the framework to the
token-tracking-ids used by the job.</description>
</property>
<property>
<name>mapreduce.task.merge.progress.records</name>
<value>10000</value>

View File

@ -82,7 +82,7 @@ public void cleanup() throws IOException {
}
@Test
public void testInMemoryMerger() throws IOException {
public void testInMemoryMerger() throws Throwable {
JobID jobId = new JobID("a", 0);
TaskAttemptID reduceId = new TaskAttemptID(
new TaskID(jobId, TaskType.REDUCE, 0), 0);
@ -132,6 +132,11 @@ public void testInMemoryMerger() throws IOException {
readOnDiskMapOutput(conf, fs, outPath, keys, values);
Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot"));
Assert.assertEquals(values, Arrays.asList("disgusting", "pretty good", "delicious"));
mergeManager.close();
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
private byte[] writeMapOutput(Configuration conf, Map<String, String> keysToValues)

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.mapreduce.v2.hs.proto";
option java_outer_classname = "HSAdminRefreshProtocolProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.mapreduce;
/**
* refresh admin acls request.

View File

@ -318,6 +318,11 @@
<artifactId>commons-math</artifactId>
<version>2.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>xmlenc</groupId>
<artifactId>xmlenc</artifactId>
@ -360,6 +365,23 @@
<version>6.1.26</version>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.servlet</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
<version>2.0.5</version>
</dependency>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-component-annotations</artifactId>
<version>1.5.5</version>
</dependency>
<dependency>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
@ -420,7 +442,7 @@
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
<version>3.5.11.Final</version>
<version>3.6.2.Final</version>
</dependency>
<dependency>
@ -544,6 +566,7 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
@ -604,6 +627,7 @@
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.8.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
@ -664,6 +688,10 @@
<groupId>com.sun.jmx</groupId>
<artifactId>jmxri</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
@ -672,6 +700,12 @@
<version>3.4.2</version>
<type>test-jar</type>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.bookkeeper</groupId>
@ -706,7 +740,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.1</version>
<version>2.2</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
@ -874,6 +908,26 @@
<includeReports>false</includeReports>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
<id>depcheck</id>
<configuration>
<rules>
<DependencyConvergence>
<uniqueVersions>true</uniqueVersions>
</DependencyConvergence>
</rules>
</configuration>
<goals>
<goal>enforce</goal>
</goals>
<phase>verify</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>

View File

@ -29,6 +29,7 @@ Release 2.3.0 - UNRELEASED
YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza)
YARN-1098. Separate out RM services into Always On and Active (Karthik
Kambatla via bikas)
YARN-1027. Implement RMHAProtocolService (Karthik Kambatla via bikas)
OPTIMIZATIONS
@ -44,6 +45,10 @@ Release 2.1.1-beta - UNRELEASED
YARN-707. Added user information also in the YARN ClientToken so that AMs
can implement authorization based on incoming users. (Jason Lowe via vinodkv)
YARN-1170. YARN & MapReduce proto definitions fixed to specify protobuf
package as hadoop.yarn and hadoop.mapreduce respectively. (Binglin Chang
via acmurthy)
NEW FEATURES
IMPROVEMENTS
@ -98,6 +103,9 @@ Release 2.1.1-beta - UNRELEASED
completions in addition to application events. (Alejandro Abdelnur via
vinodkv)
YARN-1137. Add support whitelist for system users to Yarn
container-executor.c. (rvs via tucu)
OPTIMIZATIONS
BUG FIXES

View File

@ -1,3 +1,4 @@
yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group
banned.users=#comma separated list of users who can not run applications
min.user.id=1000#Prevent other super-users
allowed.system.users=##comma separated list of system users who CAN run applications

View File

@ -270,6 +270,11 @@ public class YarnConfiguration extends Configuration {
public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled";
public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
/** HA related configs */
public static final String RM_HA_PREFIX = RM_PREFIX + "ha.";
public static final String RM_HA_ENABLED = RM_HA_PREFIX + "enabled";
public static final boolean DEFAULT_RM_HA_ENABLED = false;
/** The class to use as the persistent store.*/
public static final String RM_STORE = RM_PREFIX + "store.class";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "ApplicationClientProtocol";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "Security.proto";
import "yarn_service_protos.proto";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "ApplicationMasterProtocol";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_service_protos.proto";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "ContainerManagementProtocol";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_service_protos.proto";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "ResourceManagerAdministrationProtocol";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "server/yarn_server_resourcemanager_service_protos.proto";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "YarnServerResourceManagerServiceProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_protos.proto";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "YarnProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "Security.proto";

View File

@ -26,6 +26,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "YarnServiceProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "Security.proto";
import "yarn_protos.proto";

View File

@ -268,6 +268,14 @@
<!--value>hdfs://localhost:9000/rmstore</value-->
</property>
<property>
<description>Enable RM high-availability. When enabled, the RM starts
in the Standby mode by default, and transitions to the Active mode when
prompted to.</description>
<name>yarn.resourcemanager.ha.enabled</name>
<value>false</value>
</property>
<property>
<description>The maximum number of completed applications RM keeps. </description>
<name>yarn.resourcemanager.max-completed-applications</name>

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "ResourceTracker";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_server_common_service_protos.proto";

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "YarnServerCommonProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_protos.proto";

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "YarnServerCommonServiceProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_protos.proto";
import "yarn_server_common_protos.proto";

View File

@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/mount.h>
@ -492,6 +493,21 @@ static struct passwd* get_user_info(const char* user) {
return result;
}
int is_whitelisted(const char *user) {
char **whitelist = get_values(ALLOWED_SYSTEM_USERS_KEY);
char **users = whitelist;
if (whitelist != NULL) {
for(; *users; ++users) {
if (strncmp(*users, user, LOGIN_NAME_MAX) == 0) {
free_values(whitelist);
return 1;
}
}
free_values(whitelist);
}
return 0;
}
/**
* Is the user a real user account?
* Checks:
@ -526,9 +542,9 @@ struct passwd* check_user(const char *user) {
fflush(LOGFILE);
return NULL;
}
if (user_info->pw_uid < min_uid) {
fprintf(LOGFILE, "Requested user %s has id %d, which is below the "
"minimum allowed %d\n", user, user_info->pw_uid, min_uid);
if (user_info->pw_uid < min_uid && !is_whitelisted(user)) {
fprintf(LOGFILE, "Requested user %s is not whitelisted and has id %d,"
"which is below the minimum allowed %d\n", user, user_info->pw_uid, min_uid);
fflush(LOGFILE);
free(user_info);
return NULL;

View File

@ -65,6 +65,7 @@ enum errorcodes {
#define CREDENTIALS_FILENAME "container_tokens"
#define MIN_USERID_KEY "min.user.id"
#define BANNED_USERS_KEY "banned.users"
#define ALLOWED_SYSTEM_USERS_KEY "allowed.system.users"
#define TMP_DIR "tmp"
extern struct passwd *user_detail;

View File

@ -99,6 +99,7 @@ int write_config_file(char *file_name) {
}
fprintf(file, "banned.users=bannedUser\n");
fprintf(file, "min.user.id=500\n");
fprintf(file, "allowed.system.users=allowedUser,bin\n");
fclose(file);
return 0;
}
@ -195,6 +196,10 @@ void test_check_user() {
printf("FAIL: failed check for system user root\n");
exit(1);
}
if (check_user("bin") == NULL) {
printf("FAIL: failed check for whitelisted system user bin\n");
exit(1);
}
}
void test_resolve_config_path() {

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "LocalizationProtocol";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_server_nodemanager_service_protos.proto";

View File

@ -20,6 +20,7 @@ option java_package = "org.apache.hadoop.yarn.proto";
option java_outer_classname = "YarnServerNodemanagerServiceProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.yarn;
import "yarn_protos.proto";

View File

@ -196,7 +196,7 @@ private boolean checkAccess(UserGroupInformation callerUGI, String owner,
ApplicationId getNewApplicationId() {
ApplicationId applicationId = org.apache.hadoop.yarn.server.utils.BuilderUtils
.newApplicationId(recordFactory, ResourceManager.clusterTimeStamp,
.newApplicationId(recordFactory, ResourceManager.getClusterTimeStamp(),
applicationCounter.incrementAndGet());
LOG.info("Allocated new applicationId: " + applicationId.getId());
return applicationId;

View File

@ -0,0 +1,153 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import java.io.IOException;
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class RMHAProtocolService extends AbstractService implements
HAServiceProtocol {
private static final Log LOG = LogFactory.getLog(RMHAProtocolService.class);
private Configuration conf;
private ResourceManager rm;
@VisibleForTesting
protected HAServiceState haState = HAServiceState.INITIALIZING;
public RMHAProtocolService(ResourceManager resourceManager) {
super("RMHAProtocolService");
this.rm = resourceManager;
}
@Override
public synchronized void serviceInit(Configuration conf) throws Exception {
this.conf = conf;
rm.createAndInitActiveServices();
super.serviceInit(this.conf);
}
@Override
public synchronized void serviceStart() throws Exception {
boolean haEnabled = this.conf.getBoolean(YarnConfiguration.RM_HA_ENABLED,
YarnConfiguration.DEFAULT_RM_HA_ENABLED);
if (haEnabled) {
transitionToStandby(true);
} else {
transitionToActive();
}
super.serviceStart();
}
@Override
public synchronized void serviceStop() throws Exception {
transitionToStandby(false);
haState = HAServiceState.STOPPING;
super.serviceStop();
}
@Override
public synchronized void monitorHealth() throws HealthCheckFailedException {
if (haState == HAServiceState.ACTIVE && !rm.areActiveServicesRunning()) {
throw new HealthCheckFailedException(
"Active ResourceManager services are not running!");
}
}
private synchronized void transitionToActive() throws Exception {
if (haState == HAServiceState.ACTIVE) {
LOG.info("Already in active state");
return;
}
LOG.info("Transitioning to active");
rm.startActiveServices();
haState = HAServiceState.ACTIVE;
LOG.info("Transitioned to active");
}
@Override
public synchronized void transitionToActive(StateChangeRequestInfo reqInfo) {
// TODO (YARN-1177): When automatic failover is enabled,
// check if transition should be allowed for this request
try {
transitionToActive();
} catch (Exception e) {
LOG.error("Error when transitioning to Active mode", e);
throw new YarnRuntimeException(e);
}
}
private synchronized void transitionToStandby(boolean initialize)
throws Exception {
if (haState == HAServiceState.STANDBY) {
LOG.info("Already in standby state");
return;
}
LOG.info("Transitioning to standby");
if (haState == HAServiceState.ACTIVE) {
rm.stopActiveServices();
if (initialize) {
rm.createAndInitActiveServices();
}
}
haState = HAServiceState.STANDBY;
LOG.info("Transitioned to standby");
}
@Override
public synchronized void transitionToStandby(StateChangeRequestInfo reqInfo) {
// TODO (YARN-1177): When automatic failover is enabled,
// check if transition should be allowed for this request
try {
transitionToStandby(true);
} catch (Exception e) {
LOG.error("Error when transitioning to Standby mode", e);
throw new YarnRuntimeException(e);
}
}
@Override
public synchronized HAServiceStatus getServiceStatus() throws IOException {
HAServiceStatus ret = new HAServiceStatus(haState);
if (haState == HAServiceState.ACTIVE || haState == HAServiceState.STANDBY) {
ret.setReadyToBecomeActive();
} else {
ret.setNotReadyToBecomeActive("State is " + haState);
}
return ret;
}
}

View File

@ -105,7 +105,14 @@ public class ResourceManager extends CompositeService implements Recoverable {
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
private static final Log LOG = LogFactory.getLog(ResourceManager.class);
public static final long clusterTimeStamp = System.currentTimeMillis();
private static long clusterTimeStamp = System.currentTimeMillis();
/**
* "Always On" services. Services that need to run always irrespective of
* the HA state of the RM.
*/
@VisibleForTesting
protected RMHAProtocolService haService;
/**
* "Active" services. Services that need to run only on the Active RM.
@ -155,14 +162,18 @@ public ResourceManager() {
public RMContext getRMContext() {
return this.rmContext;
}
public static long getClusterTimeStamp() {
return clusterTimeStamp;
}
@Override
protected void serviceInit(Configuration conf) throws Exception {
validateConfigs(conf);
this.conf = conf;
activeServices = new RMActiveServices();
addService(activeServices);
haService = new RMHAProtocolService(this);
addService(haService);
super.serviceInit(conf);
}
@ -470,6 +481,7 @@ protected void serviceStop() throws Exception {
LOG.error("Error closing store.", e);
}
}
super.serviceStop();
}
}
@ -708,6 +720,43 @@ protected void startWepApp() {
webApp = builder.start(new RMWebApp(this));
}
/**
* Helper method to create and init {@link #activeServices}. This creates an
* instance of {@link RMActiveServices} and initializes it.
* @throws Exception
*/
void createAndInitActiveServices() throws Exception {
activeServices = new RMActiveServices();
activeServices.init(conf);
}
/**
* Helper method to start {@link #activeServices}.
* @throws Exception
*/
void startActiveServices() throws Exception {
if (activeServices != null) {
clusterTimeStamp = System.currentTimeMillis();
activeServices.start();
}
}
/**
* Helper method to stop {@link #activeServices}.
* @throws Exception
*/
void stopActiveServices() throws Exception {
if (activeServices != null) {
activeServices.stop();
activeServices = null;
}
}
@VisibleForTesting
protected boolean areActiveServicesRunning() {
return activeServices != null && activeServices.isInState(STATE.STARTED);
}
@Override
protected void serviceStart() throws Exception {
try {
@ -715,7 +764,6 @@ protected void serviceStart() throws Exception {
} catch(IOException ie) {
throw new YarnRuntimeException("Failed to login", ie);
}
super.serviceStart();
}

View File

@ -229,7 +229,7 @@ public RegisterNodeManagerResponse registerNodeManager(
+ ", assigned nodeId " + nodeId;
LOG.info(message);
response.setNodeAction(NodeAction.NORMAL);
response.setRMIdentifier(ResourceManager.clusterTimeStamp);
response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
return response;
}

View File

@ -185,7 +185,7 @@ public void run() {
tokenIdentifier =
new ContainerTokenIdentifier(containerId, nodeId.toString(),
appSubmitter, capability, expiryTimeStamp, this.currentMasterKey
.getMasterKey().getKeyId(), ResourceManager.clusterTimeStamp);
.getMasterKey().getKeyId(), ResourceManager.getClusterTimeStamp());
password = this.createPassword(tokenIdentifier);
} finally {

View File

@ -108,7 +108,7 @@ public AppInfo(RMApp app, Boolean hasAccess) {
this.diagnostics = "";
}
this.finalStatus = app.getFinalApplicationStatus();
this.clusterId = ResourceManager.clusterTimeStamp;
this.clusterId = ResourceManager.getClusterTimeStamp();
if (hasAccess) {
this.startedTime = app.getStartTime();
this.finishedTime = app.getFinishTime();

View File

@ -44,7 +44,7 @@ public ClusterInfo() {
} // JAXB needs this
public ClusterInfo(ResourceManager rm) {
long ts = ResourceManager.clusterTimeStamp;
long ts = ResourceManager.getClusterTimeStamp();
this.id = ts;
this.state = rm.getServiceState();

View File

@ -0,0 +1,151 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
public class TestRMHA {
private Log LOG = LogFactory.getLog(TestRMHA.class);
private MockRM rm = null;
private static final String STATE_ERR =
"ResourceManager is in wrong HA state";
@Before
public void setUp() throws Exception {
Configuration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
rm = new MockRM(conf);
rm.init(conf);
}
private void checkMonitorHealth() {
try {
rm.haService.monitorHealth();
} catch (HealthCheckFailedException e) {
fail("The RM is in bad health: it is Active, but the active services " +
"are not running");
}
}
private void checkStandbyRMFunctionality() throws IOException {
assertEquals(STATE_ERR, HAServiceState.STANDBY,
rm.haService.getServiceStatus().getState());
assertFalse("Active RM services are started",
rm.areActiveServicesRunning());
assertTrue("RM is not ready to become active",
rm.haService.getServiceStatus().isReadyToBecomeActive());
}
private void checkActiveRMFunctionality() throws IOException {
assertEquals(STATE_ERR, HAServiceState.ACTIVE,
rm.haService.getServiceStatus().getState());
assertTrue("Active RM services aren't started",
rm.areActiveServicesRunning());
assertTrue("RM is not ready to become active",
rm.haService.getServiceStatus().isReadyToBecomeActive());
try {
rm.getNewAppId();
rm.registerNode("127.0.0.1:0", 2048);
rm.submitApp(1024);
} catch (Exception e) {
fail("Unable to perform Active RM functions");
LOG.error("ActiveRM check failed", e);
}
}
/**
* Test to verify the following RM HA transitions to the following states.
* 1. Standby: Should be a no-op
* 2. Active: Active services should start
* 3. Active: Should be a no-op.
* While active, submit a couple of jobs
* 4. Standby: Active services should stop
* 5. Active: Active services should start
* 6. Stop the RM: All services should stop and RM should not be ready to
* become Active
*/
@Test (timeout = 30000)
public void testStartAndTransitions() throws IOException {
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
assertEquals(STATE_ERR, HAServiceState.INITIALIZING,
rm.haService.getServiceStatus().getState());
assertFalse("RM is ready to become active before being started",
rm.haService.getServiceStatus().isReadyToBecomeActive());
checkMonitorHealth();
rm.start();
checkMonitorHealth();
checkStandbyRMFunctionality();
// 1. Transition to Standby - must be a no-op
rm.haService.transitionToStandby(requestInfo);
checkMonitorHealth();
checkStandbyRMFunctionality();
// 2. Transition to active
rm.haService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
// 3. Transition to active - no-op
rm.haService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
// 4. Transition to standby
rm.haService.transitionToStandby(requestInfo);
checkMonitorHealth();
checkStandbyRMFunctionality();
// 5. Transition to active to check Active->Standby->Active works
rm.haService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
// 6. Stop the RM. All services should stop and RM should not be ready to
// become active
rm.stop();
assertEquals(STATE_ERR, HAServiceState.STOPPING,
rm.haService.getServiceStatus().getState());
assertFalse("RM is ready to become active even after it is stopped",
rm.haService.getServiceStatus().isReadyToBecomeActive());
assertFalse("Active RM services are started",
rm.areActiveServicesRunning());
checkMonitorHealth();
}
}

View File

@ -283,7 +283,7 @@ public void testSetRMIdentifierInRegistration() throws Exception {
RegisterNodeManagerResponse response = nm.registerNode();
// Verify the RMIdentifier is correctly set in RegisterNodeManagerResponse
Assert.assertEquals(ResourceManager.clusterTimeStamp,
Assert.assertEquals(ResourceManager.getClusterTimeStamp(),
response.getRMIdentifier());
}

View File

@ -88,7 +88,7 @@ public void testRMIdentifierOnContainerAllocation() throws Exception {
ContainerTokenIdentifier tokenId =
BuilderUtils.newContainerTokenIdentifier(allocatedContainer
.getContainerToken());
Assert.assertEquals(MockRM.clusterTimeStamp, tokenId.getRMIdentifer());
Assert.assertEquals(MockRM.getClusterTimeStamp(), tokenId.getRMIdentifer());
rm.stop();
}

View File

@ -295,10 +295,10 @@ public void verifyClusterGeneric(long clusterid, long startedon,
String hadoopVersion, String resourceManagerVersionBuiltOn,
String resourceManagerBuildVersion, String resourceManagerVersion) {
assertEquals("clusterId doesn't match: ", ResourceManager.clusterTimeStamp,
clusterid);
assertEquals("startedOn doesn't match: ", ResourceManager.clusterTimeStamp,
startedon);
assertEquals("clusterId doesn't match: ",
ResourceManager.getClusterTimeStamp(), clusterid);
assertEquals("startedOn doesn't match: ",
ResourceManager.getClusterTimeStamp(), startedon);
assertTrue("stated doesn't match: " + state,
state.matches(STATE.INITED.toString()));

View File

@ -1181,8 +1181,8 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user,
trackingUI);
WebServicesTestUtils.checkStringMatch("diagnostics", app.getDiagnostics()
.toString(), diagnostics);
assertEquals("clusterId doesn't match", ResourceManager.clusterTimeStamp,
clusterId);
assertEquals("clusterId doesn't match",
ResourceManager.getClusterTimeStamp(), clusterId);
assertEquals("startedTime doesn't match", app.getStartTime(), startedTime);
assertEquals("finishedTime doesn't match", app.getFinishTime(),
finishedTime);

View File

@ -105,10 +105,15 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.4</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.0</version>
<version>1.3.1</version>
<configuration>
<rules>
<requireMavenVersion>
@ -224,7 +229,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<versionRange>[2.1,)</versionRange>
<versionRange>[2.4,)</versionRange>
<goals>
<goal>copy-dependencies</goal>
<goal>build-classpath</goal>