Merge trunk into HA branch.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232184 13f79535-47bb-0310-9956-ffa450edef68
commit 940eeb866c
@@ -83,6 +83,8 @@ Trunk (unreleased changes)
 
   HADOOP-4515. Configuration#getBoolean must not be case sensitive. (Sho Shimauchi via harsh)
 
+  HADOOP-7968. Errant println left in RPC.getHighestSupportedProtocol (Sho Shimauchi via harsh)
+
   BUGS
 
   HADOOP-7851. Configuration.getClasses() never returns the default value.
@@ -203,6 +205,8 @@ Release 0.23.1 - Unreleased
   HADOOP-7348. Change 'addnl' in getmerge util to be a flag '-nl' instead.
   (XieXianshan via harsh)
 
+  HADOOP-7975. Add LZ4 as an entry in the default codec list, missed by HADOOP-7657 (harsh)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -269,6 +273,9 @@ Release 0.23.1 - Unreleased
   HADOOP-7964. Deadlock in NetUtils and SecurityUtil class initialization.
   (Daryn Sharp via suresh)
 
+  HADOOP-7974. TestViewFsTrash incorrectly determines the user's home
+  directory. (harsh via eli)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
@@ -791,7 +791,10 @@ public class RPC {
       String protocolName) {
     Long highestVersion = 0L;
     ProtoClassProtoImpl highest = null;
-    System.out.println("Size of protoMap for " + rpcKind + " =" + getProtocolImplMap(rpcKind).size());
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Size of protoMap for " + rpcKind + " ="
+          + getProtocolImplMap(rpcKind).size());
+    }
     for (Map.Entry<ProtoNameVer, ProtoClassProtoImpl> pv :
         getProtocolImplMap(rpcKind).entrySet()) {
       if (pv.getKey().protocol.equals(protocolName)) {
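The hunk above swaps an errant System.out.println for a guarded LOG.debug. The isDebugEnabled() check matters because the message is assembled by string concatenation, which would otherwise run on every call even when debug output is discarded. A minimal standalone sketch of the pattern (the class and map names here are illustrative, not from the patch):

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;

    public class DebugGuardExample {
      private static final Log LOG = LogFactory.getLog(DebugGuardExample.class);

      void report(Object rpcKind, java.util.Map<?, ?> protoMap) {
        // Guard so the argument string is only built when debug is enabled.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Size of protoMap for " + rpcKind + " =" + protoMap.size());
        }
      }
    }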
@@ -161,7 +161,7 @@
 
 <property>
   <name>io.compression.codecs</name>
-  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
+  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
   <description>A list of the compression codec classes that can be used
     for compression/decompression.</description>
 </property>
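For context on this core-default.xml change: io.compression.codecs is the list that CompressionCodecFactory instantiates, so appending Lz4Codec is what lets files be resolved to LZ4 by their extension. A small sketch of how the list is typically consumed (the input path is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecLookup {
      public static void main(String[] args) {
        Configuration conf = new Configuration(); // loads core-default/core-site
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        // Resolves a codec by file extension, e.g. ".lz4" once Lz4Codec is listed.
        CompressionCodec codec = factory.getCodec(new Path("data/part-00000.lz4"));
        System.out.println(codec == null ? "no codec" : codec.getClass().getName());
      }
    }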
@@ -79,9 +79,8 @@ public class TestViewFsTrash {
     // But home dir is different on linux, mac etc.
     // Figure it out by calling home dir on target
 
-    String homeDir = fsTarget.getHomeDirectory().toUri().getPath();
-    int indexOf2ndSlash = homeDir.indexOf('/', 1);
-    String homeDirRoot = homeDir.substring(0, indexOf2ndSlash);
+    String homeDirRoot = fsTarget.getHomeDirectory()
+        .getParent().toUri().getPath();
     ConfigUtil.addLink(conf, homeDirRoot,
         fsTarget.makeQualified(new Path(homeDirRoot)).toUri());
     ConfigUtil.setHomeDirConf(conf, homeDirRoot);
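The old test derived the home-directory root by slicing the path string at the second '/', which always yields the top-level component and throws a StringIndexOutOfBoundsException when the home directory has only one component (indexOf returns -1). The fix asks Path for the actual parent. A toy comparison under an assumed two-level home directory:

    import org.apache.hadoop.fs.Path;

    public class HomeDirRootExample {
      public static void main(String[] args) {
        // Hypothetical home directory on a target FS where $HOME nests two deep.
        Path homeDir = new Path("/home/users/alice");
        String p = homeDir.toUri().getPath();
        // Old approach: everything up to the second slash -- top level only.
        String oldRoot = p.substring(0, p.indexOf('/', 1));     // "/home"
        // New approach: the real parent directory of the home dir.
        String newRoot = homeDir.getParent().toUri().getPath(); // "/home/users"
        System.out.println(oldRoot + " vs " + newRoot);
      }
    }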
@@ -264,6 +264,8 @@ Release 0.23.1 - UNRELEASED
 
   HDFS-69. Improve the 'dfsadmin' commandline help. (harsh)
 
+  HDFS-2788. HdfsServerConstants#DN_KEEPALIVE_TIMEOUT is dead code (eli)
+
   OPTIMIZATIONS
 
   HDFS-2130. Switch default checksum to CRC32C. (todd)
@@ -327,6 +329,9 @@ Release 0.23.1 - UNRELEASED
   HDFS-2707. HttpFS should read the hadoop-auth secret from a file
   instead inline from the configuration. (tucu)
 
+  HDFS-2790. FSNamesystem.setTimes throws exception with wrong
+  configuration name in the message. (Arpit Gupta via eli)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
@@ -86,7 +86,6 @@ public final class HdfsServerConstants {
   public static int READ_TIMEOUT_EXTENSION = 5 * 1000;
   public static int WRITE_TIMEOUT = 8 * 60 * 1000;
   public static int WRITE_TIMEOUT_EXTENSION = 5 * 1000; //for write pipeline
-  public static int DN_KEEPALIVE_TIMEOUT = 5 * 1000;
 
   /**
    * Defines the NameNode role.
@@ -1219,7 +1219,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       throws IOException, UnresolvedLinkException {
     if (!isAccessTimeSupported() && atime != -1) {
       throw new IOException("Access time for hdfs is not configured. " +
-          " Please set dfs.support.accessTime configuration parameter.");
+          " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
     }
     writeLock();
     try {
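HDFS-2790 only corrects the key quoted in the error message: dfs.support.accessTime is the legacy name, and DFS_NAMENODE_ACCESSTIME_PRECISION_KEY should resolve to dfs.namenode.accesstime.precision (an assumption about the constant's value; it is not shown in this diff). A sketch of the setting the corrected message points users at:

    import org.apache.hadoop.conf.Configuration;

    public class AccessTimeSetting {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Precision of HDFS access times in ms; 0 disables access times, which
        // is the state that makes setTimes() with atime != -1 throw above.
        conf.setLong("dfs.namenode.accesstime.precision", 3600000L); // 1 hour
      }
    }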
@@ -481,6 +481,24 @@ Release 0.23.1 - Unreleased
   MAPREDUCE-3625. CapacityScheduler web-ui display of queue's used capacity is broken.
   (Jason Lowe via mahadev)
 
+  MAPREDUCE-3596. Fix scheduler to handle cleaned up containers, which NMs
+  may subsequently report as running. (Vinod Kumar Vavilapalli via sseth)
+
+  MAPREDUCE-3656. Fixed a race condition in MR AM which is failing the sort
+  benchmark consistently. (Siddarth Seth via vinodkv)
+
+  MAPREDUCE-3532. Modified NM to report correct http address when an ephemeral
+  web port is configured. (Bhallamudi Venkata Siva Kamesh via vinodkv)
+
+  MAPREDUCE-3404. Corrected MR AM to honor speculative configuration and enable
+  speculating either maps or reduces. (Eric Payne via vinodkv)
+
+  MAPREDUCE-3649. Job End notification gives an error on calling back.
+  (Ravi Prakash via mahadev)
+
+  MAPREDUCE-3657. State machine visualize build fails. (Jason Lowe
+  via mahadev)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
@@ -22,7 +22,9 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -77,6 +79,9 @@ public class TaskAttemptListenerImpl extends CompositeService
   private ConcurrentMap<WrappedJvmID, org.apache.hadoop.mapred.Task>
     jvmIDToActiveAttemptMap
       = new ConcurrentHashMap<WrappedJvmID, org.apache.hadoop.mapred.Task>();
+  private Set<WrappedJvmID> launchedJVMs = Collections
+      .newSetFromMap(new ConcurrentHashMap<WrappedJvmID, Boolean>());
+
   private JobTokenSecretManager jobTokenSecretManager = null;
 
   public TaskAttemptListenerImpl(AppContext context,
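launchedJVMs needs set semantics with the same thread-safety as the ConcurrentHashMap beside it; java.util.Collections.newSetFromMap is the standard way to obtain a concurrent Set view over a concurrent Map. A minimal standalone sketch:

    import java.util.Collections;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    public class ConcurrentSetExample {
      public static void main(String[] args) {
        // A thread-safe Set view backed by a ConcurrentHashMap.
        Set<String> launched = Collections
            .newSetFromMap(new ConcurrentHashMap<String, Boolean>());
        launched.add("jvm_0001");
        System.out.println(launched.contains("jvm_0001")); // true
        launched.remove("jvm_0001");
      }
    }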
@@ -412,18 +417,24 @@ public class TaskAttemptListenerImpl extends CompositeService
 
     // Try to look up the task. We remove it directly as we don't give
     // multiple tasks to a JVM
-    org.apache.hadoop.mapred.Task task = jvmIDToActiveAttemptMap
-        .remove(wJvmID);
-    if (task != null) {
-      LOG.info("JVM with ID: " + jvmId + " given task: " + task.getTaskID());
-      jvmTask = new JvmTask(task, false);
-
-      // remove the task as it is no more needed and free up the memory
-      // Also we have already told the JVM to process a task, so it is no
-      // longer pending, and further request should ask it to exit.
-    } else {
+    if (!jvmIDToActiveAttemptMap.containsKey(wJvmID)) {
       LOG.info("JVM with ID: " + jvmId + " is invalid and will be killed.");
       jvmTask = TASK_FOR_INVALID_JVM;
+    } else {
+      if (!launchedJVMs.contains(wJvmID)) {
+        jvmTask = null;
+        LOG.info("JVM with ID: " + jvmId
+            + " asking for task before AM launch registered. Given null task");
+      } else {
+        // remove the task as it is no more needed and free up the memory.
+        // Also we have already told the JVM to process a task, so it is no
+        // longer pending, and further request should ask it to exit.
+        org.apache.hadoop.mapred.Task task =
+            jvmIDToActiveAttemptMap.remove(wJvmID);
+        launchedJVMs.remove(wJvmID);
+        LOG.info("JVM with ID: " + jvmId + " given task: " + task.getTaskID());
+        jvmTask = new JvmTask(task, false);
+      }
     }
     return jvmTask;
   }
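The rewritten getTask() distinguishes three JVM states instead of two: a JVM the AM does not know at all is told to die; a JVM whose task is registered but whose launch has not yet been recorded gets null (ask again later); only a JVM present in both structures is handed its task, and both entries are removed so it is handed out exactly once. A compilable model of that decision (String stands in for the MapReduce task and JvmTask types; all names here are hypothetical):

    import java.util.Collections;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;

    public class GetTaskDecision {
      static final String DIE = "DIE";

      final ConcurrentMap<String, String> activeAttempts =
          new ConcurrentHashMap<String, String>();
      final Set<String> launchedJVMs =
          Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

      String getTask(String jvmId) {
        if (!activeAttempts.containsKey(jvmId)) {
          return DIE;                  // unknown JVM: tell it to exit
        }
        if (!launchedJVMs.contains(jvmId)) {
          return null;                 // launch not registered yet: ask again later
        }
        String task = activeAttempts.remove(jvmId);
        launchedJVMs.remove(jvmId);    // hand the task out exactly once
        return task;
      }
    }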
@@ -440,13 +451,12 @@ public class TaskAttemptListenerImpl extends CompositeService
 
   @Override
   public void registerLaunchedTask(
-      org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID) {
-
-    // The task is launched. Register this for expiry-tracking.
-
-    // Timing can cause this to happen after the real JVM launches and gets a
-    // task which is still fine as we will only be tracking for expiry a little
-    // late than usual.
+      org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
+      WrappedJvmID jvmId) {
+    // The AM considers the task to be launched (Has asked the NM to launch it)
+    // The JVM will only be given a task after this registartion.
+    launchedJVMs.add(jvmId);
+
     taskHeartbeatHandler.register(attemptID);
   }
 
@@ -459,7 +469,12 @@ public class TaskAttemptListenerImpl extends CompositeService
     // registration. Events are ordered at TaskAttempt, so unregistration will
     // always come after registration.
 
-    // remove the mapping if not already removed
+    // Remove from launchedJVMs before jvmIDToActiveAttemptMap to avoid
+    // synchronization issue with getTask(). getTask should be checking
+    // jvmIDToActiveAttemptMap before it checks launchedJVMs.
+
+    // remove the mappings if not already removed
+    launchedJVMs.remove(jvmID);
     jvmIDToActiveAttemptMap.remove(jvmID);
 
     //unregister this attempt
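The comment in the hunk above pins down why the removal order in unregister() is the mirror of the read order in getTask(). A hedged, self-contained model of that ordering (the field and method names are illustrative):

    import java.util.Collections;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;

    public class UnregisterOrder {
      private final ConcurrentMap<String, String> activeAttempts =
          new ConcurrentHashMap<String, String>();
      private final Set<String> launchedJVMs =
          Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

      void unregister(String jvmId) {
        // Mirror of the read order in getTask(): the reader checks the map
        // first and the set second, so a concurrent getTask() observes either
        // "ask again later" or "invalid JVM" -- never a half-torn-down task.
        launchedJVMs.remove(jvmId);
        activeAttempts.remove(jvmId);
      }
    }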
@@ -19,12 +19,11 @@
 package org.apache.hadoop.mapreduce.v2.app;
 
 import java.io.IOException;
-import java.io.InputStream;
+import java.net.HttpURLConnection;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
 import java.net.Proxy;
+import java.net.URL;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
@@ -40,7 +39,8 @@ import org.mortbay.log.Log;
  * User can specify number of retry attempts and a time interval at which to
  * attempt retries</li><li>
  * Cluster administrators can set final parameters to set maximum number of
- * tries (0 would disable job end notification) and max time interval</li><li>
+ * tries (0 would disable job end notification) and max time interval and a
+ * proxy if needed</li><li>
  * The URL may contain sentinels which will be replaced by jobId and jobStatus
  * (eg. SUCCEEDED/KILLED/FAILED) </li> </ul>
  * </p>
@@ -59,8 +59,8 @@ public class JobEndNotifier implements Configurable {
 
   /**
    * Parse the URL that needs to be notified of the end of the job, along
-   * with the number of retries in case of failure and the amount of time to
-   * wait between retries
+   * with the number of retries in case of failure, the amount of time to
+   * wait between retries and proxy settings
    * @param conf the configuration
    */
   public void setConf(Configuration conf) {
@@ -119,15 +119,19 @@ public class JobEndNotifier implements Configurable {
     boolean success = false;
     try {
       Log.info("Job end notification trying " + urlToNotify);
-      URLConnection conn = urlToNotify.openConnection(proxyToUse);
+      HttpURLConnection conn = (HttpURLConnection) urlToNotify.openConnection();
       conn.setConnectTimeout(5*1000);
       conn.setReadTimeout(5*1000);
       conn.setAllowUserInteraction(false);
-      InputStream is = conn.getInputStream();
-      conn.getContent();
-      is.close();
+      if(conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
+        Log.warn("Job end notification to " + urlToNotify +" failed with code: "
+          + conn.getResponseCode() + " and message \"" + conn.getResponseMessage()
+          +"\"");
+      }
+      else {
         success = true;
         Log.info("Job end notification to " + urlToNotify + " succeeded");
+      }
     } catch(IOException ioe) {
       Log.warn("Job end notification to " + urlToNotify + " failed", ioe);
     }
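The old code treated any connection it could open as a delivered notification; the fix inspects the HTTP status code before declaring success. A self-contained sketch of the same pattern (the caller and retry policy are out of scope here):

    import java.io.IOException;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class NotifyOnce {
      static boolean notifyUrl(URL urlToNotify) {
        try {
          HttpURLConnection conn = (HttpURLConnection) urlToNotify.openConnection();
          conn.setConnectTimeout(5 * 1000);
          conn.setReadTimeout(5 * 1000);
          conn.setAllowUserInteraction(false);
          // Only a 200 counts as a successful callback; anything else is a
          // failure the caller may retry.
          return conn.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (IOException ioe) {
          return false;
        }
      }
    }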
@@ -135,8 +139,8 @@ public class JobEndNotifier implements Configurable {
   }
 
   /**
-   * Notify a server of the completion of a submitted job. The server must have
-   * configured MRConfig.JOB_END_NOTIFICATION_URLS
+   * Notify a server of the completion of a submitted job. The user must have
+   * configured MRJobConfig.MR_JOB_END_NOTIFICATION_URL
    * @param jobReport JobReport used to read JobId and JobStatus
    * @throws InterruptedException
    */
@@ -881,9 +881,31 @@ public class MRAppMaster extends CompositeService {
     }
     @Override
     public void handle(SpeculatorEvent event) {
-      if (!disabled &&
-          (conf.getBoolean(MRJobConfig.MAP_SPECULATIVE, false)
-           || conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false))) {
+      if (disabled) {
+        return;
+      }
+
+      TaskId tId = event.getTaskID();
+      TaskType tType = null;
+      /* event's TaskId will be null if the event type is JOB_CREATE or
+       * ATTEMPT_STATUS_UPDATE
+       */
+      if (tId != null) {
+        tType = tId.getTaskType();
+      }
+      boolean shouldMapSpec =
+              conf.getBoolean(MRJobConfig.MAP_SPECULATIVE, false);
+      boolean shouldReduceSpec =
+              conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
+
+      /* The point of the following is to allow the MAP and REDUCE speculative
+       * config values to be independent:
+       * IF spec-exec is turned on for maps AND the task is a map task
+       * OR IF spec-exec is turned on for reduces AND the task is a reduce task
+       * THEN call the speculator to handle the event.
+       */
+      if ( (shouldMapSpec && (tType == null || tType == TaskType.MAP))
+        || (shouldReduceSpec && (tType == null || tType == TaskType.REDUCE))) {
         // Speculator IS enabled, direct the event to there.
         speculator.handle(event);
       }
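The rewritten handler decouples the two flags: a map-task event passes only the MAP_SPECULATIVE check, a reduce-task event only the REDUCE_SPECULATIVE check, and an event carrying no task type (JOB_CREATE, ATTEMPT_STATUS_UPDATE) passes if either flag is set. A compilable model of just that predicate (the enum and names are illustrative):

    public class SpeculationGate {
      enum TaskType { MAP, REDUCE }

      static boolean shouldSpeculate(TaskType tType, boolean mapSpec, boolean reduceSpec) {
        // tType == null models events with no TaskId (JOB_CREATE,
        // ATTEMPT_STATUS_UPDATE): they pass if either kind of speculation is on.
        return (mapSpec && (tType == null || tType == TaskType.MAP))
            || (reduceSpec && (tType == null || tType == TaskType.REDUCE));
      }

      public static void main(String[] args) {
        System.out.println(shouldSpeculate(TaskType.MAP, true, false));    // true
        System.out.println(shouldSpeculate(TaskType.REDUCE, true, false)); // false
        System.out.println(shouldSpeculate(null, false, true));            // true
      }
    }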
@@ -45,8 +45,9 @@ public interface TaskAttemptListener {
    *
    * @param attemptID
    *          the id of the attempt for this JVM.
+   * @param jvmID the ID of the JVM.
    */
-  void registerLaunchedTask(TaskAttemptId attemptID);
+  void registerLaunchedTask(TaskAttemptId attemptID, WrappedJvmID jvmID);
 
   /**
    * Unregister the JVM and the attempt associated with it. This should be
@@ -93,6 +93,7 @@ public class TaskHeartbeatHandler extends AbstractService {
 
   public void receivedPing(TaskAttemptId attemptID) {
     //only put for the registered attempts
+    //TODO throw an exception if the task isn't registered.
     runningAttempts.replace(attemptID, clock.getTime());
   }
 
@@ -1201,7 +1201,7 @@ public abstract class TaskAttemptImpl implements
 
       // register it to TaskAttemptListener so that it can start monitoring it.
       taskAttempt.taskAttemptListener
-          .registerLaunchedTask(taskAttempt.attemptId);
+          .registerLaunchedTask(taskAttempt.attemptId, taskAttempt.jvmID);
       //TODO Resolve to host / IP in case of a local address.
       InetSocketAddress nodeHttpInetAddr =
           NetUtils.createSocketAddr(taskAttempt.nodeHttpAddress); // TODO:
@@ -19,6 +19,7 @@ package org.apache.hadoop.mapred;
 
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.verify;
@@ -79,21 +80,21 @@ public class TestTaskAttemptListenerImpl {
     assertNotNull(result);
     assertTrue(result.shouldDie);
 
-    // Verify ask after registration but before launch
+    // Verify ask after registration but before launch.
+    // Don't kill, should be null.
     TaskAttemptId attemptID = mock(TaskAttemptId.class);
     Task task = mock(Task.class);
     //Now put a task with the ID
     listener.registerPendingTask(task, wid);
     result = listener.getTask(context);
-    assertNotNull(result);
-    assertFalse(result.shouldDie);
+    assertNull(result);
     // Unregister for more testing.
     listener.unregister(attemptID, wid);
 
     // Verify ask after registration and launch
     //Now put a task with the ID
     listener.registerPendingTask(task, wid);
-    listener.registerLaunchedTask(attemptID);
+    listener.registerLaunchedTask(attemptID, wid);
     verify(hbHandler).register(attemptID);
     result = listener.getTask(context);
     assertNotNull(result);
@@ -324,7 +324,9 @@ public class MRApp extends MRAppMaster {
       return NetUtils.createSocketAddr("localhost:54321");
     }
     @Override
-    public void registerLaunchedTask(TaskAttemptId attemptID) {}
+    public void registerLaunchedTask(TaskAttemptId attemptID,
+        WrappedJvmID jvmID) {
+    }
     @Override
     public void unregister(TaskAttemptId attemptID, WrappedJvmID jvmID) {
     }
@@ -463,6 +465,7 @@ public class MRApp extends MRAppMaster {
       return localStateMachine;
     }
 
+    @SuppressWarnings("rawtypes")
     public TestJob(JobId jobId, ApplicationAttemptId applicationAttemptId,
         Configuration conf, EventHandler eventHandler,
         TaskAttemptListener taskAttemptListener, Clock clock,
@@ -0,0 +1,309 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobCounter;
+import org.apache.hadoop.mapreduce.JobStatus;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+import org.apache.hadoop.mapreduce.v2.app.speculate.LegacyTaskRuntimeEstimator;
+import org.apache.hadoop.mapreduce.v2.app.speculate.TaskRuntimeEstimator;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestSpeculativeExecution {
+
+  /*
+   * This class is used to control when speculative execution happens.
+   */
+  public static class TestSpecEstimator extends LegacyTaskRuntimeEstimator {
+    private static final long SPECULATE_THIS = 999999L;
+
+    public TestSpecEstimator() {
+      super();
+    }
+
+    /*
+     * This will only be called if speculative execution is turned on.
+     *
+     * If either mapper or reducer speculation is turned on, this will be
+     * called.
+     *
+     * This will cause speculation to engage for the first mapper or first
+     * reducer (that is, attempt ID "*_m_000000_0" or "*_r_000000_0")
+     *
+     * If this attempt is killed, the retry will have attempt id 1, so it
+     * will not engage speculation again.
+     */
+    @Override
+    public long estimatedRuntime(TaskAttemptId id) {
+      if ((id.getTaskId().getId() == 0) && (id.getId() == 0)) {
+        return SPECULATE_THIS;
+      }
+      return super.estimatedRuntime(id);
+    }
+  }
+
+  private static final Log LOG = LogFactory.getLog(TestSpeculativeExecution.class);
+
+  protected static MiniMRYarnCluster mrCluster;
+
+  private static Configuration initialConf = new Configuration();
+  private static FileSystem localFs;
+  static {
+    try {
+      localFs = FileSystem.getLocal(initialConf);
+    } catch (IOException io) {
+      throw new RuntimeException("problem getting local fs", io);
+    }
+  }
+
+  private static Path TEST_ROOT_DIR =
+          new Path("target",TestSpeculativeExecution.class.getName() + "-tmpDir")
+          .makeQualified(localFs.getUri(), localFs.getWorkingDirectory());
+  static Path APP_JAR = new Path(TEST_ROOT_DIR, "MRAppJar.jar");
+  private static Path TEST_OUT_DIR = new Path(TEST_ROOT_DIR, "test.out.dir");
+
+  @BeforeClass
+  public static void setup() throws IOException {
+
+    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
+      LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+               + " not found. Not running test.");
+      return;
+    }
+
+    if (mrCluster == null) {
+      mrCluster = new MiniMRYarnCluster(TestSpeculativeExecution.class.getName(), 4);
+      Configuration conf = new Configuration();
+      mrCluster.init(conf);
+      mrCluster.start();
+    }
+
+    // workaround the absent public distcache.
+    localFs.copyFromLocalFile(new Path(MiniMRYarnCluster.APPJAR), APP_JAR);
+    localFs.setPermission(APP_JAR, new FsPermission("700"));
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    if (mrCluster != null) {
+      mrCluster.stop();
+      mrCluster = null;
+    }
+  }
+
+  public static class SpeculativeMapper extends
+    Mapper<Object, Text, Text, IntWritable> {
+
+    public void map(Object key, Text value, Context context)
+        throws IOException, InterruptedException {
+      // Make one mapper slower for speculative execution
+      TaskAttemptID taid = context.getTaskAttemptID();
+      long sleepTime = 100;
+      Configuration conf = context.getConfiguration();
+      boolean test_speculate_map =
+              conf.getBoolean(MRJobConfig.MAP_SPECULATIVE, false);
+
+      // IF TESTING MAPPER SPECULATIVE EXECUTION:
+      // Make the "*_m_000000_0" attempt take much longer than the others.
+      // When speculative execution is enabled, this should cause the attempt
+      // to be killed and restarted. At that point, the attempt ID will be
+      // "*_m_000000_1", so sleepTime will still remain 100ms.
+      if ( (taid.getTaskType() == TaskType.MAP) && test_speculate_map
+            && (taid.getTaskID().getId() == 0) && (taid.getId() == 0)) {
+        sleepTime = 10000;
+      }
+      try{
+        Thread.sleep(sleepTime);
+      } catch(InterruptedException ie) {
+        // Ignore
+      }
+      context.write(value, new IntWritable(1));
+    }
+  }
+
+  public static class SpeculativeReducer extends
+    Reducer<Text,IntWritable,Text,IntWritable> {
+
+    public void reduce(Text key, Iterable<IntWritable> values,
+                       Context context) throws IOException, InterruptedException {
+      // Make one reducer slower for speculative execution
+      TaskAttemptID taid = context.getTaskAttemptID();
+      long sleepTime = 100;
+      Configuration conf = context.getConfiguration();
+      boolean test_speculate_reduce =
+              conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
+
+      // IF TESTING REDUCE SPECULATIVE EXECUTION:
+      // Make the "*_r_000000_0" attempt take much longer than the others.
+      // When speculative execution is enabled, this should cause the attempt
+      // to be killed and restarted. At that point, the attempt ID will be
+      // "*_r_000000_1", so sleepTime will still remain 100ms.
+      if ( (taid.getTaskType() == TaskType.REDUCE) && test_speculate_reduce
+            && (taid.getTaskID().getId() == 0) && (taid.getId() == 0)) {
+        sleepTime = 10000;
+      }
+      try{
+        Thread.sleep(sleepTime);
+      } catch(InterruptedException ie) {
+        // Ignore
+      }
+      context.write(key,new IntWritable(0));
+    }
+  }
+
+  @Test
+  public void testSpeculativeExecution() throws Exception {
+    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
+      LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+               + " not found. Not running test.");
+      return;
+    }
+
+    /*------------------------------------------------------------------
+     * Test that Map/Red does not speculate if MAP_SPECULATIVE and
+     * REDUCE_SPECULATIVE are both false.
+     * -----------------------------------------------------------------
+     */
+    Job job = runSpecTest(false, false);
+
+    boolean succeeded = job.waitForCompletion(true);
+    Assert.assertTrue(succeeded);
+    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
+    Counters counters = job.getCounters();
+    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS)
+            .getValue());
+    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES)
+            .getValue());
+    Assert.assertEquals(0, counters.findCounter(JobCounter.NUM_FAILED_MAPS)
+            .getValue());
+
+    /*----------------------------------------------------------------------
+     * Test that Mapper speculates if MAP_SPECULATIVE is true and
+     * REDUCE_SPECULATIVE is false.
+     * ---------------------------------------------------------------------
+     */
+    job = runSpecTest(true, false);
+
+    succeeded = job.waitForCompletion(true);
+    Assert.assertTrue(succeeded);
+    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
+    counters = job.getCounters();
+
+    // The long-running map will be killed and a new one started.
+    Assert.assertEquals(3, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS)
+            .getValue());
+    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES)
+            .getValue());
+    Assert.assertEquals(1, counters.findCounter(JobCounter.NUM_FAILED_MAPS)
+            .getValue());
+
+    /*----------------------------------------------------------------------
+     * Test that Reducer speculates if REDUCE_SPECULATIVE is true and
+     * MAP_SPECULATIVE is false.
+     * ---------------------------------------------------------------------
+     */
+    job = runSpecTest(false, true);
+
+    succeeded = job.waitForCompletion(true);
+    Assert.assertTrue(succeeded);
+    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
+    counters = job.getCounters();
+
+    // The long-running map will be killed and a new one started.
+    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS)
+            .getValue());
+    Assert.assertEquals(3, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES)
+            .getValue());
+  }
+
+  private Path createTempFile(String filename, String contents)
+      throws IOException {
+    Path path = new Path(TEST_ROOT_DIR, filename);
+    FSDataOutputStream os = localFs.create(path);
+    os.writeBytes(contents);
+    os.close();
+    localFs.setPermission(path, new FsPermission("700"));
+    return path;
+  }
+
+  private Job runSpecTest(boolean mapspec, boolean redspec)
+      throws IOException, ClassNotFoundException, InterruptedException {
+
+    Path first = createTempFile("specexec_map_input1", "a\nz");
+    Path secnd = createTempFile("specexec_map_input2", "a\nz");
+
+    Configuration conf = mrCluster.getConfig();
+    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
+    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
+    conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
+            TestSpecEstimator.class,
+            TaskRuntimeEstimator.class);
+
+    Job job = Job.getInstance(conf);
+    job.setJarByClass(TestSpeculativeExecution.class);
+    job.setMapperClass(SpeculativeMapper.class);
+    job.setReducerClass(SpeculativeReducer.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(IntWritable.class);
+    job.setNumReduceTasks(2);
+    FileInputFormat.setInputPaths(job, first);
+    FileInputFormat.addInputPath(job, secnd);
+    FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
+
+    // Delete output directory if it exists.
+    try {
+      localFs.delete(TEST_OUT_DIR,true);
+    } catch (IOException e) {
+      // ignore
+    }
+
+    // Creates the Job Configuration
+    job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
+    job.createSymlink();
+    job.setMaxMapAttempts(2);
+
+    job.submit();
+
+    return job;
+  }
+}
@@ -205,6 +205,17 @@ public class BuilderUtils {
     return nodeId;
   }
 
+  public static ContainerStatus newContainerStatus(ContainerId containerId,
+      ContainerState containerState, String diagnostics, int exitStatus) {
+    ContainerStatus containerStatus = recordFactory
+        .newRecordInstance(ContainerStatus.class);
+    containerStatus.setState(containerState);
+    containerStatus.setContainerId(containerId);
+    containerStatus.setDiagnostics(diagnostics);
+    containerStatus.setExitStatus(exitStatus);
+    return containerStatus;
+  }
+
   public static Container newContainer(ContainerId containerId,
       NodeId nodeId, String nodeHttpAddress,
       Resource resource, Priority priority, ContainerToken containerToken) {
@@ -109,6 +109,7 @@
             </goals>
             <configuration>
               <mainClass>org.apache.hadoop.yarn.util.VisualizeStateMachine</mainClass>
+              <classpathScope>compile</classpathScope>
              <arguments>
                <argument>NodeManager</argument>
                <argument>org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl,
@@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.state.MultipleArcTransition;
 import org.apache.hadoop.yarn.state.SingleArcTransition;
 import org.apache.hadoop.yarn.state.StateMachine;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
+import org.apache.hadoop.yarn.util.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 
 public class ContainerImpl implements Container {
@@ -370,13 +371,8 @@ public class ContainerImpl implements Container {
   public ContainerStatus cloneAndGetContainerStatus() {
     this.readLock.lock();
     try {
-      ContainerStatus containerStatus =
-          recordFactory.newRecordInstance(ContainerStatus.class);
-      containerStatus.setState(getCurrentState());
-      containerStatus.setContainerId(this.launchContext.getContainerId());
-      containerStatus.setDiagnostics(diagnostics.toString());
-      containerStatus.setExitStatus(exitCode);
-      return containerStatus;
+      return BuilderUtils.newContainerStatus(this.getContainerID(),
+          getCurrentState(), diagnostics.toString(), exitCode);
     } finally {
       this.readLock.unlock();
     }
@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.webapp;
 
 import static org.apache.hadoop.yarn.util.StringHelper.pajoin;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -65,6 +66,9 @@ public class WebServer extends AbstractService {
       this.webApp =
           WebApps.$for("node", Context.class, this.nmContext, "ws")
               .at(bindAddress).with(getConfig()).start(this.nmWebApp);
+      int port = this.webApp.httpServer().getPort();
+      String webAddress = StringUtils.split(bindAddress, ':')[0] + ":" + port;
+      getConfig().set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddress);
     } catch (Exception e) {
       String msg = "NMWebapps failed to start.";
       LOG.error(msg, e);
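Binding to port 0 asks the OS for a free (ephemeral) port, so the address in the configuration goes stale the moment the server starts; MAPREDUCE-3532 writes the real port back into NM_WEBAPP_ADDRESS. A generic, self-contained sketch of the same pattern using a plain ServerSocket (the published host string is illustrative):

    import java.io.IOException;
    import java.net.ServerSocket;

    public class EphemeralPortExample {
      public static void main(String[] args) throws IOException {
        ServerSocket server = new ServerSocket(0); // 0 = let the OS pick a port
        // The configured "host:0" is useless to clients; publish the port the
        // OS actually assigned, as WebServer does for NM_WEBAPP_ADDRESS.
        int port = server.getLocalPort();
        String webAddress = "0.0.0.0" + ":" + port;
        System.out.println(webAddress);
        server.close();
      }
    }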
@@ -26,6 +26,7 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Writer;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
@ -51,6 +52,7 @@ import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
|
||||||
import org.apache.hadoop.yarn.util.BuilderUtils;
|
import org.apache.hadoop.yarn.util.BuilderUtils;
|
||||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@@ -73,6 +75,45 @@ public class TestNMWebServer {
     FileUtil.fullyDelete(testLogDir);
   }
 
+  private String startNMWebAppServer(String webAddr) {
+    Context nmContext = new NodeManager.NMContext();
+    ResourceView resourceView = new ResourceView() {
+      @Override
+      public long getVmemAllocatedForContainers() {
+        return 0;
+      }
+      @Override
+      public long getPmemAllocatedForContainers() {
+        return 0;
+      }
+    };
+    Configuration conf = new Configuration();
+    conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
+    conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
+    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    healthChecker.init(conf);
+    LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
+    conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr);
+    WebServer server = new WebServer(nmContext, resourceView,
+        new ApplicationACLsManager(conf), dirsHandler);
+    server.init(conf);
+    server.start();
+    String webAppAddr = conf.get(YarnConfiguration.NM_WEBAPP_ADDRESS);
+    return StringUtils.split(webAppAddr, ':')[1];
+  }
+
+  @Test
+  public void testNMWebAppWithOutPort() throws IOException {
+    String port = startNMWebAppServer("0.0.0.0");
+    Assert.assertTrue("Port is not updated", Integer.parseInt(port) > 0);
+  }
+
+  @Test
+  public void testNMWebAppWithEphemeralPort() throws IOException {
+    String port = startNMWebAppServer("0.0.0.0:0");
+    Assert.assertTrue("Port is not updated", Integer.parseInt(port) > 0);
+  }
+
   @Test
   public void testNMWebApp() throws IOException {
     Context nmContext = new NodeManager.NMContext();
@@ -137,6 +137,7 @@
             </goals>
             <configuration>
               <mainClass>org.apache.hadoop.yarn.util.VisualizeStateMachine</mainClass>
+              <classpathScope>compile</classpathScope>
              <arguments>
                <argument>ResourceManager</argument>
                <argument>org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl,
@@ -67,16 +67,16 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRen
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebApp;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
+import org.apache.hadoop.yarn.server.webproxy.AppReportFetcher;
+import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils;
+import org.apache.hadoop.yarn.server.webproxy.WebAppProxy;
+import org.apache.hadoop.yarn.server.webproxy.WebAppProxyServlet;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.CompositeService;
 import org.apache.hadoop.yarn.service.Service;
 import org.apache.hadoop.yarn.webapp.WebApp;
 import org.apache.hadoop.yarn.webapp.WebApps;
 import org.apache.hadoop.yarn.webapp.WebApps.Builder;
-import org.apache.hadoop.yarn.server.webproxy.AppReportFetcher;
-import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils;
-import org.apache.hadoop.yarn.server.webproxy.WebAppProxyServlet;
-import org.apache.hadoop.yarn.server.webproxy.WebAppProxy;
 
 /**
  * The ResourceManager is the main class that is a set of components.
@@ -256,7 +256,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
   }
 
   @Private
-  public static final class SchedulerEventDispatcher extends AbstractService
+  public static class SchedulerEventDispatcher extends AbstractService
     implements EventHandler<SchedulerEvent> {
 
     private final ResourceScheduler scheduler;
@@ -265,8 +265,8 @@ public class ResourceTrackerService extends AbstractService implements
     HeartbeatResponse latestResponse = recordFactory
         .newRecordInstance(HeartbeatResponse.class);
     latestResponse.setResponseId(lastHeartbeatResponse.getResponseId() + 1);
-    latestResponse.addAllContainersToCleanup(rmNode.pullContainersToCleanUp());
-    latestResponse.addAllApplicationsToCleanup(rmNode.pullAppsToCleanup());
+    latestResponse.addAllContainersToCleanup(rmNode.getContainersToCleanUp());
+    latestResponse.addAllApplicationsToCleanup(rmNode.getAppsToCleanup());
     latestResponse.setNodeAction(NodeAction.NORMAL);
 
     // 4. Send status to RMNode, saving the latest response.
@@ -101,9 +101,9 @@ public interface RMNode {
 
   public RMNodeState getState();
 
-  public List<ContainerId> pullContainersToCleanUp();
+  public List<ContainerId> getContainersToCleanUp();
 
-  public List<ApplicationId> pullAppsToCleanup();
+  public List<ApplicationId> getAppsToCleanup();
 
   public HeartbeatResponse getLastHeartBeatResponse();
 }
@@ -90,7 +90,6 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
   private final Map<ContainerId, ContainerStatus> justLaunchedContainers =
     new HashMap<ContainerId, ContainerStatus>();
 
-
   /* set of containers that need to be cleaned */
   private final Set<ContainerId> containersToClean = new TreeSet<ContainerId>(
       new ContainerIdComparator());
@@ -248,54 +247,38 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
   }
 
   @Override
-  public List<ApplicationId> pullAppsToCleanup() {
-    this.writeLock.lock();
-
-    try {
-      List<ApplicationId> lastfinishedApplications = new ArrayList<ApplicationId>();
-      lastfinishedApplications.addAll(this.finishedApplications);
-      this.finishedApplications.clear();
-      return lastfinishedApplications;
-    } finally {
-      this.writeLock.unlock();
-    }
-  }
-
-  @Private
-  public List<ContainerId> getContainersToCleanUp() {
+  public List<ApplicationId> getAppsToCleanup() {
     this.readLock.lock();
 
     try {
-      return new ArrayList<ContainerId>(containersToClean);
+      return new ArrayList<ApplicationId>(this.finishedApplications);
    } finally {
      this.readLock.unlock();
    }
  }

  @Override
-  public List<ContainerId> pullContainersToCleanUp() {
+  public List<ContainerId> getContainersToCleanUp() {

-    this.writeLock.lock();
+    this.readLock.lock();

    try {
-      List<ContainerId> containersToCleanUp = new ArrayList<ContainerId>();
-      containersToCleanUp.addAll(this.containersToClean);
-      this.containersToClean.clear();
-      return containersToCleanUp;
+      return new ArrayList<ContainerId>(this.containersToClean);
    } finally {
-      this.writeLock.unlock();
+      this.readLock.unlock();
    }
  };

  @Override
  public HeartbeatResponse getLastHeartBeatResponse() {

-    this.writeLock.lock();
+    this.readLock.lock();

    try {
      return this.latestHeartBeatResponse;
    } finally {
-      this.writeLock.unlock();
+      this.readLock.unlock();
    }
  }
 
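Two coupled changes land in the hunk above: the destructive pull* methods drained their lists under the write lock, so a heartbeat lost after the pull also lost its cleanup commands; the renamed get* methods only copy, which is what lets them downgrade to the read lock. The state is cleared later, in the status-update transition (the -435,6 hunk below). A minimal, self-contained sketch of the copy-under-read-lock pattern (types simplified to String):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Set;
    import java.util.TreeSet;
    import java.util.concurrent.locks.ReentrantReadWriteLock;

    public class CleanupListExample {
      private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
      private final Set<String> containersToClean = new TreeSet<String>();

      public List<String> getContainersToCleanUp() {
        lock.readLock().lock();
        try {
          // Copy, don't drain: the source list survives a lost heartbeat and
          // is cleared only when the node's status update is processed.
          return new ArrayList<String>(containersToClean);
        } finally {
          lock.readLock().unlock();
        }
      }
    }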
@@ -407,13 +390,21 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
       for (ContainerStatus remoteContainer : statusEvent.getContainers()) {
         ContainerId containerId = remoteContainer.getContainerId();
 
-        // Don't bother with containers already scheduled for cleanup,
-        // the scheduler doens't need to know any more about this container
+        // Don't bother with containers already scheduled for cleanup, or for
+        // applications already killed. The scheduler doens't need to know any
+        // more about this container
         if (rmNode.containersToClean.contains(containerId)) {
           LOG.info("Container " + containerId + " already scheduled for " +
               "cleanup, no further processing");
           continue;
         }
+        if (rmNode.finishedApplications.contains(containerId
+            .getApplicationAttemptId().getApplicationId())) {
+          LOG.info("Container " + containerId
+              + " belongs to an application that is already killed,"
+              + " no further processing");
+          continue;
+        }
 
         // Process running containers
         if (remoteContainer.getState() == ContainerState.RUNNING) {
@ -435,6 +426,12 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
rmNode.context.getDelegationTokenRenewer().updateKeepAliveApplications(
|
rmNode.context.getDelegationTokenRenewer().updateKeepAliveApplications(
|
||||||
statusEvent.getKeepAliveAppIds());
|
statusEvent.getKeepAliveAppIds());
|
||||||
|
|
||||||
|
// HeartBeat processing from our end is done, as node pulls the following
|
||||||
|
// lists before sending status-updates. Clear data-structures
|
||||||
|
rmNode.containersToClean.clear();
|
||||||
|
rmNode.finishedApplications.clear();
|
||||||
|
|
||||||
return RMNodeState.RUNNING;
|
return RMNodeState.RUNNING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
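Taken together, the two hunks above implement a pull model: the RM accumulates containers and applications to clean, the node picks both lists up when its heartbeat is processed, and the RM clears the accumulators at the end of that same transition. A hedged sketch of the handshake, using hypothetical names rather than the real RMNodeImpl and heartbeat-response types:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Sketch only: the heartbeat-side pull-and-clear handshake. The real code
// routes this through RMNodeImpl's state machine and the heartbeat response.
class HeartbeatCleanupModel {
  private final Set<String> containersToClean = new HashSet<String>();
  private final Set<String> finishedApplications = new HashSet<String>();

  // Scheduler side: mark things for cleanup as decisions are made.
  synchronized void scheduleContainerCleanup(String containerId) {
    containersToClean.add(containerId);
  }

  synchronized void applicationFinished(String appId) {
    finishedApplications.add(appId);
  }

  // Heartbeat side: hand both lists to the node, then clear them, since
  // the node has pulled them before its next status update.
  synchronized CleanupLists onHeartbeat() {
    CleanupLists lists = new CleanupLists(
        new ArrayList<String>(containersToClean),
        new ArrayList<String>(finishedApplications));
    containersToClean.clear();
    finishedApplications.clear();
    return lists;
  }

  static class CleanupLists {
    final List<String> containers;
    final List<String> applications;
    CleanupLists(List<String> containers, List<String> applications) {
      this.containers = containers;
      this.applications = applications;
    }
  }
}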
@@ -39,9 +39,9 @@ import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
@@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFini
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerReservedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;

 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
@@ -61,6 +62,7 @@ import com.google.common.collect.Multiset;
  * Each running Application in the RM corresponds to one instance
  * of this class.
  */
+@SuppressWarnings("unchecked")
 public class SchedulerApp {

   private static final Log LOG = LogFactory.getLog(SchedulerApp.class);
@@ -174,12 +176,19 @@ public class SchedulerApp {
     this.appSchedulingInfo.stop(rmAppAttemptFinalState);
   }

-  synchronized public void containerLaunchedOnNode(ContainerId containerId) {
+  public synchronized void containerLaunchedOnNode(ContainerId containerId,
+      NodeId nodeId) {
     // Inform the container
     RMContainer rmContainer =
         getRMContainer(containerId);
-    rmContainer.handle(
-        new RMContainerEvent(containerId,
+    if (rmContainer == null) {
+      // Some unknown container sneaked into the system. Kill it.
+      this.rmContext.getDispatcher().getEventHandler()
+        .handle(new RMNodeCleanContainerEvent(nodeId, containerId));
+      return;
+    }
+
+    rmContainer.handle(new RMContainerEvent(containerId,
         RMContainerEventType.LAUNCHED));
   }
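The null guard above is the heart of the SchedulerApp change: a launch report for a container the RM no longer tracks now triggers a cleanup event back to the node rather than an NPE on rmContainer.handle(...). A compact sketch of the guard, with stand-in types in place of the real YARN event classes:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Sketch only: unknown launch reports become cleanup requests instead of
// dereferencing a null tracker entry. EventBus, CleanupEvent and Tracker
// are stand-ins, not YARN classes.
class LaunchGuard {
  interface EventBus { void post(Object event); }

  static class CleanupEvent {
    final String nodeId, containerId;
    CleanupEvent(String nodeId, String containerId) {
      this.nodeId = nodeId;
      this.containerId = containerId;
    }
  }

  static class Tracker { void launched() { /* advance state machine */ } }

  private final Map<String, Tracker> live =
      new ConcurrentHashMap<String, Tracker>();
  private final EventBus bus;

  LaunchGuard(EventBus bus) { this.bus = bus; }

  void containerLaunchedOnNode(String containerId, String nodeId) {
    Tracker t = live.get(containerId);
    if (t == null) {
      // Some unknown container sneaked in: ask the node to kill it.
      bus.post(new CleanupEvent(nodeId, containerId));
      return;
    }
    t.launched();
  }
}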
@@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
@@ -76,6 +77,7 @@ import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;

 @LimitedPrivate("yarn")
 @Evolving
+@SuppressWarnings("unchecked")
 public class CapacityScheduler
 implements ResourceScheduler, CapacitySchedulerContext {

@@ -588,10 +590,12 @@ implements ResourceScheduler, CapacitySchedulerContext {
       LOG.info("Unknown application: " + applicationAttemptId +
           " launched container " + containerId +
           " on node: " + node);
+      this.rmContext.getDispatcher().getEventHandler()
+        .handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId));
       return;
     }

     application.containerLaunchedOnNode(containerId, node.getNodeID());
   }

   @Override
@@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptS
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
@@ -87,6 +88,7 @@ import org.apache.hadoop.yarn.util.BuilderUtils;

 @LimitedPrivate("yarn")
 @Evolving
+@SuppressWarnings("unchecked")
 public class FifoScheduler implements ResourceScheduler {

   private static final Log LOG = LogFactory.getLog(FifoScheduler.class);
@@ -282,7 +284,6 @@ public class FifoScheduler implements ResourceScheduler {
     return nodes.get(nodeId);
   }

-  @SuppressWarnings("unchecked")
   private synchronized void addApplication(ApplicationAttemptId appAttemptId,
       String user) {
     // TODO: Fix store
@@ -655,10 +656,14 @@ public class FifoScheduler implements ResourceScheduler {
       LOG.info("Unknown application: " + applicationAttemptId +
           " launched container " + containerId +
           " on node: " + node);
+      // Some unknown container sneaked into the system. Kill it.
+      this.rmContext.getDispatcher().getEventHandler()
+        .handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId));
+
       return;
     }

     application.containerLaunchedOnNode(containerId, node.getNodeID());
   }

   @Lock(FifoScheduler.class)
@@ -39,15 +39,17 @@ public class MockNM {

   private int responseId;
   private NodeId nodeId;
-  private final String nodeIdStr;
   private final int memory;
   private final ResourceTrackerService resourceTracker;
   private final int httpPort = 2;

   MockNM(String nodeIdStr, int memory, ResourceTrackerService resourceTracker) {
-    this.nodeIdStr = nodeIdStr;
     this.memory = memory;
     this.resourceTracker = resourceTracker;
+    String[] splits = nodeIdStr.split(":");
+    nodeId = Records.newRecord(NodeId.class);
+    nodeId.setHost(splits[0]);
+    nodeId.setPort(Integer.parseInt(splits[1]));
   }

   public NodeId getNodeId() {
@@ -63,14 +65,10 @@ public class MockNM {
         new HashMap<ApplicationId, List<ContainerStatus>>();
     conts.put(container.getId().getApplicationAttemptId().getApplicationId(),
         Arrays.asList(new ContainerStatus[] { container.getContainerStatus() }));
-    nodeHeartbeat(conts, true,nodeId);
+    nodeHeartbeat(conts, true);
   }

   public NodeId registerNode() throws Exception {
-    String[] splits = nodeIdStr.split(":");
-    nodeId = Records.newRecord(NodeId.class);
-    nodeId.setHost(splits[0]);
-    nodeId.setPort(Integer.parseInt(splits[1]));
     RegisterNodeManagerRequest req = Records.newRecord(
         RegisterNodeManagerRequest.class);
     req.setNodeId(nodeId);
@@ -83,11 +81,11 @@ public class MockNM {
   }

   public HeartbeatResponse nodeHeartbeat(boolean b) throws Exception {
-    return nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), b,nodeId);
+    return nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), b);
   }

   public HeartbeatResponse nodeHeartbeat(Map<ApplicationId,
-      List<ContainerStatus>> conts, boolean isHealthy, NodeId nodeId) throws Exception {
+      List<ContainerStatus>> conts, boolean isHealthy) throws Exception {
     NodeHeartbeatRequest req = Records.newRecord(NodeHeartbeatRequest.class);
     NodeStatus status = Records.newRecord(NodeStatus.class);
     status.setNodeId(nodeId);
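A behavioural detail of the MockNM hunk: building the NodeId eagerly in the constructor, instead of lazily in registerNode(), means getNodeId() and the heartbeat calls have a valid id before the node ever registers, which is what allows the NodeId parameter to be dropped from nodeHeartbeat(). A minimal sketch of the eager parse, assuming the usual "host:port" form and using plain fields in place of the YARN NodeId record:

// Sketch only: parse "host:port" once at construction time so the id is
// usable before any RPC happens.
class MiniNodeId {
  final String host;
  final int port;

  MiniNodeId(String nodeIdStr) {
    String[] splits = nodeIdStr.split(":");
    this.host = splits[0];
    this.port = Integer.parseInt(splits[1]);
  }

  public static void main(String[] args) {
    MiniNodeId id = new MiniNodeId("h1:1234");
    System.out.println(id.host + ":" + id.port); // prints h1:1234
  }
}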
@@ -152,13 +152,13 @@ public class MockNodes {
   }

   @Override
-  public List<ApplicationId> pullAppsToCleanup() {
+  public List<ApplicationId> getAppsToCleanup() {
     // TODO Auto-generated method stub
     return null;
   }

   @Override
-  public List<ContainerId> pullContainersToCleanUp() {
+  public List<ContainerId> getContainersToCleanUp() {
     // TODO Auto-generated method stub
     return null;
   }
@@ -19,26 +19,39 @@
 package org.apache.hadoop.yarn.server.resourcemanager;

 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;

 import junit.framework.Assert;

+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.DrainDispatcher;
+import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
+import org.apache.hadoop.yarn.util.BuilderUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.junit.Test;
-import org.mortbay.log.Log;

 public class TestApplicationCleanup {

+  private static final Log LOG = LogFactory
+      .getLog(TestApplicationCleanup.class);
+
   @Test
   public void testAppCleanup() throws Exception {
     Logger rootLogger = LogManager.getRootLogger();
@@ -67,11 +80,13 @@ public class TestApplicationCleanup {
     List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
         new ArrayList<ContainerId>()).getAllocatedContainers();
     int contReceived = conts.size();
-    while (contReceived < request) {
+    int waitCount = 0;
+    while (contReceived < request && waitCount++ < 20) {
       conts = am.allocate(new ArrayList<ResourceRequest>(),
           new ArrayList<ContainerId>()).getAllocatedContainers();
       contReceived += conts.size();
-      Log.info("Got " + contReceived + " containers. Waiting to get " + request);
+      LOG.info("Got " + contReceived + " containers. Waiting to get "
+          + request);
       Thread.sleep(2000);
     }
     Assert.assertEquals(request, conts.size());
@@ -86,11 +101,12 @@ public class TestApplicationCleanup {

     //currently only containers are cleaned via this
     //AM container is cleaned via container launcher
-    while (cleanedConts < 2 || cleanedApps < 1) {
+    waitCount = 0;
+    while ((cleanedConts < 3 || cleanedApps < 1) && waitCount++ < 20) {
       HeartbeatResponse resp = nm1.nodeHeartbeat(true);
       contsToClean = resp.getContainersToCleanupList();
       apps = resp.getApplicationsToCleanupList();
-      Log.info("Waiting to get cleanup events.. cleanedConts: "
+      LOG.info("Waiting to get cleanup events.. cleanedConts: "
           + cleanedConts + " cleanedApps: " + cleanedApps);
       cleanedConts += contsToClean.size();
       cleanedApps += apps.size();
@@ -99,6 +115,130 @@ public class TestApplicationCleanup {

     Assert.assertEquals(1, apps.size());
     Assert.assertEquals(app.getApplicationId(), apps.get(0));
+    Assert.assertEquals(1, cleanedApps);
+    Assert.assertEquals(3, cleanedConts);
+
+    rm.stop();
+  }
+
+  @Test
+  public void testContainerCleanup() throws Exception {
+
+    Logger rootLogger = LogManager.getRootLogger();
+    rootLogger.setLevel(Level.DEBUG);
+    final DrainDispatcher dispatcher = new DrainDispatcher();
+    MockRM rm = new MockRM() {
+      @Override
+      protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
+        return new SchedulerEventDispatcher(this.scheduler) {
+          @Override
+          public void handle(SchedulerEvent event) {
+            scheduler.handle(event);
+          }
+        };
+      }
+
+      @Override
+      protected Dispatcher createDispatcher() {
+        return dispatcher;
+      }
+    };
+    rm.start();
+
+    MockNM nm1 = rm.registerNode("h1:1234", 5000);
+
+    RMApp app = rm.submitApp(2000);
+
+    //kick the scheduling
+    nm1.nodeHeartbeat(true);
+
+    RMAppAttempt attempt = app.getCurrentAppAttempt();
+    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
+    am.registerAppAttempt();
+
+    //request for containers
+    int request = 2;
+    am.allocate("h1" , 1000, request,
+        new ArrayList<ContainerId>());
+    dispatcher.await();
+
+    //kick the scheduler
+    nm1.nodeHeartbeat(true);
+    List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
+        new ArrayList<ContainerId>()).getAllocatedContainers();
+    int contReceived = conts.size();
+    int waitCount = 0;
+    while (contReceived < request && waitCount++ < 20) {
+      conts = am.allocate(new ArrayList<ResourceRequest>(),
+          new ArrayList<ContainerId>()).getAllocatedContainers();
+      dispatcher.await();
+      contReceived += conts.size();
+      LOG.info("Got " + contReceived + " containers. Waiting to get "
+          + request);
+      Thread.sleep(2000);
+    }
+    Assert.assertEquals(request, conts.size());
+
+    // Release a container.
+    ArrayList<ContainerId> release = new ArrayList<ContainerId>();
+    release.add(conts.get(1).getId());
+    am.allocate(new ArrayList<ResourceRequest>(), release);
+    dispatcher.await();
+
+    // Send one more heartbeat with a fake running container. This is to
+    // simulate the situation that can happen if the NM reports that container
+    // is running in the same heartbeat when the RM asks it to clean it up.
+    Map<ApplicationId, List<ContainerStatus>> containerStatuses =
+        new HashMap<ApplicationId, List<ContainerStatus>>();
+    ArrayList<ContainerStatus> containerStatusList =
+        new ArrayList<ContainerStatus>();
+    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(1)
+        .getId(), ContainerState.RUNNING, "nothing", 0));
+    containerStatuses.put(app.getApplicationId(), containerStatusList);
+
+    HeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true);
+    dispatcher.await();
+    List<ContainerId> contsToClean = resp.getContainersToCleanupList();
+    int cleanedConts = contsToClean.size();
+    waitCount = 0;
+    while (cleanedConts < 1 && waitCount++ < 20) {
+      resp = nm1.nodeHeartbeat(true);
+      dispatcher.await();
+      contsToClean = resp.getContainersToCleanupList();
+      LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
+      cleanedConts += contsToClean.size();
+      Thread.sleep(1000);
+    }
+    LOG.info("Got cleanup for " + contsToClean.get(0));
+    Assert.assertEquals(1, cleanedConts);
+
+    // Now to test the case when RM already gave cleanup, and NM suddenly
+    // realizes that the container is running.
+    LOG.info("Testing container launch much after release and "
+        + "NM getting cleanup");
+    containerStatuses.clear();
+    containerStatusList.clear();
+    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(1)
+        .getId(), ContainerState.RUNNING, "nothing", 0));
+    containerStatuses.put(app.getApplicationId(), containerStatusList);
+
+    resp = nm1.nodeHeartbeat(containerStatuses, true);
+    dispatcher.await();
+    contsToClean = resp.getContainersToCleanupList();
+    cleanedConts = contsToClean.size();
+    // The cleanup list won't be instantaneous as it is given out by scheduler
+    // and not RMNodeImpl.
+    waitCount = 0;
+    while (cleanedConts < 1 && waitCount++ < 20) {
+      resp = nm1.nodeHeartbeat(true);
+      dispatcher.await();
+      contsToClean = resp.getContainersToCleanupList();
+      LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
+      cleanedConts += contsToClean.size();
+      Thread.sleep(1000);
+    }
+    LOG.info("Got cleanup for " + contsToClean.get(0));
+    Assert.assertEquals(1, cleanedConts);

     rm.stop();
   }
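The new testContainerCleanup leans on DrainDispatcher: overriding createDispatcher() makes the RM's asynchronous event pipeline awaitable, so each dispatcher.await() call lets the test proceed deterministically instead of racing the event threads. A toy sketch of that drain-and-await idea, not the actual YARN DrainDispatcher implementation:

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;

// Sketch only: a dispatcher whose await() blocks until every event
// dispatched so far has been fully processed by the worker thread.
class ToyDrainDispatcher {
  private final BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>();
  private final AtomicInteger pending = new AtomicInteger(0);

  ToyDrainDispatcher() {
    Thread worker = new Thread(new Runnable() {
      public void run() {
        try {
          while (true) {
            Runnable event = queue.take();
            try {
              event.run();
            } finally {
              pending.decrementAndGet();
            }
          }
        } catch (InterruptedException e) {
          // test shutdown
        }
      }
    });
    worker.setDaemon(true);
    worker.start();
  }

  void dispatch(Runnable event) {
    pending.incrementAndGet();
    queue.add(event);
  }

  // Block until the queue is drained and all handlers have returned.
  void await() throws InterruptedException {
    while (pending.get() > 0) {
      Thread.sleep(10);
    }
  }
}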
@@ -164,8 +164,7 @@ public class TestResourceTrackerService {
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

     nodeHeartbeat = nm2.nodeHeartbeat(
-      new HashMap<ApplicationId, List<ContainerStatus>>(), true,
-      recordFactory.newRecordInstance(NodeId.class));
+      new HashMap<ApplicationId, List<ContainerStatus>>(), true);
     Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction()));
     checkRebootedNMCount(rm, ++initialMetricCount);
   }