YARN-974. Added more information to RMContainer to be collected and recorded in Application-History. Contributed by Zhijie Shen.
svn merge --ignore-ancestry -c 1556733 ../YARN-321 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1562188 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e8a09094f5
commit
f8cd06194d
|
@ -487,6 +487,9 @@ Branch YARN-321: Generic ApplicationHistoryService
|
|||
and Containers from ApplicationHistoryProtocol. (Mayank Bansal and Zhijie Shen
|
||||
via vinodkv)
|
||||
|
||||
YARN-974. Added more information to RMContainer to be collected and recorded in
|
||||
Application-History. (Zhijie Shen via vinodkv)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
|
@ -50,4 +51,22 @@ public interface RMContainer extends EventHandler<RMContainerEvent> {
|
|||
|
||||
/**
 * Returns the priority recorded for this container's reservation.
 *
 * @return the reserved priority
 */
Priority getReservedPriority();
|
||||
|
||||
/**
 * Returns the resource allocated to this container.
 *
 * @return the allocated resource
 */
Resource getAllocatedResource();
|
||||
|
||||
/**
 * Returns the id of the node this container was allocated on.
 *
 * @return the allocated node id
 */
NodeId getAllocatedNode();
|
||||
|
||||
/**
 * Returns the priority this container was allocated at.
 *
 * @return the allocated priority
 */
Priority getAllocatedPriority();
|
||||
|
||||
/**
 * Returns the start time of this container.
 *
 * NOTE(review): RMContainerImpl records System.currentTimeMillis() in its
 * constructor, so the value is presumably epoch milliseconds - confirm for
 * other implementations.
 *
 * @return the start time
 */
long getStartTime();
|
||||
|
||||
/**
 * Returns the finish time of this container.
 *
 * NOTE(review): RMContainerImpl records System.currentTimeMillis() when it
 * handles a finished event; the value before that point is presumably the
 * field default - confirm.
 *
 * @return the finish time
 */
long getFinishTime();
|
||||
|
||||
/**
 * Returns the diagnostics text reported for this container.
 *
 * NOTE(review): RMContainerImpl reads this from the ContainerStatus that
 * arrives with the finished event - confirm what callers should expect
 * before the container has finished.
 *
 * @return the diagnostics text
 */
String getDiagnosticsInfo();
|
||||
|
||||
/**
 * Returns the URL under which this container's logs can be viewed.
 *
 * NOTE(review): RMContainerImpl only builds the URL once the container is
 * launched - confirm what callers should expect before that.
 *
 * @return the log URL
 */
String getLogURL();
|
||||
|
||||
/**
 * Returns the exit status reported for this container.
 *
 * NOTE(review): RMContainerImpl reads this from the ContainerStatus that
 * arrives with the finished event - confirm the value before completion.
 *
 * @return the container exit status
 */
int getContainerExitStatus();
|
||||
|
||||
/**
 * Returns the {@link ContainerState} reported for this container. This is
 * a different type from the RMContainerState exposed by the state machine.
 *
 * NOTE(review): RMContainerImpl reads this from the ContainerStatus that
 * arrives with the finished event - confirm the value before completion.
 *
 * @return the container state
 */
ContainerState getContainerState();
|
||||
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;
|
||||
|
||||
import static org.apache.hadoop.yarn.util.StringHelper.join;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
|
||||
|
@ -25,9 +27,12 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.http.HttpConfig;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
|
@ -40,6 +45,7 @@ import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
|
|||
import org.apache.hadoop.yarn.state.SingleArcTransition;
|
||||
import org.apache.hadoop.yarn.state.StateMachine;
|
||||
import org.apache.hadoop.yarn.state.StateMachineFactory;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public class RMContainerImpl implements RMContainer {
|
||||
|
@ -135,15 +141,21 @@ public class RMContainerImpl implements RMContainer {
|
|||
private final Container container;
|
||||
private final EventHandler eventHandler;
|
||||
private final ContainerAllocationExpirer containerAllocationExpirer;
|
||||
private final String user;
|
||||
|
||||
private Resource reservedResource;
|
||||
private NodeId reservedNode;
|
||||
private Priority reservedPriority;
|
||||
private long startTime;
|
||||
private long finishTime;
|
||||
private String logURL;
|
||||
private ContainerStatus finishedStatus;
|
||||
|
||||
public RMContainerImpl(Container container,
|
||||
ApplicationAttemptId appAttemptId, NodeId nodeId,
|
||||
EventHandler handler,
|
||||
ContainerAllocationExpirer containerAllocationExpirer) {
|
||||
ContainerAllocationExpirer containerAllocationExpirer,
|
||||
String user) {
|
||||
this.stateMachine = stateMachineFactory.make(this);
|
||||
this.containerId = container.getId();
|
||||
this.nodeId = nodeId;
|
||||
|
@ -151,6 +163,8 @@ public class RMContainerImpl implements RMContainer {
|
|||
this.appAttemptId = appAttemptId;
|
||||
this.eventHandler = handler;
|
||||
this.containerAllocationExpirer = containerAllocationExpirer;
|
||||
this.user = user;
|
||||
this.startTime = System.currentTimeMillis();
|
||||
|
||||
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
|
||||
this.readLock = lock.readLock();
|
||||
|
@ -197,7 +211,77 @@ public class RMContainerImpl implements RMContainer {
|
|||
public Priority getReservedPriority() {
|
||||
return reservedPriority;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Resource getAllocatedResource() {
|
||||
return container.getResource();
|
||||
}
|
||||
|
||||
@Override
|
||||
public NodeId getAllocatedNode() {
|
||||
return container.getNodeId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Priority getAllocatedPriority() {
|
||||
return container.getPriority();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getStartTime() {
|
||||
return startTime;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getFinishTime() {
|
||||
try {
|
||||
readLock.lock();
|
||||
return finishTime;
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDiagnosticsInfo() {
|
||||
try {
|
||||
readLock.lock();
|
||||
return finishedStatus.getDiagnostics();
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLogURL() {
|
||||
try {
|
||||
readLock.lock();
|
||||
return logURL;
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getContainerExitStatus() {
|
||||
try {
|
||||
readLock.lock();
|
||||
return finishedStatus.getExitStatus();
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ContainerState getContainerState() {
|
||||
try {
|
||||
readLock.lock();
|
||||
return finishedStatus.getState();
|
||||
} finally {
|
||||
readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return containerId.toString();
|
||||
|
@ -276,6 +360,12 @@ public class RMContainerImpl implements RMContainer {
|
|||
|
||||
@Override
|
||||
public void transition(RMContainerImpl container, RMContainerEvent event) {
|
||||
// The logs of running containers should be found on NM webUI
|
||||
// The logs should be accessible after the container is launched
|
||||
container.logURL = join(HttpConfig.getSchemePrefix(),
|
||||
container.container.getNodeHttpAddress(), "/node", "/containerlogs/",
|
||||
ConverterUtils.toString(container.containerId), "/",
|
||||
container.user);
|
||||
// Unregister from containerAllocationExpirer.
|
||||
container.containerAllocationExpirer.unregister(container
|
||||
.getContainerId());
|
||||
|
@ -288,6 +378,11 @@ public class RMContainerImpl implements RMContainer {
|
|||
public void transition(RMContainerImpl container, RMContainerEvent event) {
|
||||
RMContainerFinishedEvent finishedEvent = (RMContainerFinishedEvent) event;
|
||||
|
||||
container.finishTime = System.currentTimeMillis();
|
||||
container.finishedStatus = finishedEvent.getRemoteContainerStatus();
|
||||
// TODO: when AHS webUI is ready, logURL should be updated to point to
|
||||
// the web page that will show the aggregated logs
|
||||
|
||||
// Inform AppAttempt
|
||||
container.eventHandler.handle(new RMAppAttemptContainerFinishedEvent(
|
||||
container.appAttemptId, finishedEvent.getRemoteContainerStatus()));
|
||||
|
|
|
@ -234,7 +234,8 @@ public abstract class SchedulerApplicationAttempt {
|
|||
rmContainer =
|
||||
new RMContainerImpl(container, getApplicationAttemptId(),
|
||||
node.getNodeID(), rmContext.getDispatcher().getEventHandler(),
|
||||
rmContext.getContainerAllocationExpirer());
|
||||
rmContext.getContainerAllocationExpirer(),
|
||||
appSchedulingInfo.getUser());
|
||||
|
||||
Resources.addTo(currentReservation, container.getResource());
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
|
|||
RMContainer rmContainer = new RMContainerImpl(container, this
|
||||
.getApplicationAttemptId(), node.getNodeID(), this.rmContext
|
||||
.getDispatcher().getEventHandler(), this.rmContext
|
||||
.getContainerAllocationExpirer());
|
||||
.getContainerAllocationExpirer(), appSchedulingInfo.getUser());
|
||||
|
||||
// Add it to allContainers list.
|
||||
newlyAllocatedContainers.add(rmContainer);
|
||||
|
|
|
@ -273,7 +273,7 @@ public class FSSchedulerApp extends SchedulerApplicationAttempt {
|
|||
RMContainer rmContainer = new RMContainerImpl(container,
|
||||
getApplicationAttemptId(), node.getNodeID(), rmContext
|
||||
.getDispatcher().getEventHandler(), rmContext
|
||||
.getContainerAllocationExpirer());
|
||||
.getContainerAllocationExpirer(), appSchedulingInfo.getUser());
|
||||
|
||||
// Add it to allContainers list.
|
||||
newlyAllocatedContainers.add(rmContainer);
|
||||
|
|
|
@ -26,7 +26,9 @@ import static org.mockito.Mockito.verify;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
|
@ -46,6 +48,7 @@ import org.mockito.ArgumentCaptor;
|
|||
@SuppressWarnings({ "unchecked", "rawtypes" })
|
||||
public class TestRMContainerImpl {
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
@Test
|
||||
public void testReleaseWhileRunning() {
|
||||
|
||||
|
@ -72,9 +75,12 @@ public class TestRMContainerImpl {
|
|||
"host:3465", resource, priority, null);
|
||||
|
||||
RMContainer rmContainer = new RMContainerImpl(container, appAttemptId,
|
||||
nodeId, eventHandler, expirer);
|
||||
nodeId, eventHandler, expirer, "user");
|
||||
|
||||
assertEquals(RMContainerState.NEW, rmContainer.getState());
|
||||
assertEquals(resource, rmContainer.getAllocatedResource());
|
||||
assertEquals(nodeId, rmContainer.getAllocatedNode());
|
||||
assertEquals(priority, rmContainer.getAllocatedPriority());
|
||||
|
||||
rmContainer.handle(new RMContainerEvent(containerId,
|
||||
RMContainerEventType.START));
|
||||
|
@ -90,6 +96,9 @@ public class TestRMContainerImpl {
|
|||
RMContainerEventType.LAUNCHED));
|
||||
drainDispatcher.await();
|
||||
assertEquals(RMContainerState.RUNNING, rmContainer.getState());
|
||||
assertEquals(
|
||||
"http://host:3465/node/containerlogs/container_1_0001_01_000001/user",
|
||||
rmContainer.getLogURL());
|
||||
|
||||
// In RUNNING state. Verify RELEASED and associated actions.
|
||||
reset(appAttemptEventHandler);
|
||||
|
@ -100,6 +109,11 @@ public class TestRMContainerImpl {
|
|||
containerStatus, RMContainerEventType.RELEASED));
|
||||
drainDispatcher.await();
|
||||
assertEquals(RMContainerState.RELEASED, rmContainer.getState());
|
||||
assertEquals(SchedulerUtils.RELEASED_CONTAINER,
|
||||
rmContainer.getDiagnosticsInfo());
|
||||
assertEquals(ContainerExitStatus.ABORTED,
|
||||
rmContainer.getContainerExitStatus());
|
||||
assertEquals(ContainerState.COMPLETE, rmContainer.getContainerState());
|
||||
|
||||
ArgumentCaptor<RMAppAttemptContainerFinishedEvent> captor = ArgumentCaptor
|
||||
.forClass(RMAppAttemptContainerFinishedEvent.class);
|
||||
|
@ -116,6 +130,7 @@ public class TestRMContainerImpl {
|
|||
assertEquals(RMContainerState.RELEASED, rmContainer.getState());
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
@Test
|
||||
public void testExpireWhileRunning() {
|
||||
|
||||
|
@ -142,9 +157,12 @@ public class TestRMContainerImpl {
|
|||
"host:3465", resource, priority, null);
|
||||
|
||||
RMContainer rmContainer = new RMContainerImpl(container, appAttemptId,
|
||||
nodeId, eventHandler, expirer);
|
||||
nodeId, eventHandler, expirer, "user");
|
||||
|
||||
assertEquals(RMContainerState.NEW, rmContainer.getState());
|
||||
assertEquals(resource, rmContainer.getAllocatedResource());
|
||||
assertEquals(nodeId, rmContainer.getAllocatedNode());
|
||||
assertEquals(priority, rmContainer.getAllocatedPriority());
|
||||
|
||||
rmContainer.handle(new RMContainerEvent(containerId,
|
||||
RMContainerEventType.START));
|
||||
|
@ -160,6 +178,9 @@ public class TestRMContainerImpl {
|
|||
RMContainerEventType.LAUNCHED));
|
||||
drainDispatcher.await();
|
||||
assertEquals(RMContainerState.RUNNING, rmContainer.getState());
|
||||
assertEquals(
|
||||
"http://host:3465/node/containerlogs/container_1_0001_01_000001/user",
|
||||
rmContainer.getLogURL());
|
||||
|
||||
// In RUNNING state. Verify EXPIRE and associated actions.
|
||||
reset(appAttemptEventHandler);
|
||||
|
|
|
@ -255,7 +255,7 @@ public class TestChildQueueOrder {
|
|||
Container container=TestUtils.getMockContainer(containerId,
|
||||
node_0.getNodeID(), Resources.createResource(1*GB), priority);
|
||||
RMContainer rmContainer = new RMContainerImpl(container, appAttemptId,
|
||||
node_0.getNodeID(), eventHandler, expirer);
|
||||
node_0.getNodeID(), eventHandler, expirer, "user");
|
||||
|
||||
// Assign {1,2,3,4} 1GB containers respectively to queues
|
||||
stubQueueAllocation(a, clusterResource, node_0, 1*GB);
|
||||
|
|
Loading…
Reference in New Issue