YARN-820. Fixed an invalid state transition in NodeManager caused by failing resource localization. Contributed by Mayank Bansal.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1503947 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-07-16 23:44:56 +00:00
parent af0d2fc3e3
commit 6d12709b19
4 changed files with 49 additions and 5 deletions

View File

@ -65,6 +65,9 @@ Release 2.1.1-beta - UNRELEASED
YARN-661. Fixed NM to cleanup users' local directories correctly when
starting up. (Omkar Vinit Joshi via vinodkv)
YARN-820. Fixed an invalid state transition in NodeManager caused by failing
resource localization. (Mayank Bansal via vinodkv)
Release 2.1.0-beta - 2013-07-02
INCOMPATIBLE CHANGES

View File

@ -290,6 +290,11 @@ public class ContainerImpl implements Container {
.addTransition(ContainerState.DONE, ContainerState.DONE,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
UPDATE_DIAGNOSTICS_TRANSITION)
// A RESOURCE_FAILED event can still arrive after the container is DONE:
// the container is notified of a failed localization if its localizer
// thread fails for some reason, so the event is accepted as a no-op here.
.addTransition(ContainerState.DONE, ContainerState.DONE,
ContainerEventType.RESOURCE_FAILED)
// create the topology tables
.installTopology();

View File

@ -112,12 +112,17 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
.append(getState() == ResourceState.LOCALIZED
? getLocalPath() + "," + getSize()
: "pending").append(",[");
for (ContainerId c : ref) {
sb.append("(").append(c.toString()).append(")");
try {
this.readLock.lock();
for (ContainerId c : ref) {
sb.append("(").append(c.toString()).append(")");
}
sb.append("],").append(getTimestamp()).append(",").append(getState())
.append("}");
return sb.toString();
} finally {
this.readLock.unlock();
}
sb.append("],").append(getTimestamp()).append(",")
.append(getState()).append("}");
return sb.toString();
}
private void release(ContainerId container) {

View File

@ -240,6 +240,32 @@ public class TestContainer {
}
}
}
/**
 * Checks that a container which has already reached
 * {@link ContainerState#DONE} remains in DONE when a RESOURCE_FAILED event
 * is delivered to it afterwards.
 *
 * The test calls the WrappedContainer helpers in order (init, localize,
 * launch, successful completion, resource cleanup), asserts the DONE state,
 * sends RESOURCE_FAILED via {@code resourceFailedContainer()}, and asserts
 * the state again.
 */
@Test
@SuppressWarnings("unchecked")
// mocked generic
public void testLocalizationFailureAtDone() throws Exception {
WrappedContainer wc = null;
try {
wc = new WrappedContainer(6, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
wc.launchContainer();
// NOTE(review): presumably clears interactions recorded on the mocked
// localizer bus before the completion/cleanup steps - confirm.
reset(wc.localizerBus);
wc.containerSuccessful();
wc.containerResourcesCleanup();
// Precondition for the scenario: the container must be in DONE first.
assertEquals(ContainerState.DONE, wc.c.getContainerState());
// Now in DONE, issue RESOURCE_FAILED as done by LocalizeRunner
wc.resourceFailedContainer();
// Verify still in DONE
assertEquals(ContainerState.DONE, wc.c.getContainerState());
verifyCleanupCall(wc);
} finally {
// wc stays null if the constructor threw, so guard before finishing.
if (wc != null) {
wc.finished();
}
}
}
@Test
@SuppressWarnings("unchecked") // mocked generic
@ -624,6 +650,11 @@ public class TestContainer {
drainDispatcherEvents();
}
/**
 * Delivers a RESOURCE_FAILED event for this wrapper's container id directly
 * to the container, then drains the dispatcher events.
 */
public void resourceFailedContainer() {
  ContainerEvent resourceFailed =
      new ContainerEvent(cId, ContainerEventType.RESOURCE_FAILED);
  c.handle(resourceFailed);
  drainDispatcherEvents();
}
// Localize resources
// Skip some resources so as to consider them failed
public Map<Path, List<String>> doLocalizeResources(