YARN-9640. Slow event processing could cause too many attempt unregister events. Contributed by Bibin A Chundatt.
This commit is contained in:
parent
07e3cf952e
commit
d70f5231a7
|
@ -94,6 +94,8 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
RecordFactoryProvider.getRecordFactory(null);
|
||||
private final ConcurrentMap<ApplicationAttemptId, AllocateResponseLock> responseMap =
|
||||
new ConcurrentHashMap<ApplicationAttemptId, AllocateResponseLock>();
|
||||
private final ConcurrentHashMap<ApplicationAttemptId, Boolean>
|
||||
finishedAttemptCache = new ConcurrentHashMap<>();
|
||||
protected final RMContext rmContext;
|
||||
private final AMSProcessingChain amsProcessingChain;
|
||||
private boolean timelineServiceV2Enabled;
|
||||
|
@ -339,11 +341,14 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
throw new ApplicationMasterNotRegisteredException(message);
|
||||
}
|
||||
|
||||
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
|
||||
FinishApplicationMasterResponse response =
|
||||
FinishApplicationMasterResponse.newInstance(false);
|
||||
this.amsProcessingChain.finishApplicationMaster(
|
||||
applicationAttemptId, request, response);
|
||||
if (finishedAttemptCache.putIfAbsent(applicationAttemptId, true)
|
||||
== null) {
|
||||
this.amsProcessingChain
|
||||
.finishApplicationMaster(applicationAttemptId, request, response);
|
||||
}
|
||||
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
|
||||
return response;
|
||||
}
|
||||
}
|
||||
|
@ -492,6 +497,7 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
public void unregisterAttempt(ApplicationAttemptId attemptId) {
|
||||
LOG.info("Unregistering app attempt : " + attemptId);
|
||||
responseMap.remove(attemptId);
|
||||
finishedAttemptCache.remove(attemptId);
|
||||
rmContext.getNMTokenSecretManager().unregisterApplicationAttempt(attemptId);
|
||||
}
|
||||
|
||||
|
@ -506,6 +512,8 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
if (this.server != null) {
|
||||
this.server.stop();
|
||||
}
|
||||
responseMap.clear();
|
||||
finishedAttemptCache.clear();
|
||||
super.serviceStop();
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,10 @@
|
|||
package org.apache.hadoop.yarn.server.resourcemanager;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
import org.apache.hadoop.yarn.event.DrainDispatcher;
|
||||
import org.apache.hadoop.yarn.event.Event;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||
|
@ -354,6 +358,55 @@ public abstract class ApplicationMasterServiceTestBase {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 1200000)
|
||||
public void testRepeatedFinishApplicationMaster() throws Exception {
|
||||
|
||||
CountingDispatcher dispatcher = new CountingDispatcher();
|
||||
MockRM rm = new MockRM(conf) {
|
||||
@Override
|
||||
protected Dispatcher createDispatcher() {
|
||||
return dispatcher;
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
rm.start();
|
||||
// Register node1
|
||||
MockNM nm1 = rm.registerNode(DEFAULT_HOST + ":" + DEFAULT_PORT, 6 * GB);
|
||||
// Submit an application
|
||||
RMApp app1 = rm.submitApp(2048);
|
||||
MockAM am1 = MockRM.launchAM(app1, rm, nm1);
|
||||
am1.registerAppAttempt();
|
||||
FinishApplicationMasterRequest req = FinishApplicationMasterRequest
|
||||
.newInstance(FinalApplicationStatus.FAILED, "", "");
|
||||
for (int i = 0; i < 10; i++) {
|
||||
am1.unregisterAppAttempt(req, false);
|
||||
}
|
||||
Assert.assertEquals("Expecting only one event", 1,
|
||||
dispatcher.getEventCount());
|
||||
} finally {
|
||||
rm.stop();
|
||||
}
|
||||
}
|
||||
|
||||
static class CountingDispatcher extends DrainDispatcher {
|
||||
private int eventreceived = 0;
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
@Override
|
||||
protected void dispatch(Event event) {
|
||||
if (event.getType() == RMAppAttemptEventType.UNREGISTERED) {
|
||||
eventreceived++;
|
||||
} else {
|
||||
super.dispatch(event);
|
||||
}
|
||||
}
|
||||
|
||||
public int getEventCount() {
|
||||
return eventreceived;
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 3000000)
|
||||
public void testResourceTypes() throws Exception {
|
||||
HashMap<YarnConfiguration,
|
||||
|
|
Loading…
Reference in New Issue