YARN-1121. Changed ResourceManager's state-store to drain all events on shut-down. Contributed by Jian He.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1540232 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a4bc1a6218
commit
e28015ed1b
|
@ -88,6 +88,9 @@ Release 2.3.0 - UNRELEASED
|
|||
YARN-1323. Set HTTPS webapp address along with other RPC addresses in HAUtil
|
||||
(Karthik Kambatla via Sandy Ryza)
|
||||
|
||||
YARN-1121. Changed ResourceManager's state-store to drain all events on
|
||||
shut-down. (Jian He via vinodkv)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -49,6 +49,19 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
|
|||
private final BlockingQueue<Event> eventQueue;
|
||||
private volatile boolean stopped = false;
|
||||
|
||||
// Configuration flag for enabling/disabling draining dispatcher's events on
|
||||
// stop functionality.
|
||||
private volatile boolean drainEventsOnStop = false;
|
||||
|
||||
// Indicates all the remaining dispatcher's events on stop have been drained
|
||||
// and processed.
|
||||
private volatile boolean drained = true;
|
||||
|
||||
// For drainEventsOnStop enabled only, block newly coming events into the
|
||||
// queue while stopping.
|
||||
private volatile boolean blockNewEvents = false;
|
||||
private EventHandler handlerInstance = null;
|
||||
|
||||
private Thread eventHandlingThread;
|
||||
protected final Map<Class<? extends Enum>, EventHandler> eventDispatchers;
|
||||
private boolean exitOnDispatchException;
|
||||
|
@ -68,6 +81,7 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
|
|||
@Override
|
||||
public void run() {
|
||||
while (!stopped && !Thread.currentThread().isInterrupted()) {
|
||||
drained = eventQueue.isEmpty();
|
||||
Event event;
|
||||
try {
|
||||
event = eventQueue.take();
|
||||
|
@ -102,8 +116,19 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
|
|||
eventHandlingThread.start();
|
||||
}
|
||||
|
||||
public void setDrainEventsOnStop() {
|
||||
drainEventsOnStop = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void serviceStop() throws Exception {
|
||||
if (drainEventsOnStop) {
|
||||
blockNewEvents = true;
|
||||
LOG.info("AsyncDispatcher is draining to stop, igonring any new events.");
|
||||
while(!drained) {
|
||||
Thread.yield();
|
||||
}
|
||||
}
|
||||
stopped = true;
|
||||
if (eventHandlingThread != null) {
|
||||
eventHandlingThread.interrupt();
|
||||
|
@ -173,11 +198,19 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
|
|||
|
||||
@Override
|
||||
public EventHandler getEventHandler() {
|
||||
return new GenericEventHandler();
|
||||
if (handlerInstance == null) {
|
||||
handlerInstance = new GenericEventHandler();
|
||||
}
|
||||
return handlerInstance;
|
||||
}
|
||||
|
||||
class GenericEventHandler implements EventHandler<Event> {
|
||||
public void handle(Event event) {
|
||||
if (blockNewEvents) {
|
||||
return;
|
||||
}
|
||||
drained = false;
|
||||
|
||||
/* all this method does is enqueue all the events onto the queue */
|
||||
int qSize = eventQueue.size();
|
||||
if (qSize !=0 && qSize %1000 == 0) {
|
||||
|
|
|
@ -261,17 +261,20 @@ public abstract class RMStateStore extends AbstractService {
|
|||
}
|
||||
|
||||
AsyncDispatcher dispatcher;
|
||||
|
||||
public synchronized void serviceInit(Configuration conf) throws Exception{
|
||||
|
||||
@Override
|
||||
protected void serviceInit(Configuration conf) throws Exception{
|
||||
// create async handler
|
||||
dispatcher = new AsyncDispatcher();
|
||||
dispatcher.init(conf);
|
||||
dispatcher.register(RMStateStoreEventType.class,
|
||||
new ForwardingEventHandler());
|
||||
dispatcher.setDrainEventsOnStop();
|
||||
initInternal(conf);
|
||||
}
|
||||
|
||||
protected synchronized void serviceStart() throws Exception {
|
||||
|
||||
@Override
|
||||
protected void serviceStart() throws Exception {
|
||||
dispatcher.start();
|
||||
startInternal();
|
||||
}
|
||||
|
@ -288,11 +291,12 @@ public abstract class RMStateStore extends AbstractService {
|
|||
*/
|
||||
protected abstract void startInternal() throws Exception;
|
||||
|
||||
public synchronized void serviceStop() throws Exception {
|
||||
@Override
|
||||
protected void serviceStop() throws Exception {
|
||||
closeInternal();
|
||||
dispatcher.stop();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Derived classes close themselves using this method.
|
||||
* The base class will be closed and the event dispatcher will be shutdown
|
||||
|
@ -509,8 +513,7 @@ public abstract class RMStateStore extends AbstractService {
|
|||
}
|
||||
|
||||
// Dispatcher related code
|
||||
|
||||
private synchronized void handleStoreEvent(RMStateStoreEvent event) {
|
||||
protected void handleStoreEvent(RMStateStoreEvent event) {
|
||||
if (event.getType().equals(RMStateStoreEventType.STORE_APP)
|
||||
|| event.getType().equals(RMStateStoreEventType.UPDATE_APP)) {
|
||||
ApplicationState appState = null;
|
||||
|
|
|
@ -163,6 +163,14 @@ public class MockRM extends ResourceManager {
|
|||
public RMApp submitApp(int masterMemory, String name, String user,
|
||||
Map<ApplicationAccessType, String> acls, boolean unmanaged, String queue,
|
||||
int maxAppAttempts, Credentials ts, String appType) throws Exception {
|
||||
return submitApp(masterMemory, name, user, acls, unmanaged, queue,
|
||||
maxAppAttempts, ts, appType, true);
|
||||
}
|
||||
|
||||
public RMApp submitApp(int masterMemory, String name, String user,
|
||||
Map<ApplicationAccessType, String> acls, boolean unmanaged, String queue,
|
||||
int maxAppAttempts, Credentials ts, String appType,
|
||||
boolean waitForAccepted) throws Exception {
|
||||
ApplicationClientProtocol client = getClientRMService();
|
||||
GetNewApplicationResponse resp = client.getNewApplication(Records
|
||||
.newRecord(GetNewApplicationRequest.class));
|
||||
|
@ -222,7 +230,9 @@ public class MockRM extends ResourceManager {
|
|||
}.setClientReq(client, req);
|
||||
fakeUser.doAs(action);
|
||||
// make sure app is immediately available after submit
|
||||
waitForState(appId, RMAppState.ACCEPTED);
|
||||
if (waitForAccepted) {
|
||||
waitForState(appId, RMAppState.ACCEPTED);
|
||||
}
|
||||
return getRMContext().getRMApps().get(appId);
|
||||
}
|
||||
|
||||
|
|
|
@ -70,6 +70,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
|
@ -1062,6 +1063,65 @@ public class TestRMRestart {
|
|||
rm2.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRMStateStoreDispatcherDrainedOnRMStop() throws Exception {
|
||||
MemoryRMStateStore memStore = new MemoryRMStateStore() {
|
||||
volatile boolean wait = true;
|
||||
@Override
|
||||
public void serviceStop() throws Exception {
|
||||
// Unblock app saving request.
|
||||
wait = false;
|
||||
super.serviceStop();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleStoreEvent(RMStateStoreEvent event) {
|
||||
// Block app saving request.
|
||||
while (wait);
|
||||
super.handleStoreEvent(event);
|
||||
}
|
||||
};
|
||||
memStore.init(conf);
|
||||
|
||||
// start RM
|
||||
final MockRM rm1 = new MockRM(conf, memStore);
|
||||
rm1.start();
|
||||
|
||||
// create apps.
|
||||
final ArrayList<RMApp> appList = new ArrayList<RMApp>();
|
||||
final int NUM_APPS = 5;
|
||||
|
||||
for (int i = 0; i < NUM_APPS; i++) {
|
||||
RMApp app = rm1.submitApp(200, "name", "user",
|
||||
new HashMap<ApplicationAccessType, String>(), false,
|
||||
"default", -1, null, "MAPREDUCE", false);
|
||||
appList.add(app);
|
||||
rm1.waitForState(app.getApplicationId(), RMAppState.NEW_SAVING);
|
||||
}
|
||||
// all apps's saving request are now enqueued to RMStateStore's dispatcher
|
||||
// queue, and will be processed once rm.stop() is called.
|
||||
|
||||
// Nothing exist in state store before stop is called.
|
||||
Map<ApplicationId, ApplicationState> rmAppState =
|
||||
memStore.getState().getApplicationState();
|
||||
Assert.assertTrue(rmAppState.size() == 0);
|
||||
|
||||
// stop rm
|
||||
rm1.stop();
|
||||
|
||||
// Assert app info is still saved even if stop is called with pending saving
|
||||
// request on dispatcher.
|
||||
for (RMApp app : appList) {
|
||||
ApplicationState appState = rmAppState.get(app.getApplicationId());
|
||||
Assert.assertNotNull(appState);
|
||||
Assert.assertEquals(0, appState.getAttemptCount());
|
||||
Assert.assertEquals(appState.getApplicationSubmissionContext()
|
||||
.getApplicationId(), app.getApplicationSubmissionContext()
|
||||
.getApplicationId());
|
||||
}
|
||||
Assert.assertTrue(rmAppState.size() == NUM_APPS);
|
||||
}
|
||||
|
||||
public static class TestSecurityMockRM extends MockRM {
|
||||
|
||||
public TestSecurityMockRM(Configuration conf, RMStateStore store) {
|
||||
|
|
Loading…
Reference in New Issue