YARN-4392. ApplicationCreatedEvent event time resets after RM

restart/failover. Contributed by Naganarasimha G R and Xuan Gong
This commit is contained in:
Xuan 2015-12-07 12:24:55 -08:00
parent 01a641bc44
commit 4546c7582b
5 changed files with 37 additions and 10 deletions

View File

@ -1103,6 +1103,9 @@ Release 2.8.0 - UNRELEASED
YARN-4408. Fix issue that NodeManager reports negative running containers.
(Robert Kanter via junping_du)
YARN-4392. ApplicationCreatedEvent event time resets after RM restart/failover.
(Naganarasimha G R and Xuan Gong via xgong)
Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -26,7 +26,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputByteBuffer;
import org.apache.hadoop.ipc.CallerContext;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;

View File

@ -444,9 +444,6 @@ public class RMAppImpl implements RMApp, Recoverable {
this.callerContext = CallerContext.getCurrent();
rmContext.getRMApplicationHistoryWriter().applicationStarted(this);
rmContext.getSystemMetricsPublisher().appCreated(this, startTime);
long localLogAggregationStatusTimeout =
conf.getLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_STATUS_TIME_OUT_MS);
@ -813,6 +810,9 @@ public class RMAppImpl implements RMApp, Recoverable {
this.startTime = appState.getStartTime();
this.callerContext = appState.getCallerContext();
// send the ATS create Event
sendATSCreateEvent(this, this.startTime);
for(int i=0; i<appState.getAttemptCount(); ++i) {
// create attempt
createNewAttempt();
@ -1084,6 +1084,9 @@ public class RMAppImpl implements RMApp, Recoverable {
// communication
LOG.info("Storing application with id " + app.applicationId);
app.rmContext.getStateStore().storeNewApplication(app);
// send the ATS create Event
app.sendATSCreateEvent(app, app.startTime);
}
}
@ -1734,4 +1737,9 @@ public class RMAppImpl implements RMApp, Recoverable {
public CallerContext getCallerContext() {
return callerContext;
}
private void sendATSCreateEvent(RMApp app, long startTime) {
rmContext.getRMApplicationHistoryWriter().applicationStarted(app);
rmContext.getSystemMetricsPublisher().appCreated(app, startTime);
}
}

View File

@ -18,15 +18,15 @@
package org.apache.hadoop.yarn.server.resourcemanager;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import com.google.common.base.Supplier;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
@ -87,6 +87,7 @@ import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
@ -113,7 +114,9 @@ import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
@ -896,7 +899,13 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase {
memStore.init(conf);
// start RM
MockRM rm1 = createMockRM(conf, memStore);
MockRM rm1 = new MockRM(conf, memStore) {
@Override
protected SystemMetricsPublisher createSystemMetricsPublisher() {
return spy(super.createSystemMetricsPublisher());
}
};
rms.add(rm1);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
@ -925,6 +934,8 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase {
rm1.waitForState(app2.getApplicationId(), RMAppState.KILLED);
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.KILLED);
verify(rm1.getRMContext().getSystemMetricsPublisher(),Mockito.times(3))
.appCreated(any(RMApp.class), anyLong());
// restart rm
MockRM rm2 = new MockRM(conf, memStore) {
@ -932,10 +943,18 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase {
protected RMAppManager createRMAppManager() {
return spy(super.createRMAppManager());
}
@Override
protected SystemMetricsPublisher createSystemMetricsPublisher() {
return spy(super.createSystemMetricsPublisher());
}
};
rms.add(rm2);
rm2.start();
verify(rm2.getRMContext().getSystemMetricsPublisher(),Mockito.times(3))
.appCreated(any(RMApp.class), anyLong());
GetApplicationsRequest request1 =
GetApplicationsRequest.newInstance(EnumSet.of(
YarnApplicationState.FINISHED, YarnApplicationState.KILLED,

View File

@ -18,7 +18,6 @@
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Mockito.doReturn;
@ -64,6 +63,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWri
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
@ -368,8 +368,6 @@ public class TestRMAppTransitions {
protected RMApp testCreateAppNewSaving(
ApplicationSubmissionContext submissionContext) throws IOException {
RMApp application = createNewTestApp(submissionContext);
verify(writer).applicationStarted(any(RMApp.class));
verify(publisher).appCreated(any(RMApp.class), anyLong());
// NEW => NEW_SAVING event RMAppEventType.START
RMAppEvent event =
new RMAppEvent(application.getApplicationId(), RMAppEventType.START);