YARN-4720. Skip unnecessary NN operations in log aggregation. (Jun Gong via mingma)

(cherry picked from commit 7f3139e54d)
This commit is contained in:
Ming Ma 2016-02-26 08:40:05 -08:00
parent b1d497a7de
commit 1656bcec5f
3 changed files with 106 additions and 24 deletions

View File

@ -826,6 +826,9 @@ Release 2.8.0 - UNRELEASED
YARN-4066. Large number of queues choke fair scheduler. YARN-4066. Large number of queues choke fair scheduler.
(Johan Gustavsson via kasha) (Johan Gustavsson via kasha)
YARN-4720. Skip unnecessary NN operations in log aggregation.
(Jun Gong via mingma)
BUG FIXES BUG FIXES
YARN-4680. TimerTasks leak in ATS V1.5 Writer. (Xuan Gong via gtcarrera9) YARN-4680. TimerTasks leak in ATS V1.5 Writer. (Xuan Gong via gtcarrera9)

View File

@ -127,6 +127,9 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
// This variable is only for testing // This variable is only for testing
private final AtomicBoolean waiting = new AtomicBoolean(false); private final AtomicBoolean waiting = new AtomicBoolean(false);
// This variable is only for testing
private int logAggregationTimes = 0;
private boolean renameTemporaryLogFileFailed = false; private boolean renameTemporaryLogFileFailed = false;
private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators = private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators =
@ -311,7 +314,15 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
} }
LogWriter writer = null; LogWriter writer = null;
String diagnosticMessage = "";
boolean logAggregationSucceedInThisCycle = true;
try { try {
if (pendingContainerInThisCycle.isEmpty()) {
return;
}
logAggregationTimes++;
try { try {
writer = writer =
new LogWriter(this.conf, this.remoteNodeTmpLogFileForApp, new LogWriter(this.conf, this.remoteNodeTmpLogFileForApp,
@ -321,6 +332,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
writer.writeApplicationOwner(this.userUgi.getShortUserName()); writer.writeApplicationOwner(this.userUgi.getShortUserName());
} catch (IOException e1) { } catch (IOException e1) {
logAggregationSucceedInThisCycle = false;
LOG.error("Cannot create writer for app " + this.applicationId LOG.error("Cannot create writer for app " + this.applicationId
+ ". Skip log upload this time. ", e1); + ". Skip log upload this time. ", e1);
return; return;
@ -369,21 +381,17 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
remoteNodeLogFileForApp.getName() + "_" remoteNodeLogFileForApp.getName() + "_"
+ currentTime); + currentTime);
String diagnosticMessage = "";
boolean logAggregationSucceedInThisCycle = true;
final boolean rename = uploadedLogsInThisCycle; final boolean rename = uploadedLogsInThisCycle;
try { try {
userUgi.doAs(new PrivilegedExceptionAction<Object>() { userUgi.doAs(new PrivilegedExceptionAction<Object>() {
@Override @Override
public Object run() throws Exception { public Object run() throws Exception {
FileSystem remoteFS = remoteNodeLogFileForApp.getFileSystem(conf); FileSystem remoteFS = remoteNodeLogFileForApp.getFileSystem(conf);
if (remoteFS.exists(remoteNodeTmpLogFileForApp)) {
if (rename) { if (rename) {
remoteFS.rename(remoteNodeTmpLogFileForApp, renamedPath); remoteFS.rename(remoteNodeTmpLogFileForApp, renamedPath);
} else { } else {
remoteFS.delete(remoteNodeTmpLogFileForApp, false); remoteFS.delete(remoteNodeTmpLogFileForApp, false);
} }
}
return null; return null;
} }
}); });
@ -405,33 +413,39 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
renameTemporaryLogFileFailed = true; renameTemporaryLogFileFailed = true;
logAggregationSucceedInThisCycle = false; logAggregationSucceedInThisCycle = false;
} }
} finally {
LogAggregationReport report = LogAggregationStatus logAggregationStatus =
Records.newRecord(LogAggregationReport.class); logAggregationSucceedInThisCycle
report.setApplicationId(appId);
report.setDiagnosticMessage(diagnosticMessage);
report.setLogAggregationStatus(logAggregationSucceedInThisCycle
? LogAggregationStatus.RUNNING ? LogAggregationStatus.RUNNING
: LogAggregationStatus.RUNNING_WITH_FAILURE); : LogAggregationStatus.RUNNING_WITH_FAILURE;
this.context.getLogAggregationStatusForApps().add(report); sendLogAggregationReport(logAggregationStatus, diagnosticMessage);
if (appFinished) { if (appFinished) {
// If the app is finished, one extra final report with log aggregation // If the app is finished, one extra final report with log aggregation
// status SUCCEEDED/FAILED will be sent to RM to inform the RM // status SUCCEEDED/FAILED will be sent to RM to inform the RM
// that the log aggregation in this NM is completed. // that the log aggregation in this NM is completed.
LogAggregationReport finalReport = LogAggregationStatus finalLogAggregationStatus =
Records.newRecord(LogAggregationReport.class); renameTemporaryLogFileFailed || !logAggregationSucceedInThisCycle
finalReport.setApplicationId(appId); ? LogAggregationStatus.FAILED
finalReport.setLogAggregationStatus(renameTemporaryLogFileFailed : LogAggregationStatus.SUCCEEDED;
? LogAggregationStatus.FAILED : LogAggregationStatus.SUCCEEDED); sendLogAggregationReport(finalLogAggregationStatus, "");
this.context.getLogAggregationStatusForApps().add(finalReport);
} }
} finally {
if (writer != null) { if (writer != null) {
writer.close(); writer.close();
} }
} }
} }
private void sendLogAggregationReport(
LogAggregationStatus logAggregationStatus, String diagnosticMessage) {
LogAggregationReport report =
Records.newRecord(LogAggregationReport.class);
report.setApplicationId(appId);
report.setDiagnosticMessage(diagnosticMessage);
report.setLogAggregationStatus(logAggregationStatus);
this.context.getLogAggregationStatusForApps().add(report);
}
private void cleanOldLogs() { private void cleanOldLogs() {
try { try {
final FileSystem remoteFS = final FileSystem remoteFS =
@ -669,4 +683,10 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
public UserGroupInformation getUgi() { public UserGroupInformation getUgi() {
return this.userUgi; return this.userUgi;
} }
@Private
@VisibleForTesting
public int getLogAggregationTimes() {
return this.logAggregationTimes;
}
} }

View File

@ -2270,6 +2270,65 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
logAggregationService.stop(); logAggregationService.stop();
} }
@Test (timeout = 20000)
public void testSkipUnnecessaryNNOperationsForShortJob() throws Exception {
LogAggregationContext logAggregationContext =
Records.newRecord(LogAggregationContext.class);
logAggregationContext.setLogAggregationPolicyClassName(
FailedOrKilledContainerLogAggregationPolicy.class.getName());
verifySkipUnnecessaryNNOperations(logAggregationContext, 0, 2);
}
@Test (timeout = 20000)
public void testSkipUnnecessaryNNOperationsForService() throws Exception {
this.conf.setLong(
YarnConfiguration.NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS,
3600);
LogAggregationContext contextWithAMOnly =
Records.newRecord(LogAggregationContext.class);
contextWithAMOnly.setLogAggregationPolicyClassName(
AMOnlyLogAggregationPolicy.class.getName());
contextWithAMOnly.setRolledLogsIncludePattern("sys*");
contextWithAMOnly.setRolledLogsExcludePattern("std_final");
verifySkipUnnecessaryNNOperations(contextWithAMOnly, 1, 4);
}
private void verifySkipUnnecessaryNNOperations(
LogAggregationContext logAggregationContext,
int expectedLogAggregationTimes, int expectedAggregationReportNum)
throws Exception {
LogAggregationService logAggregationService = new LogAggregationService(
dispatcher, this.context, this.delSrvc, super.dirsHandler);
logAggregationService.init(this.conf);
logAggregationService.start();
ApplicationId appId = createApplication();
logAggregationService.handle(new LogHandlerAppStartedEvent(appId, this.user,
null, this.acls, logAggregationContext));
// Container finishes
String[] logFiles = new String[] { "stdout" };
finishContainer(appId, logAggregationService,
ContainerType.APPLICATION_MASTER, 1, 0, logFiles);
AppLogAggregatorImpl aggregator =
(AppLogAggregatorImpl) logAggregationService.getAppLogAggregators()
.get(appId);
aggregator.doLogAggregationOutOfBand();
Thread.sleep(2000);
aggregator.doLogAggregationOutOfBand();
Thread.sleep(2000);
// App finishes.
logAggregationService.handle(new LogHandlerAppFinishedEvent(appId));
logAggregationService.stop();
assertEquals(expectedLogAggregationTimes,
aggregator.getLogAggregationTimes());
assertEquals(expectedAggregationReportNum,
this.context.getLogAggregationStatusForApps().size());
}
private int numOfLogsAvailable(LogAggregationService logAggregationService, private int numOfLogsAvailable(LogAggregationService logAggregationService,
ApplicationId appId, boolean sizeLimited, String lastLogFile) ApplicationId appId, boolean sizeLimited, String lastLogFile)
throws IOException { throws IOException {