YARN-4720. Skip unnecessary NN operations in log aggregation. (Jun Gong via mingma)
(cherry picked from commit 7f3139e54d
)
This commit is contained in:
parent
cf8bcc73c2
commit
6f5ca1b293
|
@ -645,6 +645,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
|
|
||||||
YARN-4207. Add a non-judgemental YARN app completion status. (Rich Haase via sseth)
|
YARN-4207. Add a non-judgemental YARN app completion status. (Rich Haase via sseth)
|
||||||
|
|
||||||
|
YARN-4720. Skip unnecessary NN operations in log aggregation.
|
||||||
|
(Jun Gong via mingma)
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
YARN-4680. TimerTasks leak in ATS V1.5 Writer. (Xuan Gong via gtcarrera9)
|
YARN-4680. TimerTasks leak in ATS V1.5 Writer. (Xuan Gong via gtcarrera9)
|
||||||
|
|
|
@ -127,6 +127,9 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
// This variable is only for testing
|
// This variable is only for testing
|
||||||
private final AtomicBoolean waiting = new AtomicBoolean(false);
|
private final AtomicBoolean waiting = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
// This variable is only for testing
|
||||||
|
private int logAggregationTimes = 0;
|
||||||
|
|
||||||
private boolean renameTemporaryLogFileFailed = false;
|
private boolean renameTemporaryLogFileFailed = false;
|
||||||
|
|
||||||
private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators =
|
private final Map<ContainerId, ContainerLogAggregator> containerLogAggregators =
|
||||||
|
@ -311,7 +314,15 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
}
|
}
|
||||||
|
|
||||||
LogWriter writer = null;
|
LogWriter writer = null;
|
||||||
|
String diagnosticMessage = "";
|
||||||
|
boolean logAggregationSucceedInThisCycle = true;
|
||||||
try {
|
try {
|
||||||
|
if (pendingContainerInThisCycle.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
logAggregationTimes++;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
writer =
|
writer =
|
||||||
new LogWriter(this.conf, this.remoteNodeTmpLogFileForApp,
|
new LogWriter(this.conf, this.remoteNodeTmpLogFileForApp,
|
||||||
|
@ -321,6 +332,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
writer.writeApplicationOwner(this.userUgi.getShortUserName());
|
writer.writeApplicationOwner(this.userUgi.getShortUserName());
|
||||||
|
|
||||||
} catch (IOException e1) {
|
} catch (IOException e1) {
|
||||||
|
logAggregationSucceedInThisCycle = false;
|
||||||
LOG.error("Cannot create writer for app " + this.applicationId
|
LOG.error("Cannot create writer for app " + this.applicationId
|
||||||
+ ". Skip log upload this time. ", e1);
|
+ ". Skip log upload this time. ", e1);
|
||||||
return;
|
return;
|
||||||
|
@ -369,21 +381,17 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
remoteNodeLogFileForApp.getName() + "_"
|
remoteNodeLogFileForApp.getName() + "_"
|
||||||
+ currentTime);
|
+ currentTime);
|
||||||
|
|
||||||
String diagnosticMessage = "";
|
|
||||||
boolean logAggregationSucceedInThisCycle = true;
|
|
||||||
final boolean rename = uploadedLogsInThisCycle;
|
final boolean rename = uploadedLogsInThisCycle;
|
||||||
try {
|
try {
|
||||||
userUgi.doAs(new PrivilegedExceptionAction<Object>() {
|
userUgi.doAs(new PrivilegedExceptionAction<Object>() {
|
||||||
@Override
|
@Override
|
||||||
public Object run() throws Exception {
|
public Object run() throws Exception {
|
||||||
FileSystem remoteFS = remoteNodeLogFileForApp.getFileSystem(conf);
|
FileSystem remoteFS = remoteNodeLogFileForApp.getFileSystem(conf);
|
||||||
if (remoteFS.exists(remoteNodeTmpLogFileForApp)) {
|
|
||||||
if (rename) {
|
if (rename) {
|
||||||
remoteFS.rename(remoteNodeTmpLogFileForApp, renamedPath);
|
remoteFS.rename(remoteNodeTmpLogFileForApp, renamedPath);
|
||||||
} else {
|
} else {
|
||||||
remoteFS.delete(remoteNodeTmpLogFileForApp, false);
|
remoteFS.delete(remoteNodeTmpLogFileForApp, false);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -405,33 +413,39 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
renameTemporaryLogFileFailed = true;
|
renameTemporaryLogFileFailed = true;
|
||||||
logAggregationSucceedInThisCycle = false;
|
logAggregationSucceedInThisCycle = false;
|
||||||
}
|
}
|
||||||
|
} finally {
|
||||||
LogAggregationReport report =
|
LogAggregationStatus logAggregationStatus =
|
||||||
Records.newRecord(LogAggregationReport.class);
|
logAggregationSucceedInThisCycle
|
||||||
report.setApplicationId(appId);
|
|
||||||
report.setDiagnosticMessage(diagnosticMessage);
|
|
||||||
report.setLogAggregationStatus(logAggregationSucceedInThisCycle
|
|
||||||
? LogAggregationStatus.RUNNING
|
? LogAggregationStatus.RUNNING
|
||||||
: LogAggregationStatus.RUNNING_WITH_FAILURE);
|
: LogAggregationStatus.RUNNING_WITH_FAILURE;
|
||||||
this.context.getLogAggregationStatusForApps().add(report);
|
sendLogAggregationReport(logAggregationStatus, diagnosticMessage);
|
||||||
if (appFinished) {
|
if (appFinished) {
|
||||||
// If the app is finished, one extra final report with log aggregation
|
// If the app is finished, one extra final report with log aggregation
|
||||||
// status SUCCEEDED/FAILED will be sent to RM to inform the RM
|
// status SUCCEEDED/FAILED will be sent to RM to inform the RM
|
||||||
// that the log aggregation in this NM is completed.
|
// that the log aggregation in this NM is completed.
|
||||||
LogAggregationReport finalReport =
|
LogAggregationStatus finalLogAggregationStatus =
|
||||||
Records.newRecord(LogAggregationReport.class);
|
renameTemporaryLogFileFailed || !logAggregationSucceedInThisCycle
|
||||||
finalReport.setApplicationId(appId);
|
? LogAggregationStatus.FAILED
|
||||||
finalReport.setLogAggregationStatus(renameTemporaryLogFileFailed
|
: LogAggregationStatus.SUCCEEDED;
|
||||||
? LogAggregationStatus.FAILED : LogAggregationStatus.SUCCEEDED);
|
sendLogAggregationReport(finalLogAggregationStatus, "");
|
||||||
this.context.getLogAggregationStatusForApps().add(finalReport);
|
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
if (writer != null) {
|
if (writer != null) {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void sendLogAggregationReport(
|
||||||
|
LogAggregationStatus logAggregationStatus, String diagnosticMessage) {
|
||||||
|
LogAggregationReport report =
|
||||||
|
Records.newRecord(LogAggregationReport.class);
|
||||||
|
report.setApplicationId(appId);
|
||||||
|
report.setDiagnosticMessage(diagnosticMessage);
|
||||||
|
report.setLogAggregationStatus(logAggregationStatus);
|
||||||
|
this.context.getLogAggregationStatusForApps().add(report);
|
||||||
|
}
|
||||||
|
|
||||||
private void cleanOldLogs() {
|
private void cleanOldLogs() {
|
||||||
try {
|
try {
|
||||||
final FileSystem remoteFS =
|
final FileSystem remoteFS =
|
||||||
|
@ -669,4 +683,10 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
public UserGroupInformation getUgi() {
|
public UserGroupInformation getUgi() {
|
||||||
return this.userUgi;
|
return this.userUgi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@VisibleForTesting
|
||||||
|
public int getLogAggregationTimes() {
|
||||||
|
return this.logAggregationTimes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2127,6 +2127,65 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
logAggregationService.stop();
|
logAggregationService.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test (timeout = 20000)
|
||||||
|
public void testSkipUnnecessaryNNOperationsForShortJob() throws Exception {
|
||||||
|
LogAggregationContext logAggregationContext =
|
||||||
|
Records.newRecord(LogAggregationContext.class);
|
||||||
|
logAggregationContext.setLogAggregationPolicyClassName(
|
||||||
|
FailedOrKilledContainerLogAggregationPolicy.class.getName());
|
||||||
|
verifySkipUnnecessaryNNOperations(logAggregationContext, 0, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test (timeout = 20000)
|
||||||
|
public void testSkipUnnecessaryNNOperationsForService() throws Exception {
|
||||||
|
this.conf.setLong(
|
||||||
|
YarnConfiguration.NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS,
|
||||||
|
3600);
|
||||||
|
LogAggregationContext contextWithAMOnly =
|
||||||
|
Records.newRecord(LogAggregationContext.class);
|
||||||
|
contextWithAMOnly.setLogAggregationPolicyClassName(
|
||||||
|
AMOnlyLogAggregationPolicy.class.getName());
|
||||||
|
contextWithAMOnly.setRolledLogsIncludePattern("sys*");
|
||||||
|
contextWithAMOnly.setRolledLogsExcludePattern("std_final");
|
||||||
|
verifySkipUnnecessaryNNOperations(contextWithAMOnly, 1, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifySkipUnnecessaryNNOperations(
|
||||||
|
LogAggregationContext logAggregationContext,
|
||||||
|
int expectedLogAggregationTimes, int expectedAggregationReportNum)
|
||||||
|
throws Exception {
|
||||||
|
LogAggregationService logAggregationService = new LogAggregationService(
|
||||||
|
dispatcher, this.context, this.delSrvc, super.dirsHandler);
|
||||||
|
logAggregationService.init(this.conf);
|
||||||
|
logAggregationService.start();
|
||||||
|
|
||||||
|
ApplicationId appId = createApplication();
|
||||||
|
logAggregationService.handle(new LogHandlerAppStartedEvent(appId, this.user,
|
||||||
|
null, this.acls, logAggregationContext));
|
||||||
|
|
||||||
|
// Container finishes
|
||||||
|
String[] logFiles = new String[] { "stdout" };
|
||||||
|
finishContainer(appId, logAggregationService,
|
||||||
|
ContainerType.APPLICATION_MASTER, 1, 0, logFiles);
|
||||||
|
AppLogAggregatorImpl aggregator =
|
||||||
|
(AppLogAggregatorImpl) logAggregationService.getAppLogAggregators()
|
||||||
|
.get(appId);
|
||||||
|
aggregator.doLogAggregationOutOfBand();
|
||||||
|
|
||||||
|
Thread.sleep(2000);
|
||||||
|
aggregator.doLogAggregationOutOfBand();
|
||||||
|
Thread.sleep(2000);
|
||||||
|
|
||||||
|
// App finishes.
|
||||||
|
logAggregationService.handle(new LogHandlerAppFinishedEvent(appId));
|
||||||
|
logAggregationService.stop();
|
||||||
|
|
||||||
|
assertEquals(expectedLogAggregationTimes,
|
||||||
|
aggregator.getLogAggregationTimes());
|
||||||
|
assertEquals(expectedAggregationReportNum,
|
||||||
|
this.context.getLogAggregationStatusForApps().size());
|
||||||
|
}
|
||||||
|
|
||||||
private int numOfLogsAvailable(LogAggregationService logAggregationService,
|
private int numOfLogsAvailable(LogAggregationService logAggregationService,
|
||||||
ApplicationId appId, boolean sizeLimited, String lastLogFile)
|
ApplicationId appId, boolean sizeLimited, String lastLogFile)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
Loading…
Reference in New Issue