YARN-4096. App local logs are leaked if log aggregation fails to initialize for the app. Contributed by Jason Lowe.

(cherry picked from commit 16b9037dc1)
This commit is contained in:
Zhihai Xu 2015-09-08 12:29:54 -07:00
parent fd100ccbae
commit 4b5767e456
5 changed files with 31 additions and 10 deletions

View File

@ -79,6 +79,9 @@ Release 2.7.2 - UNRELEASED
YARN-4087. Followup fixes after YARN-2019 regarding RM behavior when
state-store error occurs. (Jian He via xgong)
YARN-4096. App local logs are leaked if log aggregation fails to initialize
for the app. (Jason Lowe via zxu)
Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES

View File

@ -28,4 +28,6 @@ public interface AppLogAggregator extends Runnable {
void abortLogAggregation();
void finishLogAggregation();
void disableLogAggregation();
}

View File

@ -522,6 +522,11 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
this.notifyAll();
}
@Override
public void disableLogAggregation() {
this.logAggregationDisabled = true;
}
@Private
@VisibleForTesting
// This is only used for testing.

View File

@ -361,19 +361,19 @@ public class LogAggregationService extends AbstractService implements
throw new YarnRuntimeException("Duplicate initApp for " + appId);
}
// wait until check for existing aggregator to create dirs
YarnRuntimeException appDirException = null;
try {
// Create the app dir
createAppDir(user, appId, userUgi);
} catch (Exception e) {
appLogAggregators.remove(appId);
closeFileSystems(userUgi);
appLogAggregator.disableLogAggregation();
if (!(e instanceof YarnRuntimeException)) {
e = new YarnRuntimeException(e);
appDirException = new YarnRuntimeException(e);
} else {
appDirException = (YarnRuntimeException)e;
}
throw (YarnRuntimeException)e;
}
// TODO Get the user configuration for the list of containers that need log
// aggregation.
@ -389,6 +389,10 @@ public class LogAggregationService extends AbstractService implements
}
};
this.threadPool.execute(aggregatorWrapper);
if (appDirException != null) {
throw appDirException;
}
}
protected void closeFileSystems(final UserGroupInformation userUgi) {

View File

@ -705,9 +705,10 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
this.remoteRootLogDir.getAbsolutePath());
DeletionService spyDelSrvc = spy(this.delSrvc);
LogAggregationService logAggregationService = spy(
new LogAggregationService(dispatcher, this.context, this.delSrvc,
new LogAggregationService(dispatcher, this.context, spyDelSrvc,
super.dirsHandler));
logAggregationService.init(this.conf);
logAggregationService.start();
@ -715,6 +716,11 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
ApplicationId appId =
BuilderUtils.newApplicationId(System.currentTimeMillis(),
(int) (Math.random() * 1000));
File appLogDir =
new File(localLogDir, ConverterUtils.toString(appId));
appLogDir.mkdir();
Exception e = new RuntimeException("KABOOM!");
doThrow(e)
.when(logAggregationService).createAppDir(any(String.class),
@ -730,9 +736,6 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
};
checkEvents(appEventHandler, expectedEvents, false,
"getType", "getApplicationID", "getDiagnostic");
// filesystems may have been instantiated
verify(logAggregationService).closeFileSystems(
any(UserGroupInformation.class));
// verify trying to collect logs for containers/apps we don't know about
// doesn't blow up and tear down the NM
@ -745,6 +748,10 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
logAggregationService.stop();
assertEquals(0, logAggregationService.getNumAggregators());
verify(spyDelSrvc).delete(eq(user), any(Path.class),
Mockito.<Path>anyVararg());
verify(logAggregationService).closeFileSystems(
any(UserGroupInformation.class));
}
private void writeContainerLogs(File appLogDir, ContainerId containerId,