YARN-6288. Exceptions during aggregated log writes are mishandled. Contributed by Akira Ajisaka

This commit is contained in:
Jason Lowe 2017-04-06 16:26:24 -05:00
parent ab91ca7338
commit 6e41aec109
5 changed files with 147 additions and 140 deletions

View File

@ -390,18 +390,18 @@ public class TestLogsCLI {
Path path = Path path =
new Path(appDir, LogAggregationUtils.getNodeString(nodeId) new Path(appDir, LogAggregationUtils.getNodeString(nodeId)
+ System.currentTimeMillis()); + System.currentTimeMillis());
AggregatedLogFormat.LogWriter writer = try (AggregatedLogFormat.LogWriter writer =
new AggregatedLogFormat.LogWriter(configuration, path, ugi); new AggregatedLogFormat.LogWriter()) {
writer.writeApplicationOwner(ugi.getUserName()); writer.initialize(configuration, path, ugi);
writer.writeApplicationOwner(ugi.getUserName());
Map<ApplicationAccessType, String> appAcls = Map<ApplicationAccessType, String> appAcls = new HashMap<>();
new HashMap<ApplicationAccessType, String>(); appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName());
appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName()); writer.writeApplicationACLs(appAcls);
writer.writeApplicationACLs(appAcls); writer.append(new AggregatedLogFormat.LogKey(containerId),
writer.append(new AggregatedLogFormat.LogKey(containerId), new AggregatedLogFormat.LogValue(rootLogDirs, containerId,
new AggregatedLogFormat.LogValue(rootLogDirs, containerId, UserGroupInformation.getCurrentUser().getShortUserName()));
UserGroupInformation.getCurrentUser().getShortUserName())); }
writer.close();
} }
private static void uploadEmptyContainerLogIntoRemoteDir(UserGroupInformation ugi, private static void uploadEmptyContainerLogIntoRemoteDir(UserGroupInformation ugi,
@ -410,23 +410,23 @@ public class TestLogsCLI {
Path path = Path path =
new Path(appDir, LogAggregationUtils.getNodeString(nodeId) new Path(appDir, LogAggregationUtils.getNodeString(nodeId)
+ System.currentTimeMillis()); + System.currentTimeMillis());
AggregatedLogFormat.LogWriter writer = try (AggregatedLogFormat.LogWriter writer =
new AggregatedLogFormat.LogWriter(configuration, path, ugi); new AggregatedLogFormat.LogWriter()) {
writer.writeApplicationOwner(ugi.getUserName()); writer.initialize(configuration, path, ugi);
writer.writeApplicationOwner(ugi.getUserName());
Map<ApplicationAccessType, String> appAcls = Map<ApplicationAccessType, String> appAcls = new HashMap<>();
new HashMap<ApplicationAccessType, String>(); appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName());
appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName()); writer.writeApplicationACLs(appAcls);
writer.writeApplicationACLs(appAcls); DataOutputStream out = writer.getWriter().prepareAppendKey(-1);
DataOutputStream out = writer.getWriter().prepareAppendKey(-1); new AggregatedLogFormat.LogKey(containerId).write(out);
new AggregatedLogFormat.LogKey(containerId).write(out); out.close();
out.close(); out = writer.getWriter().prepareAppendValue(-1);
out = writer.getWriter().prepareAppendValue(-1); new AggregatedLogFormat.LogValue(rootLogDirs, containerId,
new AggregatedLogFormat.LogValue(rootLogDirs, containerId, UserGroupInformation.getCurrentUser().getShortUserName()).write(out,
UserGroupInformation.getCurrentUser().getShortUserName()).write(out, new HashSet<File>());
new HashSet<File>()); out.close();
out.close(); }
writer.close();
} }
private YarnClient createMockYarnClient(YarnApplicationState appState) private YarnClient createMockYarnClient(YarnApplicationState appState)

View File

@ -70,7 +70,6 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.util.Times;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@ -378,14 +377,23 @@ public class AggregatedLogFormat {
* The writer that writes out the aggregated logs. * The writer that writes out the aggregated logs.
*/ */
@Private @Private
public static class LogWriter { public static class LogWriter implements AutoCloseable {
private final FSDataOutputStream fsDataOStream; private FSDataOutputStream fsDataOStream;
private final TFile.Writer writer; private TFile.Writer writer;
private FileContext fc; private FileContext fc;
public LogWriter(final Configuration conf, final Path remoteAppLogFile, /**
UserGroupInformation userUgi) throws IOException { * Initialize the LogWriter.
* Must be called just after the instance is created.
* @param conf Configuration
* @param remoteAppLogFile remote log file path
* @param userUgi Ugi of the user
* @throws IOException Failed to initialize
*/
public void initialize(final Configuration conf,
final Path remoteAppLogFile,
UserGroupInformation userUgi) throws IOException {
try { try {
this.fsDataOStream = this.fsDataOStream =
userUgi.doAs(new PrivilegedExceptionAction<FSDataOutputStream>() { userUgi.doAs(new PrivilegedExceptionAction<FSDataOutputStream>() {
@ -463,11 +471,14 @@ public class AggregatedLogFormat {
} }
} }
@Override
public void close() { public void close() {
try { if (writer != null) {
this.writer.close(); try {
} catch (IOException e) { this.writer.close();
LOG.warn("Exception closing writer", e); } catch (IOException e) {
LOG.warn("Exception closing writer", e);
}
} }
IOUtils.closeStream(fsDataOStream); IOUtils.closeStream(fsDataOStream);
} }

View File

@ -140,43 +140,44 @@ public class TestAggregatedLogFormat {
final int ch = filler; final int ch = filler;
UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
LogWriter logWriter = new LogWriter(conf, remoteAppLogFile, ugi); try (LogWriter logWriter = new LogWriter()) {
logWriter.initialize(conf, remoteAppLogFile, ugi);
LogKey logKey = new LogKey(testContainerId); LogKey logKey = new LogKey(testContainerId);
LogValue logValue = LogValue logValue =
spy(new LogValue(Collections.singletonList(srcFileRoot.toString()), spy(new LogValue(Collections.singletonList(srcFileRoot.toString()),
testContainerId, ugi.getShortUserName())); testContainerId, ugi.getShortUserName()));
final CountDownLatch latch = new CountDownLatch(1); final CountDownLatch latch = new CountDownLatch(1);
Thread t = new Thread() { Thread t = new Thread() {
public void run() { public void run() {
try { try {
for(int i=0; i < length/3; i++) { for (int i = 0; i < length / 3; i++) {
osw.write(ch); osw.write(ch);
} }
latch.countDown(); latch.countDown();
for(int i=0; i < (2*length)/3; i++) { for (int i = 0; i < (2 * length) / 3; i++) {
osw.write(ch); osw.write(ch);
}
osw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} }
osw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} }
} };
}; t.start();
t.start();
//Wait till the osw is partially written //Wait till the osw is partially written
//aggregation starts once the ows has completed 1/3rd of its work //aggregation starts once the ows has completed 1/3rd of its work
latch.await(); latch.await();
//Aggregate The Logs //Aggregate The Logs
logWriter.append(logKey, logValue); logWriter.append(logKey, logValue);
logWriter.close(); }
} }
@Test @Test
@ -215,22 +216,22 @@ public class TestAggregatedLogFormat {
writeSrcFile(srcFilePath, "stdout", numChars); writeSrcFile(srcFilePath, "stdout", numChars);
UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
LogWriter logWriter = new LogWriter(conf, remoteAppLogFile, ugi); try (LogWriter logWriter = new LogWriter()) {
logWriter.initialize(conf, remoteAppLogFile, ugi);
LogKey logKey = new LogKey(testContainerId);
LogValue logValue =
new LogValue(Collections.singletonList(srcFileRoot.toString()),
testContainerId, ugi.getShortUserName());
LogKey logKey = new LogKey(testContainerId); // When we try to open FileInputStream for stderr, it will throw out an
LogValue logValue = // IOException. Skip the log aggregation for stderr.
new LogValue(Collections.singletonList(srcFileRoot.toString()), LogValue spyLogValue = spy(logValue);
testContainerId, ugi.getShortUserName()); File errorFile = new File((new Path(srcFilePath, "stderr")).toString());
doThrow(new IOException("Mock can not open FileInputStream")).when(
spyLogValue).secureOpenFile(errorFile);
// When we try to open FileInputStream for stderr, it will throw out an IOException. logWriter.append(logKey, spyLogValue);
// Skip the log aggregation for stderr. }
LogValue spyLogValue = spy(logValue);
File errorFile = new File((new Path(srcFilePath, "stderr")).toString());
doThrow(new IOException("Mock can not open FileInputStream")).when(
spyLogValue).secureOpenFile(errorFile);
logWriter.append(logKey, spyLogValue);
logWriter.close();
// make sure permission are correct on the file // make sure permission are correct on the file
FileStatus fsStatus = fs.getFileStatus(remoteAppLogFile); FileStatus fsStatus = fs.getFileStatus(remoteAppLogFile);
@ -310,24 +311,23 @@ public class TestAggregatedLogFormat {
UserGroupInformation ugi = UserGroupInformation ugi =
UserGroupInformation.getCurrentUser(); UserGroupInformation.getCurrentUser();
LogWriter logWriter = new LogWriter(conf, remoteAppLogFile, ugi); try (LogWriter logWriter = new LogWriter()) {
logWriter.initialize(conf, remoteAppLogFile, ugi);
LogKey logKey = new LogKey(testContainerId1);
String randomUser = "randomUser";
LogValue logValue =
spy(new LogValue(Collections.singletonList(srcFileRoot.toString()),
testContainerId1, randomUser));
LogKey logKey = new LogKey(testContainerId1); // It is trying simulate a situation where first log file is owned by
String randomUser = "randomUser"; // different user (probably symlink) and second one by the user itself.
LogValue logValue = // The first file should not be aggregated. Because this log file has the
spy(new LogValue(Collections.singletonList(srcFileRoot.toString()), // invalid user name.
testContainerId1, randomUser)); when(logValue.getUser()).thenReturn(randomUser).thenReturn(
ugi.getShortUserName());
// It is trying simulate a situation where first log file is owned by logWriter.append(logKey, logValue);
// different user (probably symlink) and second one by the user itself. }
// The first file should not be aggregated. Because this log file has the invalid
// user name.
when(logValue.getUser()).thenReturn(randomUser).thenReturn(
ugi.getShortUserName());
logWriter.append(logKey, logValue);
logWriter.close();
BufferedReader in = BufferedReader in =
new BufferedReader(new FileReader(new File(remoteAppLogFile new BufferedReader(new FileReader(new File(remoteAppLogFile
.toUri().getRawPath()))); .toUri().getRawPath())));

View File

@ -275,17 +275,19 @@ public class TestAggregatedLogsBlock {
List<String> rootLogDirs = Arrays.asList("target/logs/logs"); List<String> rootLogDirs = Arrays.asList("target/logs/logs");
UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
AggregatedLogFormat.LogWriter writer = new AggregatedLogFormat.LogWriter( try (AggregatedLogFormat.LogWriter writer =
configuration, new Path(path), ugi); new AggregatedLogFormat.LogWriter()) {
writer.writeApplicationOwner(ugi.getUserName()); writer.initialize(configuration, new Path(path), ugi);
writer.writeApplicationOwner(ugi.getUserName());
Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>(); Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName()); appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName());
writer.writeApplicationACLs(appAcls); writer.writeApplicationACLs(appAcls);
writer.append(new AggregatedLogFormat.LogKey("container_0_0001_01_000001"), writer.append(new AggregatedLogFormat.LogKey("container_0_0001_01_000001"),
new AggregatedLogFormat.LogValue(rootLogDirs, containerId,UserGroupInformation.getCurrentUser().getShortUserName())); new AggregatedLogFormat.LogValue(rootLogDirs, containerId,
writer.close(); UserGroupInformation.getCurrentUser().getShortUserName()));
}
} }
private void writeLogs(String dirName) throws Exception { private void writeLogs(String dirName) throws Exception {

View File

@ -68,7 +68,6 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.Records;
import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.util.Times;
@ -313,24 +312,21 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
} }
} }
LogWriter writer = null; if (pendingContainerInThisCycle.isEmpty()) {
sendLogAggregationReport(true, "", appFinished);
return;
}
logAggregationTimes++;
String diagnosticMessage = ""; String diagnosticMessage = "";
boolean logAggregationSucceedInThisCycle = true; boolean logAggregationSucceedInThisCycle = true;
try { try (LogWriter writer = new LogWriter()){
if (pendingContainerInThisCycle.isEmpty()) {
return;
}
logAggregationTimes++;
try { try {
writer = writer.initialize(this.conf, this.remoteNodeTmpLogFileForApp,
new LogWriter(this.conf, this.remoteNodeTmpLogFileForApp, this.userUgi);
this.userUgi);
// Write ACLs once when the writer is created. // Write ACLs once when the writer is created.
writer.writeApplicationACLs(appAcls); writer.writeApplicationACLs(appAcls);
writer.writeApplicationOwner(this.userUgi.getShortUserName()); writer.writeApplicationOwner(this.userUgi.getShortUserName());
} catch (IOException e1) { } catch (IOException e1) {
logAggregationSucceedInThisCycle = false; logAggregationSucceedInThisCycle = false;
LOG.error("Cannot create writer for app " + this.applicationId LOG.error("Cannot create writer for app " + this.applicationId
@ -371,11 +367,6 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
cleanupOldLogTimes++; cleanupOldLogTimes++;
} }
if (writer != null) {
writer.close();
writer = null;
}
long currentTime = System.currentTimeMillis(); long currentTime = System.currentTimeMillis();
final Path renamedPath = this.rollingMonitorInterval <= 0 final Path renamedPath = this.rollingMonitorInterval <= 0
? remoteNodeLogFileForApp : new Path( ? remoteNodeLogFileForApp : new Path(
@ -416,29 +407,32 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
logAggregationSucceedInThisCycle = false; logAggregationSucceedInThisCycle = false;
} }
} finally { } finally {
LogAggregationStatus logAggregationStatus = sendLogAggregationReport(logAggregationSucceedInThisCycle,
logAggregationSucceedInThisCycle diagnosticMessage, appFinished);
? LogAggregationStatus.RUNNING
: LogAggregationStatus.RUNNING_WITH_FAILURE;
sendLogAggregationReport(logAggregationStatus, diagnosticMessage);
if (appFinished) {
// If the app is finished, one extra final report with log aggregation
// status SUCCEEDED/FAILED will be sent to RM to inform the RM
// that the log aggregation in this NM is completed.
LogAggregationStatus finalLogAggregationStatus =
renameTemporaryLogFileFailed || !logAggregationSucceedInThisCycle
? LogAggregationStatus.FAILED
: LogAggregationStatus.SUCCEEDED;
sendLogAggregationReport(finalLogAggregationStatus, "");
}
if (writer != null) {
writer.close();
}
} }
} }
private void sendLogAggregationReport( private void sendLogAggregationReport(
boolean logAggregationSucceedInThisCycle, String diagnosticMessage,
boolean appFinished) {
LogAggregationStatus logAggregationStatus =
logAggregationSucceedInThisCycle
? LogAggregationStatus.RUNNING
: LogAggregationStatus.RUNNING_WITH_FAILURE;
sendLogAggregationReportInternal(logAggregationStatus, diagnosticMessage);
if (appFinished) {
// If the app is finished, one extra final report with log aggregation
// status SUCCEEDED/FAILED will be sent to RM to inform the RM
// that the log aggregation in this NM is completed.
LogAggregationStatus finalLogAggregationStatus =
renameTemporaryLogFileFailed || !logAggregationSucceedInThisCycle
? LogAggregationStatus.FAILED
: LogAggregationStatus.SUCCEEDED;
sendLogAggregationReportInternal(finalLogAggregationStatus, "");
}
}
private void sendLogAggregationReportInternal(
LogAggregationStatus logAggregationStatus, String diagnosticMessage) { LogAggregationStatus logAggregationStatus, String diagnosticMessage) {
LogAggregationReport report = LogAggregationReport report =
Records.newRecord(LogAggregationReport.class); Records.newRecord(LogAggregationReport.class);