From c0d666c74e9ea76564a2458c6c0a78ae7afa9fea Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Thu, 18 Dec 2014 12:58:59 -0600 Subject: [PATCH] HDFS-7373. Clean up temporary files after fsimage transfer failures. Contributed by Kihwal Lee --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/server/namenode/TransferFsImage.java | 21 +++++++++++++++++++ .../hdfs/server/namenode/TestCheckpoint.java | 19 +++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8e9961a2488..8150a54bd92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -468,6 +468,9 @@ Release 2.7.0 - UNRELEASED HDFS-7531. Improve the concurrent access on FsVolumeList (Lei Xu via Colin P. McCabe) + HDFS-7373. Clean up temporary files after fsimage transfer failures. + (kihwal) + OPTIMIZATIONS HDFS-7454. Reduce memory footprint for AclEntries in NameNode. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java index 160371a646e..1f52ff7af3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java @@ -528,10 +528,18 @@ public class TransferFsImage { fos.getChannel().force(true); fos.close(); } + + // Something went wrong and did not finish reading. + // Remove the temporary files. + if (!finishedReceiving) { + deleteTmpFiles(localPaths); + } + if (finishedReceiving && received != advertisedSize) { // only throw this exception if we think we read all of it on our end // -- otherwise a client-side IOException would be masked by this // exception that makes it look like a server-side problem! 
+ deleteTmpFiles(localPaths); throw new IOException("File " + url + " received length " + received + " is not of the advertised size " + advertisedSize); @@ -548,6 +556,7 @@ public class TransferFsImage { if (advertisedDigest != null && !computedDigest.equals(advertisedDigest)) { + deleteTmpFiles(localPaths); throw new IOException("File " + url + " computed digest " + computedDigest + " does not match advertised digest " + advertisedDigest); @@ -558,6 +567,18 @@ public class TransferFsImage { } } + private static void deleteTmpFiles(List files) { + if (files == null) { + return; + } + + LOG.info("Deleting temporary files: " + files); + for (File file : files) { + file.delete(); // ignore the return value + } + } + + private static MD5Hash parseMD5Header(HttpURLConnection connection) { String header = connection.getHeaderField(MD5_HEADER); return (header != null) ? new MD5Hash(header) : null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index bb4689d3634..95da8387cdd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -633,6 +633,22 @@ public class TestCheckpoint { }); } + private void checkTempImages(NNStorage storage) throws IOException { + List dirs = new ArrayList(); + dirs.add(storage.getStorageDir(0).getCurrentDir()); + dirs.add(storage.getStorageDir(1).getCurrentDir()); + + for (File dir : dirs) { + File[] list = dir.listFiles(); + for (File f : list) { + // Throw an exception if a temp image file is found. 
+ if(f.getName().contains(NNStorage.NameNodeFile.IMAGE_NEW.getName())) { + throw new IOException("Found " + f); + } + } + } + } + /** * Simulate 2NN failing to send the whole file (error type 3) * The length header in the HTTP transfer should prevent @@ -694,6 +710,9 @@ public class TestCheckpoint { GenericTestUtils.assertExceptionContains(exceptionSubstring, e); } Mockito.reset(faultInjector); + // Make sure there are no temporary files left around. + checkTempImages(cluster.getNameNode().getFSImage().getStorage()); + checkTempImages(secondary.getFSImage().getStorage()); secondary.shutdown(); // secondary namenode crash! secondary = null;