From efba7497604bb0af2f7ec257d2b545a10608fcb1 Mon Sep 17 00:00:00 2001
From: Kihwal Lee
Date: Tue, 13 Oct 2015 13:25:10 -0500
Subject: [PATCH] HDFS-8676. Delayed rolling upgrade finalization can cause
 heartbeat expiration. Contributed by Walter Su.

(cherry picked from commit 5b43db47a313decccdcca8f45c5708aab46396df)
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt |  3 ++
 .../datanode/BlockPoolSliceStorage.java     | 36 +++++++++++++++++--
 .../hdfs/server/datanode/DataStorage.java   |  1 +
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 1df7f02a0f4..b828e791930 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -66,6 +66,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-9178. Slow datanode I/O can cause a wrong node to be marked bad
     (kihwal)
 
+    HDFS-8676. Delayed rolling upgrade finalization can cause heartbeat
+    expiration. (Walter Su via kihwal)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
index d26a9a591b8..cecca9a4244 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.util.Daemon;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
@@ -92,6 +93,7 @@ public class BlockPoolSliceStorage extends Storage {
       "^(.*)(" + BLOCK_POOL_ID_PATTERN_BASE + ")(" + TRASH_ROOT_DIR + ")(.*)$");
 
   private String blockpoolID = ""; // id of the blockpool
+  private Daemon trashCleaner;
 
   public BlockPoolSliceStorage(StorageInfo storageInfo, String bpid) {
     super(storageInfo);
@@ -738,11 +740,39 @@ public class BlockPoolSliceStorage extends Storage {
    * Delete all files and directories in the trash directories.
    */
   public void clearTrash() {
+    final List<File> trashRoots = new ArrayList<>();
     for (StorageDirectory sd : storageDirs) {
       File trashRoot = getTrashRootDir(sd);
-      Preconditions.checkState(!(trashRoot.exists() && sd.getPreviousDir().exists()));
-      FileUtil.fullyDelete(trashRoot);
-      LOG.info("Cleared trash for storage directory " + sd);
+      if (trashRoot.exists() && sd.getPreviousDir().exists()) {
+        LOG.error("Trash and PreviousDir shouldn't both exist for storage "
+            + "directory " + sd);
+        assert false;
+      } else {
+        trashRoots.add(trashRoot);
+      }
+    }
+
+    stopTrashCleaner();
+    trashCleaner = new Daemon(new Runnable() {
+      @Override
+      public void run() {
+        for(File trashRoot : trashRoots){
+          FileUtil.fullyDelete(trashRoot);
+          LOG.info("Cleared trash for storage directory " + trashRoot);
+        }
+      }
+
+      @Override
+      public String toString() {
+        return "clearTrash() for " + blockpoolID;
+      }
+    });
+    trashCleaner.start();
+  }
+
+  public void stopTrashCleaner() {
+    if (trashCleaner != null) {
+      trashCleaner.interrupt();
     }
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
index 77fcfedb96b..deb0a3d7d2b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
@@ -164,6 +164,7 @@ public class DataStorage extends Storage {
    */
   public void enableTrash(String bpid) {
     if (trashEnabledBpids.add(bpid)) {
+      getBPStorage(bpid).stopTrashCleaner();
       LOG.info("Enabled trash for bpid " + bpid);
     }
   }
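
Why the change helps: clearTrash() previously deleted every trash root synchronously, so when rolling upgrade finalization was delayed until the trash directories had grown large, the recursive deletion could tie up the servicing thread long enough for the DataNode's heartbeats to expire, as the commit title describes. The patch collects the trash roots up front, hands the slow FileUtil.fullyDelete() calls to a background Daemon, and adds stopTrashCleaner() so that re-enabling trash cancels a cleaner that is still running. Below is a minimal, self-contained sketch of that pattern; it uses a plain java.lang.Thread in place of Hadoop's org.apache.hadoop.util.Daemon wrapper, and the TrashCleaner and deleteRecursively names are illustrative, not HDFS APIs.

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Sketch of the pattern the patch applies: snapshot the directories to
 * delete, then run the slow recursive deletion on a background daemon
 * thread so the calling thread returns immediately. In HDFS the caller
 * is the heartbeat-handling path.
 */
public class TrashCleaner {
  private Thread cleaner; // plays the role of the patch's "Daemon trashCleaner"

  /** Start asynchronous deletion, cancelling any in-flight run first. */
  public synchronized void clearTrash(List<File> trashRoots) {
    // Copy the list so the background thread sees a stable snapshot.
    final List<File> roots = new ArrayList<File>(trashRoots);
    stop(); // mirrors stopTrashCleaner(): at most one cleaner at a time
    cleaner = new Thread(new Runnable() {
      @Override
      public void run() {
        for (File root : roots) {
          deleteRecursively(root); // the slow part, now off the caller's thread
          System.out.println("Cleared trash directory " + root);
        }
      }
    }, "trash-cleaner");
    cleaner.setDaemon(true); // like Hadoop's Daemon: never blocks JVM exit
    cleaner.start();
  }

  /** Interrupt a running cleaner, as DataStorage.enableTrash() now does. */
  public synchronized void stop() {
    if (cleaner != null) {
      cleaner.interrupt();
    }
  }

  // Simplified stand-in for org.apache.hadoop.fs.FileUtil.fullyDelete().
  private static void deleteRecursively(File f) {
    File[] children = f.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteRecursively(child);
      }
    }
    f.delete();
  }
}

As in the patch, interrupting any previous cleaner before starting a new one keeps at most one deletion pass per block pool, and a daemon thread cannot delay process shutdown.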