From a72ef921f00f6536bcb3e09ef5bd147cd8890065 Mon Sep 17 00:00:00 2001
From: Kihwal Lee
Date: Fri, 11 Dec 2015 08:46:03 -0600
Subject: [PATCH] HDFS-9445. Datanode may deadlock while handling a bad volume. Contributed by Walter Su.

(cherry picked from commit a48301791e9564363bc2abad4e89e344b0d7a5ff)
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt |  3 +
 .../fsdataset/impl/FsDatasetImpl.java       | 87 +++++++++++--------
 .../fsdataset/impl/TestFsDatasetImpl.java   |  4 +
 3 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3fd52315b40..d2eed689b35 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1709,6 +1709,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-9294. DFSClient deadlock when close file and failed to renew lease.
     (Brahma Reddy Battula via szetszwo)
 
+    HDFS-9445. Datanode may deadlock while handling a bad volume.
+    (Walter Su via Kihwal)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index a896287a9cc..cb5af9354ed 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -475,48 +475,67 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
    * Removes a set of volumes from FsDataset.
    * @param volumesToRemove a set of absolute root path of each volume.
    * @param clearFailure set true to clear failure information.
-   *
-   * DataNode should call this function before calling
-   * {@link DataStorage#removeVolumes(java.util.Collection)}.
    */
   @Override
-  public synchronized void removeVolumes(
-      Set<File> volumesToRemove, boolean clearFailure) {
+  public void removeVolumes(Set<File> volumesToRemove, boolean clearFailure) {
     // Make sure that all volumes are absolute path.
     for (File vol : volumesToRemove) {
       Preconditions.checkArgument(vol.isAbsolute(),
           String.format("%s is not absolute path.", vol.getPath()));
     }
 
-    for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) {
-      Storage.StorageDirectory sd = dataStorage.getStorageDir(idx);
-      final File absRoot = sd.getRoot().getAbsoluteFile();
-      if (volumesToRemove.contains(absRoot)) {
-        LOG.info("Removing " + absRoot + " from FsDataset.");
-        // Disable the volume from the service.
-        asyncDiskService.removeVolume(sd.getCurrentDir());
-        volumes.removeVolume(absRoot, clearFailure);
+    Map<String, List<ReplicaInfo>> blkToInvalidate = new HashMap<>();
+    List<String> storageToRemove = new ArrayList<>();
+    synchronized (this) {
+      for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) {
+        Storage.StorageDirectory sd = dataStorage.getStorageDir(idx);
+        final File absRoot = sd.getRoot().getAbsoluteFile();
+        if (volumesToRemove.contains(absRoot)) {
+          LOG.info("Removing " + absRoot + " from FsDataset.");
 
-        // Removed all replica information for the blocks on the volume. Unlike
-        // updating the volumeMap in addVolume(), this operation does not scan
-        // disks.
-        for (String bpid : volumeMap.getBlockPoolList()) {
-          for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
-              it.hasNext(); ) {
-            ReplicaInfo block = it.next();
-            final File absBasePath =
-                new File(block.getVolume().getBasePath()).getAbsoluteFile();
-            if (absBasePath.equals(absRoot)) {
-              invalidate(bpid, block);
-              it.remove();
+          // Disable the volume from the service.
+          asyncDiskService.removeVolume(sd.getCurrentDir());
+          volumes.removeVolume(absRoot, clearFailure);
+
+          // Removed all replica information for the blocks on the volume.
+          // Unlike updating the volumeMap in addVolume(), this operation does
+          // not scan disks.
+          for (String bpid : volumeMap.getBlockPoolList()) {
+            List<ReplicaInfo> blocks = new ArrayList<>();
+            for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
+                it.hasNext(); ) {
+              ReplicaInfo block = it.next();
+              final File absBasePath =
+                  new File(block.getVolume().getBasePath()).getAbsoluteFile();
+              if (absBasePath.equals(absRoot)) {
+                blocks.add(block);
+                it.remove();
+              }
             }
+            blkToInvalidate.put(bpid, blocks);
           }
-        }
 
-        storageMap.remove(sd.getStorageUuid());
+          storageToRemove.add(sd.getStorageUuid());
+        }
+      }
+      setupAsyncLazyPersistThreads();
+    }
+
+    // Call this outside the lock.
+    for (Map.Entry<String, List<ReplicaInfo>> entry :
+        blkToInvalidate.entrySet()) {
+      String bpid = entry.getKey();
+      List<ReplicaInfo> blocks = entry.getValue();
+      for (ReplicaInfo block : blocks) {
+        invalidate(bpid, block);
+      }
+    }
+
+    synchronized (this) {
+      for(String storageUuid : storageToRemove) {
+        storageMap.remove(storageUuid);
       }
     }
-    setupAsyncLazyPersistThreads();
   }
 
   private StorageType getStorageTypeFromLocations(
@@ -1936,15 +1955,11 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
   public void invalidate(String bpid, ReplicaInfo block) {
     // If a DFSClient has the replica in its cache of short-circuit file
     // descriptors (and the client is using ShortCircuitShm), invalidate it.
-    // The short-circuit registry is null in the unit tests, because the
-    // datanode is mock object.
-    if (datanode.getShortCircuitRegistry() != null) {
-      datanode.getShortCircuitRegistry().processBlockInvalidation(
-          new ExtendedBlockId(block.getBlockId(), bpid));
+    datanode.getShortCircuitRegistry().processBlockInvalidation(
+        new ExtendedBlockId(block.getBlockId(), bpid));
 
-      // If the block is cached, start uncaching it.
-      cacheManager.uncacheBlock(bpid, block.getBlockId());
-    }
+    // If the block is cached, start uncaching it.
+    cacheManager.uncacheBlock(bpid, block.getBlockId());
 
     datanode.notifyNamenodeDeletedBlock(new ExtendedBlock(bpid, block),
         block.getStorageUuid());
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 62907ec0653..a3d57691829 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hdfs.server.datanode.DataStorage;
 import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaHandler;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.hdfs.server.datanode.ShortCircuitRegistry;
 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
@@ -147,6 +148,9 @@ public class TestFsDatasetImpl {
     when(datanode.getDnConf()).thenReturn(dnConf);
     final BlockScanner disabledBlockScanner = new BlockScanner(datanode, conf);
     when(datanode.getBlockScanner()).thenReturn(disabledBlockScanner);
+    final ShortCircuitRegistry shortCircuitRegistry =
+        new ShortCircuitRegistry(conf);
+    when(datanode.getShortCircuitRegistry()).thenReturn(shortCircuitRegistry);
 
     createStorageDirs(storage, conf, NUM_INIT_VOLUMES);
     dataset = new FsDatasetImpl(datanode, storage, conf);
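
For readers tracing the fix rather than the diff mechanics: removeVolumes() no longer holds the FsDatasetImpl monitor while calling invalidate(), which reaches into the short-circuit registry and the cache manager and can take their locks. It now collects the affected replicas and storage IDs while synchronized, releases the lock to run the invalidations (the "// Call this outside the lock." section), and re-acquires the lock only to drop the storage entries. Below is a minimal, self-contained sketch of that pattern; the class and member names (BadVolumeHandler, Invalidator, blocksByVolume, storageMap) are invented for illustration and are not the Hadoop classes touched by this patch.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch only: mirrors the three-phase shape of the patched removeVolumes(),
// not the actual FsDatasetImpl code.
public class BadVolumeHandler {

  // Stand-in for a collaborator that takes its own lock internally,
  // the way the short-circuit registry or cache manager can.
  public interface Invalidator {
    void invalidate(String blockId);
  }

  private final Map<String, List<String>> blocksByVolume = new HashMap<>();
  private final Map<String, String> storageMap = new HashMap<>(); // volume -> storage id
  private final Invalidator invalidator;

  public BadVolumeHandler(Invalidator invalidator) {
    this.invalidator = invalidator;
  }

  public synchronized void addBlock(String volume, String blockId, String storageId) {
    blocksByVolume.computeIfAbsent(volume, v -> new ArrayList<>()).add(blockId);
    storageMap.put(volume, storageId);
  }

  public void removeVolume(String volume) {
    List<String> toInvalidate;

    // Phase 1: under this object's monitor, only unhook the volume and
    // remember what needs further work; take no other locks here.
    synchronized (this) {
      toInvalidate = blocksByVolume.remove(volume);
    }

    // Phase 2: outside the monitor, run the invalidations that may block
    // on the collaborator's own lock.
    if (toInvalidate != null) {
      for (String blockId : toInvalidate) {
        invalidator.invalidate(blockId);
      }
    }

    // Phase 3: re-acquire the monitor for the final piece of bookkeeping.
    synchronized (this) {
      storageMap.remove(volume);
    }
  }
}

The point of the three phases is that no foreign lock is ever acquired while this object's monitor is held, so a thread that already holds the collaborator's lock can still enter the monitor without the two threads deadlocking.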