diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 578481406fc..68ba352d385 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -24,6 +24,9 @@ Release 2.6.4 - UNRELEASED
 
   BUG FIXES
 
+    HDFS-9445. Datanode may deadlock while handling a bad volume.
+    (Walter Su via Kihwal)
+
 Release 2.6.3 - 2015-12-17
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index e352ea34e09..72124320969 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -385,51 +385,70 @@ public void addVolume(final StorageLocation location,
   /**
    * Removes a collection of volumes from FsDataset.
    * @param volumes the root directories of the volumes.
-   *
-   * DataNode should call this function before calling
-   * {@link DataStorage#removeVolumes(java.util.Collection)}.
    */
   @Override
-  public synchronized void removeVolumes(Collection<StorageLocation> volumes) {
+  public void removeVolumes(Collection<StorageLocation> volumes) {
     Set<String> volumeSet = new HashSet<String>();
     for (StorageLocation sl : volumes) {
       volumeSet.add(sl.getFile().getAbsolutePath());
     }
-    for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) {
-      Storage.StorageDirectory sd = dataStorage.getStorageDir(idx);
-      String volume = sd.getRoot().getAbsolutePath();
-      if (volumeSet.contains(volume)) {
-        LOG.info("Removing " + volume + " from FsDataset.");
-        // Disable the volume from the service.
-        asyncDiskService.removeVolume(sd.getCurrentDir());
-        this.volumes.removeVolume(sd.getRoot());
+
+    Map<String, List<ReplicaInfo>> blkToInvalidate =
+        new HashMap<String, List<ReplicaInfo>>();
+    List<String> storageToRemove = new ArrayList<String>();
+    synchronized (this) {
+      for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) {
+        Storage.StorageDirectory sd = dataStorage.getStorageDir(idx);
+        String volume = sd.getRoot().getAbsolutePath();
+        if (volumeSet.contains(volume)) {
+          LOG.info("Removing " + volume + " from FsDataset.");
 
-        // Removed all replica information for the blocks on the volume. Unlike
-        // updating the volumeMap in addVolume(), this operation does not scan
-        // disks.
-        for (String bpid : volumeMap.getBlockPoolList()) {
-          List<Block> blocks = new ArrayList<Block>();
-          for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
-              it.hasNext(); ) {
-            ReplicaInfo block = it.next();
-            String absBasePath =
+          // Disable the volume from the service.
+          asyncDiskService.removeVolume(sd.getCurrentDir());
+          this.volumes.removeVolume(sd.getRoot());
+
+          // Removed all replica information for the blocks on the volume.
+          // Unlike updating the volumeMap in addVolume(), this operation does
+          // not scan disks.
+          for (String bpid : volumeMap.getBlockPoolList()) {
+            List<ReplicaInfo> blocks = new ArrayList<ReplicaInfo>();
+            for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
+                it.hasNext(); ) {
+              ReplicaInfo block = it.next();
+              String absBasePath =
                 new File(block.getVolume().getBasePath()).getAbsolutePath();
-            if (absBasePath.equals(volume)) {
-              invalidate(bpid, block);
-              blocks.add(block);
-              it.remove();
+              if (absBasePath.equals(volume)) {
+                blocks.add(block);
+                it.remove();
+              }
             }
+            blkToInvalidate.put(bpid, blocks);
+            // Delete blocks from the block scanner in batch.
+            datanode.getBlockScanner().deleteBlocks(bpid,
+                blocks.toArray(new Block[blocks.size()]));
           }
-          // Delete blocks from the block scanner in batch.
-          datanode.getBlockScanner().deleteBlocks(bpid,
-              blocks.toArray(new Block[blocks.size()]));
-        }
-        storageMap.remove(sd.getStorageUuid());
+          storageToRemove.add(sd.getStorageUuid());
+        }
+      }
+      setupAsyncLazyPersistThreads();
+    }
+
+    // Call this outside the lock.
+    for (Map.Entry<String, List<ReplicaInfo>> entry :
+        blkToInvalidate.entrySet()) {
+      String bpid = entry.getKey();
+      List<ReplicaInfo> blocks = entry.getValue();
+      for (ReplicaInfo block : blocks) {
+        invalidate(bpid, block);
+      }
+    }
+
+    synchronized (this) {
+      for(String storageUuid : storageToRemove) {
+        storageMap.remove(storageUuid);
       }
     }
-    setupAsyncLazyPersistThreads();
   }
 
   private StorageType getStorageTypeFromLocations(
@@ -1639,15 +1658,11 @@ public void invalidate(String bpid, Block invalidBlks[]) throws IOException {
   public void invalidate(String bpid, ReplicaInfo block) {
     // If a DFSClient has the replica in its cache of short-circuit file
     // descriptors (and the client is using ShortCircuitShm), invalidate it.
-    // The short-circuit registry is null in the unit tests, because the
-    // datanode is mock object.
-    if (datanode.getShortCircuitRegistry() != null) {
-      datanode.getShortCircuitRegistry().processBlockInvalidation(
-          new ExtendedBlockId(block.getBlockId(), bpid));
+    datanode.getShortCircuitRegistry().processBlockInvalidation(
+        new ExtendedBlockId(block.getBlockId(), bpid));
 
-      // If the block is cached, start uncaching it.
-      cacheManager.uncacheBlock(bpid, block.getBlockId());
-    }
+    // If the block is cached, start uncaching it.
+    cacheManager.uncacheBlock(bpid, block.getBlockId());
 
     datanode.notifyNamenodeDeletedBlock(new ExtendedBlock(bpid, block),
         block.getStorageUuid());
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 0a9776de2c1..2846c7148ea 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.hdfs.server.datanode.DataStorage;
 import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.hdfs.server.datanode.ShortCircuitRegistry;
 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
@@ -127,6 +128,9 @@ public void setUp() throws IOException {
     when(datanode.getConf()).thenReturn(conf);
     when(datanode.getDnConf()).thenReturn(dnConf);
     when(datanode.getBlockScanner()).thenReturn(scanner);
+    final ShortCircuitRegistry shortCircuitRegistry =
+        new ShortCircuitRegistry(conf);
+    when(datanode.getShortCircuitRegistry()).thenReturn(shortCircuitRegistry);
 
     createStorageDirs(storage, conf, NUM_INIT_VOLUMES);
     dataset = new FsDatasetImpl(datanode, storage, conf);
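The restructured removeVolumes() above follows a two-phase pattern: while the FsDatasetImpl monitor is held, it only updates the dataset's own state and records the affected replicas and storage IDs (blkToInvalidate, storageToRemove); the invalidate() calls, which reach into other components such as the short-circuit registry, the cache manager, and NameNode notification, are made only after the lock is released ("Call this outside the lock"). The sketch below is a minimal, generic illustration of this collect-under-lock / act-outside-lock pattern; the class, interface, and method names are hypothetical and are not taken from the Hadoop code base.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

class VolumeRegistry {
  /**
   * Hypothetical callback that may acquire other locks (for example, to
   * notify another service). Invoking it while holding this object's monitor
   * could invert the lock order taken by other threads and deadlock.
   */
  interface Invalidator {
    void invalidate(String replica);
  }

  private final List<String> replicas = new ArrayList<String>();

  void removeVolume(String volumePrefix, Invalidator invalidator) {
    List<String> toInvalidate = new ArrayList<String>();
    // Phase 1: under the lock, only update local state and collect the work.
    synchronized (this) {
      for (Iterator<String> it = replicas.iterator(); it.hasNext(); ) {
        String replica = it.next();
        if (replica.startsWith(volumePrefix)) {
          toInvalidate.add(replica);
          it.remove();
        }
      }
    }
    // Phase 2: outside the lock, make the calls that touch other components.
    for (String replica : toInvalidate) {
      invalidator.invalidate(replica);
    }
  }
}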