From aa2303088f0296d587afeaa4541cfe735b27a756 Mon Sep 17 00:00:00 2001
From: Arpit Agarwal
Date: Thu, 6 Aug 2015 15:17:45 -0700
Subject: [PATCH] HDFS-8486. DN startup may cause severe data loss.
 (Contributed by Daryn Sharp)

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  2 ++
 .../hadoop/hdfs/server/datanode/DataNode.java |  3 +-
 .../fsdataset/impl/BlockPoolSlice.java        | 28 +++++++++++----
 .../fsdataset/impl/TestFsDatasetImpl.java     | 35 +++++++++++++++++++
 4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 04c0f4c1967..47ec910e92b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -33,6 +33,8 @@ Release 2.6.1 - UNRELEASED
     HDFS-7733. NFS: readdir/readdirplus return null directory attribute
     on failure. (Arpit Agarwal)
 
+    HDFS-8486. DN startup may cause severe data loss. (daryn via cmccabe)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index a7dd9223584..fe57bc35975 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -1279,9 +1279,8 @@ public class DataNode extends ReconfigurableBase
     // failures.
     checkDiskError();
 
-    initPeriodicScanners(conf);
-
     data.addBlockPool(nsInfo.getBlockPoolID(), conf);
+    initDirectoryScanner(conf);
   }
 
   BPOfferService[] getAllBpOs() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
index e3d1607a92a..58e57f9221b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
@@ -531,10 +531,28 @@ class BlockPoolSlice {
       // Leave both block replicas in place.
       return replica1;
     }
+    final ReplicaInfo replicaToDelete =
+        selectReplicaToDelete(replica1, replica2);
+    final ReplicaInfo replicaToKeep =
+        (replicaToDelete != replica1) ? replica1 : replica2;
+    // Update volumeMap and delete the replica
+    volumeMap.add(bpid, replicaToKeep);
+    if (replicaToDelete != null) {
+      deleteReplica(replicaToDelete);
+    }
+    return replicaToKeep;
+  }
 
+  static ReplicaInfo selectReplicaToDelete(final ReplicaInfo replica1,
+      final ReplicaInfo replica2) {
     ReplicaInfo replicaToKeep;
     ReplicaInfo replicaToDelete;
 
+    // it's the same block so don't ever delete it, even if GS or size
+    // differs. caller should keep the one it just discovered on disk
+    if (replica1.getBlockFile().equals(replica2.getBlockFile())) {
+      return null;
+    }
     if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) {
       replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp()
           ? replica1 : replica2;
@@ -554,10 +572,10 @@ class BlockPoolSlice {
       LOG.debug("resolveDuplicateReplicas decide to keep " + replicaToKeep
           + ". Will try to delete " + replicaToDelete);
     }
+    return replicaToDelete;
+  }
 
-    // Update volumeMap.
-    volumeMap.add(bpid, replicaToKeep);
-
+  private void deleteReplica(final ReplicaInfo replicaToDelete) {
     // Delete the files on disk. Failure here is okay.
     final File blockFile = replicaToDelete.getBlockFile();
     if (!blockFile.delete()) {
@@ -567,10 +585,8 @@ class BlockPoolSlice {
     if (!metaFile.delete()) {
       LOG.warn("Failed to delete meta file " + metaFile);
     }
-
-    return replicaToKeep;
   }
-  
+
   /**
    * Find out the number of bytes in the block that match its crc.
    *
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 7c39ca55680..9a8ce9c42fb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -30,6 +30,8 @@ import org.apache.hadoop.hdfs.server.datanode.DNConf;
 import org.apache.hadoop.hdfs.server.datanode.DataBlockScanner;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataStorage;
+import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
+import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.StringUtils;
@@ -46,6 +48,8 @@ import java.util.List;
 import java.util.Set;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyString;
@@ -186,4 +190,35 @@ public class TestFsDatasetImpl {
     verify(scanner, times(BLOCK_POOL_IDS.length))
         .deleteBlocks(anyString(), any(Block[].class));
   }
+
+  @Test
+  public void testDuplicateReplicaResolution() throws IOException {
+    FsVolumeImpl fsv1 = Mockito.mock(FsVolumeImpl.class);
+    FsVolumeImpl fsv2 = Mockito.mock(FsVolumeImpl.class);
+
+    File f1 = new File("d1/block");
+    File f2 = new File("d2/block");
+
+    ReplicaInfo replicaOlder = new FinalizedReplica(1,1,1,fsv1,f1);
+    ReplicaInfo replica = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaSame = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaNewer = new FinalizedReplica(1,3,3,fsv1,f1);
+
+    ReplicaInfo replicaOtherOlder = new FinalizedReplica(1,1,1,fsv2,f2);
+    ReplicaInfo replicaOtherSame = new FinalizedReplica(1,2,2,fsv2,f2);
+    ReplicaInfo replicaOtherNewer = new FinalizedReplica(1,3,3,fsv2,f2);
+
+    // equivalent path so don't remove either
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaSame, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaOlder, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaNewer, replica));
+
+    // keep latest found replica
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherSame, replica));
+    assertSame(replicaOtherOlder,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherOlder, replica));
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherNewer, replica));
+  }
 }