From 6a1fc34595a1794dac5b50bcc7197fe1c3c91e0d Mon Sep 17 00:00:00 2001 From: Brahma Reddy Battula Date: Tue, 21 Jun 2016 16:12:46 +0530 Subject: [PATCH] HDFS-9530. ReservedSpace is not cleared for abandoned Blocks (Contributed by Brahma Reddy Battula) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../datanode/DataNodeFaultInjector.java | 2 + .../hdfs/server/datanode/DataXceiver.java | 1 + .../fsdataset/impl/FsDatasetImpl.java | 4 ++ .../impl/TestRbwSpaceReservation.java | 48 ++++++++++++++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 60b1f1dcf4f..67465425273 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -201,6 +201,9 @@ Release 2.7.3 - UNRELEASED HDFS-10474. hftp copy fails when file name with Chinese+special char in branch-2 (Brahma Reddy Battula) + HDFS-9530. ReservedSpace is not cleared for abandoned Blocks + (Brahma Reddy Battula) + Release 2.7.2 - 2016-01-25 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java index 732742098a8..4ecbdc0207a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java @@ -53,4 +53,6 @@ public boolean dropHeartbeatPacket() { public void stopSendingPacketDownstream() throws IOException {} public void noRegistration() throws IOException { } + + public void failMirrorConnection() throws IOException { } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index f27c28b323d..7597a368dd9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -697,6 +697,7 @@ public void writeBlock(final ExtendedBlock block, mirrorTarget = NetUtils.createSocketAddr(mirrorNode); mirrorSock = datanode.newSocket(); try { + DataNodeFaultInjector.get().failMirrorConnection(); int timeoutValue = dnConf.socketTimeout + (HdfsServerConstants.READ_TIMEOUT_EXTENSION * targets.length); int writeTimeout = dnConf.socketWriteTimeout + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 9f626e04234..64f3f6cfc1e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -81,6 +81,7 @@ import org.apache.hadoop.hdfs.server.datanode.ReplicaBeingWritten; import org.apache.hadoop.hdfs.server.datanode.ReplicaHandler; import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline; +import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface; import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo; import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException; import org.apache.hadoop.hdfs.server.datanode.ReplicaUnderRecovery; @@ -1848,6 +1849,9 @@ public void invalidate(String bpid, Block invalidBlks[]) throws IOException { LOG.debug("Block file " + removing.getBlockFile().getName() + " is to be deleted"); } + if (removing instanceof ReplicaInPipelineInterface) { + 
((ReplicaInPipelineInterface) removing).releaseAllBytesReserved(); + } } if (v.isTransientStorage()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestRbwSpaceReservation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestRbwSpaceReservation.java index a2638c8dbe9..026cbd8adc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestRbwSpaceReservation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestRbwSpaceReservation.java @@ -35,6 +35,8 @@ import org.apache.hadoop.hdfs.*; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.test.GenericTestUtils; @@ -71,7 +73,7 @@ public class TestRbwSpaceReservation { private DistributedFileSystem fs = null; private DFSClient client = null; FsVolumeImpl singletonVolume = null; - + private DataNodeFaultInjector old = null; private static Random rand = new Random(); private void initConfig(int blockSize) { @@ -459,6 +461,50 @@ public void testReservedSpaceForAppend() throws Exception { checkReservedSpace(expectedFile2Reserved); } + @Test(timeout = 30000) + public void testReservedSpaceForPipelineRecovery() + throws Exception { + final short replication = 3; + startCluster(BLOCK_SIZE, replication, -1); + + final String methodName = GenericTestUtils.getMethodName(); + final Path file = new Path("/" + methodName + ".01.dat"); + + old = DataNodeFaultInjector.get(); + // Fault injector to fail connection to mirror first time. 
+ DataNodeFaultInjector.set(new DataNodeFaultInjector() { + private int tries = 0; + + @Override + public void failMirrorConnection() throws IOException { + if (tries++ == 0) { + throw new IOException("Failing Mirror for space reservation"); + } + } + }); + // Write 1 byte to the file and close the stream. + FSDataOutputStream os = fs.create(file, replication); + os.write(new byte[1]); + os.close(); + // Ensure all space reserved for the replica was released on each + // DataNode. + cluster.triggerBlockReports(); + for (final DataNode dn : cluster.getDataNodes()) { + for (FsVolumeSpi fsVolume : dn.getFSDataset().getVolumes()) { + { + final FsVolumeImpl volume = (FsVolumeImpl) fsVolume; + GenericTestUtils.waitFor(new Supplier() { + @Override public Boolean get() { + LOG.info("dn " + dn.getDisplayName() + " space : " + volume + .getReservedForRbw()); + return (volume.getReservedForRbw() == 0); + } + }, 100, Integer.MAX_VALUE); // Wait until the test times out. + } + } + } + } + private void checkReservedSpace(final long expectedReserved) throws TimeoutException, InterruptedException, IOException { for (final DataNode dn : cluster.getDataNodes()) {