From b762199adbd10173c588df67bd227393c5bbcce9 Mon Sep 17 00:00:00 2001 From: Uma Mahesh Date: Sat, 19 Sep 2015 21:18:42 -0700 Subject: [PATCH] HDFS-9113. ErasureCodingWorker#processErasureCodingTasks should not fail to process remaining tasks due to one invalid ECTask. Contributed by Uma Maheswara Rao G. --- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 3 + .../erasurecode/ErasureCodingWorker.java | 9 ++- .../hadoop/hdfs/TestRecoverStripedFile.java | 62 ++++++++++++++----- 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt index 468cc56f519..db63d533949 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt @@ -438,3 +438,6 @@ HDFS-8550. Erasure Coding: Fix FindBugs Multithreaded correctness Warning. (Rakesh R via zhz) + + HDFS-9113. ErasureCodingWorker#processErasureCodingTasks should not fail to process + remaining tasks due to one invalid ECTask (umamahesh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java index d1fa4788d9a..7a7cd24612c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java @@ -175,8 +175,13 @@ public Thread newThread(Runnable r) { */ public void processErasureCodingTasks(Collection ecTasks) { for (BlockECRecoveryInfo recoveryInfo : ecTasks) { - STRIPED_BLK_RECOVERY_THREAD_POOL.submit(new ReconstructAndTransferBlock( - recoveryInfo)); + try { + STRIPED_BLK_RECOVERY_THREAD_POOL + .submit(new ReconstructAndTransferBlock(recoveryInfo)); + } catch (Throwable e) { + LOG.warn("Failed to recover striped block " + + recoveryInfo.getExtendedBlock().getLocalBlock(), e); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRecoverStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRecoverStripedFile.java index b58184552ab..ed2ca7b367d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRecoverStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRecoverStripedFile.java @@ -17,10 +17,23 @@ */ package org.apache.hadoop.hdfs; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -28,30 +41,20 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.namenode.ErasureCodingPolicyManager; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; +import org.apache.hadoop.hdfs.server.protocol.BlockECRecoveryCommand.BlockECRecoveryInfo; import org.apache.hadoop.hdfs.util.StripedBlockUtil; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ThreadLocalRandom; - public class TestRecoverStripedFile { public static final Log LOG = LogFactory.getLog(TestRecoverStripedFile.class); @@ -383,4 +386,33 @@ private LocatedBlocks waitForRecoveryFinished(Path file, int groupSize) private LocatedBlocks getLocatedBlocks(Path file) throws IOException { return fs.getClient().getLocatedBlocks(file.toString(), 0, Long.MAX_VALUE); } + + /* + * Tests that processErasureCodingTasks should not throw exceptions out due to + * invalid ECTask submission. + */ + @Test + public void testProcessErasureCodingTasksSubmitionShouldSucceed() + throws Exception { + DataNode dataNode = cluster.dataNodes.get(0).datanode; + + // Pack invalid(dummy) parameters in ecTasks. Irrespective of parameters, each task + // thread pool submission should succeed, so that it will not prevent + // processing other tasks in the list if any exceptions. + int size = cluster.dataNodes.size(); + short[] liveIndices = new short[size]; + DatanodeInfo[] dataDNs = new DatanodeInfo[size + 1]; + DatanodeStorageInfo targetDnInfos_1 = BlockManagerTestUtil + .newDatanodeStorageInfo(DFSTestUtil.getLocalDatanodeDescriptor(), + new DatanodeStorage("s01")); + DatanodeStorageInfo[] dnStorageInfo = new DatanodeStorageInfo[] { + targetDnInfos_1 }; + + BlockECRecoveryInfo invalidECInfo = new BlockECRecoveryInfo( + new ExtendedBlock("bp-id", 123456), dataDNs, dnStorageInfo, liveIndices, + ErasureCodingPolicyManager.getSystemDefaultPolicy()); + List ecTasks = new ArrayList(); + ecTasks.add(invalidECInfo); + dataNode.getErasureCodingWorker().processErasureCodingTasks(ecTasks); + } }