HDFS-14849. Erasure Coding: the internal block is replicated many times when datanode is decommissioning. Contributed by HuangTao.

This commit is contained in:
Ayush Saxena 2019-09-27 18:26:47 +05:30
parent a93a139b5d
commit ce58c05f1d
3 changed files with 65 additions and 7 deletions

View File

@ -2359,11 +2359,13 @@ public class BlockManager implements BlockStatsMXBean {
if (isStriped) {
blockIndex = ((BlockInfoStriped) block)
.getStorageBlockIndex(storage);
if (!bitSet.get(blockIndex)) {
bitSet.set(blockIndex);
} else if (state == StoredReplicaState.LIVE) {
numReplicas.subtract(StoredReplicaState.LIVE, 1);
numReplicas.add(StoredReplicaState.REDUNDANT, 1);
if (state == StoredReplicaState.LIVE) {
if (!bitSet.get(blockIndex)) {
bitSet.set(blockIndex);
} else {
numReplicas.subtract(StoredReplicaState.LIVE, 1);
numReplicas.add(StoredReplicaState.REDUNDANT, 1);
}
}
}

View File

@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import org.slf4j.Logger;
@ -31,7 +30,6 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.junit.Test;

View File

@ -21,6 +21,7 @@ import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Lists;
import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
@ -748,6 +749,63 @@ public class TestBlockManager {
numReplicas.redundantInternalBlocks());
}
@Test
public void testChooseSrcDNWithDupECInDecommissioningNode() throws Exception {
long blockId = -9223372036854775776L; // real ec block id
Block aBlock = new Block(blockId, 0, 0);
// RS-3-2 EC policy
ErasureCodingPolicy ecPolicy =
SystemErasureCodingPolicies.getPolicies().get(1);
// striped blockInfo
BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlock, ecPolicy);
// ec storageInfo
DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
"storage1", "1.1.1.1", "rack1", "host1");
DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
"storage2", "2.2.2.2", "rack2", "host2");
DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
"storage3", "3.3.3.3", "rack3", "host3");
DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
"storage4", "4.4.4.4", "rack4", "host4");
DatanodeStorageInfo ds5 = DFSTestUtil.createDatanodeStorageInfo(
"storage5", "5.5.5.5", "rack5", "host5");
DatanodeStorageInfo ds6 = DFSTestUtil.createDatanodeStorageInfo(
"storage6", "6.6.6.6", "rack6", "host6");
// link block with storage
aBlockInfoStriped.addStorage(ds1, aBlock);
aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
aBlockInfoStriped.addStorage(ds5, new Block(blockId + 4, 0, 0));
// NOTE: duplicate block 0this DN will replace the decommission ds1 DN
aBlockInfoStriped.addStorage(ds6, aBlock);
addEcBlockToBM(blockId, ecPolicy);
// decommission datanode where store block 0
ds1.getDatanodeDescriptor().startDecommission();
List<DatanodeDescriptor> containingNodes =
new LinkedList<DatanodeDescriptor>();
List<DatanodeStorageInfo> nodesContainingLiveReplicas =
new LinkedList<DatanodeStorageInfo>();
NumberReplicas numReplicas = new NumberReplicas();
List<Byte> liveBlockIndices = new ArrayList<>();
bm.chooseSourceDatanodes(
aBlockInfoStriped,
containingNodes,
nodesContainingLiveReplicas,
numReplicas, liveBlockIndices,
LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY);
assertEquals("There are 5 live replicas in " +
"[ds2, ds3, ds4, ds5, ds6] datanodes ",
5, numReplicas.liveReplicas());
assertEquals("The ds1 datanode is in decommissioning, " +
"so there is no redundant replica",
0, numReplicas.redundantInternalBlocks());
}
@Test
public void testFavorDecomUntilHardLimit() throws Exception {
bm.maxReplicationStreams = 0;