HDFS-5579. Under-construction files make DataNode decommission take many hours. Contributed by zhaoyunjiong.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1557904 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e06ae2d567
commit
e210519d32
|
@ -749,6 +749,9 @@ Release 2.4.0 - UNRELEASED
|
||||||
HDFS-5710. FSDirectory#getFullPathName should check inodes against null.
|
HDFS-5710. FSDirectory#getFullPathName should check inodes against null.
|
||||||
(Uma Maheswara Rao G via jing9)
|
(Uma Maheswara Rao G via jing9)
|
||||||
|
|
||||||
|
HDFS-5579. Under construction files make DataNode decommission take very long
|
||||||
|
hours. (zhaoyunjiong via jing9)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
||||||
|
|
||||||
HDFS-4985. Add storage type to the protocol and expose it in block report
|
HDFS-4985. Add storage type to the protocol and expose it in block report
|
||||||
|
|
|
@ -31,7 +31,7 @@ public interface BlockCollection {
|
||||||
/**
|
/**
|
||||||
* Get the last block of the collection.
|
* Get the last block of the collection.
|
||||||
*/
|
*/
|
||||||
public BlockInfo getLastBlock() throws IOException;
|
public BlockInfo getLastBlock();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get content summary.
|
* Get content summary.
|
||||||
|
|
|
@ -1214,8 +1214,10 @@ public class BlockManager {
|
||||||
// block should belong to a file
|
// block should belong to a file
|
||||||
bc = blocksMap.getBlockCollection(block);
|
bc = blocksMap.getBlockCollection(block);
|
||||||
// abandoned block or block reopened for append
|
// abandoned block or block reopened for append
|
||||||
if(bc == null || bc.isUnderConstruction()) {
|
if (bc == null
|
||||||
neededReplications.remove(block, priority); // remove from neededReplications
|
|| (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
|
||||||
|
// remove from neededReplications
|
||||||
|
neededReplications.remove(block, priority);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1295,7 +1297,7 @@ public class BlockManager {
|
||||||
// block should belong to a file
|
// block should belong to a file
|
||||||
bc = blocksMap.getBlockCollection(block);
|
bc = blocksMap.getBlockCollection(block);
|
||||||
// abandoned block or block reopened for append
|
// abandoned block or block reopened for append
|
||||||
if(bc == null || bc.isUnderConstruction()) {
|
if(bc == null || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
|
||||||
neededReplications.remove(block, priority); // remove from neededReplications
|
neededReplications.remove(block, priority); // remove from neededReplications
|
||||||
rw.targets = null;
|
rw.targets = null;
|
||||||
continue;
|
continue;
|
||||||
|
@ -2906,8 +2908,16 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
||||||
NumberReplicas num = countNodes(block);
|
NumberReplicas num = countNodes(block);
|
||||||
int curReplicas = num.liveReplicas();
|
int curReplicas = num.liveReplicas();
|
||||||
int curExpectedReplicas = getReplication(block);
|
int curExpectedReplicas = getReplication(block);
|
||||||
|
|
||||||
if (isNeededReplication(block, curExpectedReplicas, curReplicas)) {
|
if (isNeededReplication(block, curExpectedReplicas, curReplicas)) {
|
||||||
if (curExpectedReplicas > curReplicas) {
|
if (curExpectedReplicas > curReplicas) {
|
||||||
|
if (bc.isUnderConstruction()) {
|
||||||
|
if (block.equals(bc.getLastBlock()) && curReplicas > minReplication) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
underReplicatedInOpenFiles++;
|
||||||
|
}
|
||||||
|
|
||||||
// Log info about one block for this node which needs replication
|
// Log info about one block for this node which needs replication
|
||||||
if (!status) {
|
if (!status) {
|
||||||
status = true;
|
status = true;
|
||||||
|
@ -2924,9 +2934,6 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
||||||
if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
|
if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
|
||||||
decommissionOnlyReplicas++;
|
decommissionOnlyReplicas++;
|
||||||
}
|
}
|
||||||
if (bc.isUnderConstruction()) {
|
|
||||||
underReplicatedInOpenFiles++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (!neededReplications.contains(block) &&
|
if (!neededReplications.contains(block) &&
|
||||||
pendingReplications.getNumReplicas(block) == 0) {
|
pendingReplications.getNumReplicas(block) == 0) {
|
||||||
|
|
|
@ -640,7 +640,7 @@ public class INodeFile extends INodeWithAdditionalFields
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BlockInfo getLastBlock() throws IOException {
|
public BlockInfo getLastBlock() {
|
||||||
return blocks == null || blocks.length == 0? null: blocks[blocks.length-1];
|
return blocks == null || blocks.length == 0? null: blocks[blocks.length-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
|
@ -779,4 +780,53 @@ public class TestDecommission {
|
||||||
Thread.sleep(HEARTBEAT_INTERVAL * 1000);
|
Thread.sleep(HEARTBEAT_INTERVAL * 1000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=120000)
|
||||||
|
public void testDecommissionWithOpenfile() throws IOException, InterruptedException {
|
||||||
|
LOG.info("Starting test testDecommissionWithOpenfile");
|
||||||
|
|
||||||
|
//At most 4 nodes will be decommissioned
|
||||||
|
startCluster(1, 7, conf);
|
||||||
|
|
||||||
|
FileSystem fileSys = cluster.getFileSystem(0);
|
||||||
|
FSNamesystem ns = cluster.getNamesystem(0);
|
||||||
|
|
||||||
|
String openFile = "/testDecommissionWithOpenfile.dat";
|
||||||
|
|
||||||
|
writeFile(fileSys, new Path(openFile), (short)3);
|
||||||
|
// make sure the file was open for write
|
||||||
|
FSDataOutputStream fdos = fileSys.append(new Path(openFile));
|
||||||
|
|
||||||
|
LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(cluster.getNameNode(0), openFile, 0, fileSize);
|
||||||
|
|
||||||
|
DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();
|
||||||
|
DatanodeInfo[] dnInfos4FirstBlock = lbs.get(0).getLocations();
|
||||||
|
|
||||||
|
ArrayList<String> nodes = new ArrayList<String>();
|
||||||
|
ArrayList<DatanodeInfo> dnInfos = new ArrayList<DatanodeInfo>();
|
||||||
|
|
||||||
|
for (DatanodeInfo datanodeInfo : dnInfos4FirstBlock) {
|
||||||
|
DatanodeInfo found = datanodeInfo;
|
||||||
|
for (DatanodeInfo dif: dnInfos4LastBlock) {
|
||||||
|
if (datanodeInfo.equals(dif)) {
|
||||||
|
found = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (found != null) {
|
||||||
|
nodes.add(found.getXferAddr());
|
||||||
|
dnInfos.add(found);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//decommission one of the 3 nodes which have last block
|
||||||
|
nodes.add(dnInfos4LastBlock[0].getXferAddr());
|
||||||
|
dnInfos.add(dnInfos4LastBlock[0]);
|
||||||
|
|
||||||
|
writeConfigFile(excludeFile, nodes);
|
||||||
|
refreshNodes(ns, conf);
|
||||||
|
for (DatanodeInfo dn : dnInfos) {
|
||||||
|
waitNodeState(dn, AdminStates.DECOMMISSIONED);
|
||||||
|
}
|
||||||
|
|
||||||
|
fdos.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue