HDFS-2822. processMisReplicatedBlock incorrectly identifies under-construction blocks as under-replicated. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1234926 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon  2012-01-23 18:12:25 +00:00
commit 1b935dd6be
parent d1cab74401
3 changed files with 61 additions and 1 deletion

CHANGES.txt

@@ -171,6 +171,9 @@ Release 0.23.1 - UNRELEASED
     HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml.
     (hitesh via tucu)
 
+    HDFS-2822. processMisReplicatedBlock incorrectly identifies
+    under-construction blocks as under-replicated. (todd)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
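The new CHANGES entry above is easier to follow with a picture of what "under construction" means for a block: a block moves through construction states and is only expected to reach its target replication once it is complete, so comparing an open file's last block against the file's replication factor is misleading. The sketch below is a simplified, self-contained model of that idea; the class and its helper names are hypothetical, and only the state names mirror HDFS's block lifecycle.

// Simplified, self-contained model of block completeness (hypothetical class;
// the state names mirror HDFS's block lifecycle, but this is not the real API).
class BlockSketch {
  enum State { UNDER_CONSTRUCTION, UNDER_RECOVERY, COMMITTED, COMPLETE }

  private final State state;
  private final int liveReplicas;
  private final int expectedReplication;

  BlockSketch(State state, int liveReplicas, int expectedReplication) {
    this.state = state;
    this.liveReplicas = liveReplicas;
    this.expectedReplication = expectedReplication;
  }

  // Mirrors the idea behind BlockInfo.isComplete(): only COMPLETE blocks
  // are expected to have reached their target replication.
  boolean isComplete() {
    return state == State.COMPLETE;
  }

  boolean looksUnderReplicated() {
    // Comparing replicas against the target only makes sense once the block
    // is complete; an open file's last block normally has fewer replicas.
    return isComplete() && liveReplicas < expectedReplication;
  }

  public static void main(String[] args) {
    BlockSketch rbw = new BlockSketch(State.UNDER_CONSTRUCTION, 1, 3);
    BlockSketch done = new BlockSketch(State.COMPLETE, 1, 3);
    System.out.println("RBW block under-replicated?      " + rbw.looksUnderReplicated());  // false
    System.out.println("Complete block under-replicated? " + done.looksUnderReplicated()); // true
  }
}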

org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1791,7 +1791,8 @@ public class BlockManager {
   public void processMisReplicatedBlocks() {
     assert namesystem.hasWriteLock();
-    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
+    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0,
+      nrUnderConstruction = 0;
     neededReplications.clear();
     for (BlockInfo block : blocksMap.getBlocks()) {
       INodeFile fileINode = block.getINode();
@@ -1801,6 +1802,12 @@ public class BlockManager {
         addToInvalidates(block);
         continue;
       }
+      if (!block.isComplete()) {
+        // Incomplete blocks are never considered mis-replicated --
+        // they'll be reached when they are completed or recovered.
+        nrUnderConstruction++;
+        continue;
+      }
       // calculate current replication
       short expectedReplication = fileINode.getReplication();
       NumberReplicas num = countNodes(block);
@@ -1824,6 +1831,7 @@ public class BlockManager {
     LOG.info("Number of invalid blocks = " + nrInvalid);
     LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
     LOG.info("Number of over-replicated blocks = " + nrOverReplicated);
+    LOG.info("Number of blocks being written = " + nrUnderConstruction);
   }
 
   /** Set replication for the blocks. */
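Putting the three hunks above together, the patched scan has four outcomes per block: invalid, under construction (now skipped), under-replicated, or over-replicated. The condensed sketch below restates that flow with hypothetical stand-in types; it illustrates the control flow only and is not the real BlockManager code, which works on BlockInfo, NumberReplicas, and the namesystem's replication queues.

// Condensed restatement of the patched processMisReplicatedBlocks() flow using
// hypothetical stand-in types.
import java.util.List;

class MisReplicationScanSketch {
  static final class Blk {
    final boolean orphaned;        // no longer belongs to any file
    final boolean complete;        // finished being written
    final int liveReplicas;
    final int expectedReplication;
    Blk(boolean orphaned, boolean complete, int liveReplicas, int expectedReplication) {
      this.orphaned = orphaned;
      this.complete = complete;
      this.liveReplicas = liveReplicas;
      this.expectedReplication = expectedReplication;
    }
  }

  static void scan(List<Blk> blocks) {
    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0, nrUnderConstruction = 0;
    for (Blk b : blocks) {
      if (b.orphaned) {            // corresponds to the addToInvalidates() branch
        nrInvalid++;
        continue;
      }
      if (!b.complete) {           // the new HDFS-2822 guard: skip blocks being written
        nrUnderConstruction++;
        continue;
      }
      if (b.liveReplicas < b.expectedReplication) {
        nrUnderReplicated++;       // would be queued in neededReplications
      } else if (b.liveReplicas > b.expectedReplication) {
        nrOverReplicated++;        // would go through over-replication handling
      }
    }
    System.out.println("invalid=" + nrInvalid + " under=" + nrUnderReplicated
        + " over=" + nrOverReplicated + " underConstruction=" + nrUnderConstruction);
  }

  public static void main(String[] args) {
    // One orphaned block, one healthy block, and an open file's last block
    // with a single replica out of a target of three.
    scan(List.of(
        new Blk(true,  true,  1, 3),
        new Blk(false, true,  3, 3),
        new Blk(false, false, 1, 3)));
  }
}

Run standalone, this prints invalid=1 under=0 over=0 underConstruction=1: the open file's single-replica block is now tallied as under construction rather than under-replicated.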

org/apache/hadoop/hdfs/TestSafeMode.java

@@ -19,22 +19,30 @@
 package org.apache.hadoop.hdfs;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.io.IOUtils;
 
 import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
+
 /**
  * Tests to verify safe mode correctness.
  */
 public class TestSafeMode {
+  private static final int BLOCK_SIZE = 1024;
   Configuration conf;
   MiniDFSCluster cluster;
   FileSystem fs;
@@ -43,6 +51,7 @@ public class TestSafeMode {
   @Before
   public void startUp() throws IOException {
     conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
     cluster.waitActive();
     fs = cluster.getFileSystem();
@@ -128,6 +137,46 @@ public class TestSafeMode {
     assertEquals("", status);
   }
 
+  /**
+   * Test that, when under-replicated blocks are processed at the end of
+   * safe-mode, blocks currently under construction are not considered
+   * under-replicated or missing. Regression test for HDFS-2822.
+   */
+  @Test
+  public void testRbwBlocksNotConsideredUnderReplicated() throws IOException {
+    List<FSDataOutputStream> stms = Lists.newArrayList();
+    try {
+      // Create some junk blocks so that the NN doesn't just immediately
+      // exit safemode on restart.
+      DFSTestUtil.createFile(fs, new Path("/junk-blocks"),
+          BLOCK_SIZE*4, (short)1, 1L);
+
+      // Create several files which are left open. It's important to
+      // create several here, because otherwise the first iteration of the
+      // replication monitor will pull them off the replication queue and
+      // hide this bug from the test!
+      for (int i = 0; i < 10; i++) {
+        FSDataOutputStream stm = fs.create(
+            new Path("/append-" + i), true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+        stms.add(stm);
+        stm.write(1);
+        stm.hflush();
+      }
+
+      cluster.restartNameNode();
+      FSNamesystem ns = cluster.getNameNode(0).getNamesystem();
+      BlockManagerTestUtil.updateState(ns.getBlockManager());
+      assertEquals(0, ns.getPendingReplicationBlocks());
+      assertEquals(0, ns.getCorruptReplicaBlocks());
+      assertEquals(0, ns.getMissingBlocksCount());
+    } finally {
+      for (FSDataOutputStream stm : stms) {
+        IOUtils.closeStream(stm);
+      }
+      cluster.shutdown();
+    }
+  }
+
   public interface FSRun {
     public abstract void run(FileSystem fs) throws IOException;
   }