HDFS-2822. processMisReplicatedBlock incorrectly identifies under-construction blocks as under-replicated. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1234926 13f79535-47bb-0310-9956-ffa450edef68
parent d1cab74401
commit 1b935dd6be
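[Editor's note] The bug in context: processMisReplicatedBlocks() runs as the NameNode leaves safe mode and compares each block's live replica count against the file's target replication. Replicas of a file still open for write are in the RBW (replica being written) state and typically do not count as live after a restart, so before this patch under-construction blocks were queued as under-replicated, or counted as missing when no live replica existed. Below is a self-contained toy model of that decision; every name in it is a hypothetical stand-in, not a real HDFS class.

// MisReplicationModel.java -- hedged sketch of the HDFS-2822 logic.
import java.util.ArrayList;
import java.util.List;

public class MisReplicationModel {
  /** Hypothetical stand-in for a block; not the real BlockInfo. */
  static class Block {
    final boolean complete;       // finalized vs. under construction
    final int liveReplicas;       // RBW replicas are not counted as live
    final int expectedReplicas;   // the file's replication factor
    Block(boolean complete, int live, int expected) {
      this.complete = complete;
      this.liveReplicas = live;
      this.expectedReplicas = expected;
    }
  }

  /** Queue blocks with too few live replicas; the fix skips incomplete ones. */
  static List<Block> underReplicated(List<Block> blocks, boolean applyFix) {
    List<Block> queued = new ArrayList<Block>();
    for (Block b : blocks) {
      if (applyFix && !b.complete) {
        continue; // HDFS-2822: under-construction blocks are not mis-replicated
      }
      if (b.liveReplicas < b.expectedReplicas) {
        queued.add(b);
      }
    }
    return queued;
  }

  public static void main(String[] args) {
    List<Block> blocks = new ArrayList<Block>();
    blocks.add(new Block(true, 1, 3));   // genuinely under-replicated
    blocks.add(new Block(false, 0, 1));  // open for write: zero "live" replicas
    System.out.println(underReplicated(blocks, false).size()); // 2 -- the bug
    System.out.println(underReplicated(blocks, true).size());  // 1 -- fixed
  }
}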
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -171,6 +171,9 @@ Release 0.23.1 - UNRELEASED
     HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml.
     (hitesh via tucu)
 
+    HDFS-2822. processMisReplicatedBlock incorrectly identifies
+    under-construction blocks as under-replicated. (todd)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1791,7 +1791,8 @@ public class BlockManager {
   public void processMisReplicatedBlocks() {
     assert namesystem.hasWriteLock();
 
-    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
+    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0,
+        nrUnderConstruction = 0;
     neededReplications.clear();
     for (BlockInfo block : blocksMap.getBlocks()) {
       INodeFile fileINode = block.getINode();
@@ -1801,6 +1802,12 @@ public class BlockManager {
         addToInvalidates(block);
         continue;
       }
+      if (!block.isComplete()) {
+        // Incomplete blocks are never considered mis-replicated --
+        // they'll be reached when they are completed or recovered.
+        nrUnderConstruction++;
+        continue;
+      }
       // calculate current replication
       short expectedReplication = fileINode.getReplication();
       NumberReplicas num = countNodes(block);
@@ -1824,6 +1831,7 @@ public class BlockManager {
     LOG.info("Number of invalid blocks = " + nrInvalid);
     LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
     LOG.info("Number of over-replicated blocks = " + nrOverReplicated);
+    LOG.info("Number of blocks being written = " + nrUnderConstruction);
   }
 
   /** Set replication for the blocks. */
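[Editor's note] On the new guard: block.isComplete() separates finalized blocks from those still under construction, and incomplete blocks re-enter this processing once they are completed or recovered, exactly as the added comment says. A hedged sketch of what such a completeness check amounts to follows; the enum values mirror the BlockUCState states of this era's HDFS, but the class itself is illustrative, not the real BlockInfo.

// BlockStateSketch.java -- illustrative model of the completeness check.
public class BlockStateSketch {
  /** Construction states, modeled on HdfsServerConstants.BlockUCState. */
  enum BlockUCState { COMPLETE, UNDER_CONSTRUCTION, UNDER_RECOVERY, COMMITTED }

  private final BlockUCState state;

  BlockStateSketch(BlockUCState state) {
    this.state = state;
  }

  /** Only COMPLETE blocks take part in mis-replication accounting. */
  boolean isComplete() {
    return state == BlockUCState.COMPLETE;
  }
}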
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
@@ -19,22 +19,30 @@
 package org.apache.hadoop.hdfs;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.io.IOUtils;
 
 import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
+
 /**
  * Tests to verify safe mode correctness.
  */
 public class TestSafeMode {
+  private static final int BLOCK_SIZE = 1024;
   Configuration conf;
   MiniDFSCluster cluster;
   FileSystem fs;
@@ -43,6 +51,7 @@ public class TestSafeMode {
   @Before
   public void startUp() throws IOException {
     conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
     cluster.waitActive();
     fs = cluster.getFileSystem();
@@ -128,6 +137,46 @@ public class TestSafeMode {
     assertEquals("", status);
   }
 
+  /**
+   * Test that, when under-replicated blocks are processed at the end of
+   * safe-mode, blocks currently under construction are not considered
+   * under-replicated or missing. Regression test for HDFS-2822.
+   */
+  @Test
+  public void testRbwBlocksNotConsideredUnderReplicated() throws IOException {
+    List<FSDataOutputStream> stms = Lists.newArrayList();
+    try {
+      // Create some junk blocks so that the NN doesn't just immediately
+      // exit safemode on restart.
+      DFSTestUtil.createFile(fs, new Path("/junk-blocks"),
+          BLOCK_SIZE*4, (short)1, 1L);
+      // Create several files which are left open. It's important to
+      // create several here, because otherwise the first iteration of the
+      // replication monitor will pull them off the replication queue and
+      // hide this bug from the test!
+      for (int i = 0; i < 10; i++) {
+        FSDataOutputStream stm = fs.create(
+            new Path("/append-" + i), true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+        stms.add(stm);
+        stm.write(1);
+        stm.hflush();
+      }
+
+      cluster.restartNameNode();
+      FSNamesystem ns = cluster.getNameNode(0).getNamesystem();
+      BlockManagerTestUtil.updateState(ns.getBlockManager());
+      assertEquals(0, ns.getPendingReplicationBlocks());
+      assertEquals(0, ns.getCorruptReplicaBlocks());
+      assertEquals(0, ns.getMissingBlocksCount());
+
+    } finally {
+      for (FSDataOutputStream stm : stms) {
+        IOUtils.closeStream(stm);
+      }
+      cluster.shutdown();
+    }
+  }
+
   public interface FSRun {
     public abstract void run(FileSystem fs) throws IOException;
   }
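[Editor's note] A natural companion check, not part of this commit: with the fix in place, the open files' RBW blocks should also stay off the under-replicated count. getUnderReplicatedBlocks() is the FSNamesystem metric of this era, but treat the line below as a hypothetical addition inside the try block above, after the existing assertions, rather than committed code.

      // Hypothetical extra assertion (not in the committed test): RBW blocks
      // must not be queued for re-replication once HDFS-2822 is applied.
      assertEquals(0, ns.getUnderReplicatedBlocks());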