HDFS-15200. Delete Corrupt Replica Immediately Irrespective of Replicas On Stale Storage. Contributed by Ayush Saxena.
This commit is contained in:
parent
a68ca4a292
commit
9e50a1082b
|
@ -269,6 +269,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
|||
= "dfs.namenode.blockreport.queue.size";
|
||||
public static final int DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT
|
||||
= 1024;
|
||||
|
||||
public static final String
|
||||
DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED =
|
||||
"dfs.namenode.corrupt.block.delete.immediately.enabled";
|
||||
public static final boolean
|
||||
DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED_DEFAULT = true;
|
||||
|
||||
public static final String DFS_WEBHDFS_AUTHENTICATION_FILTER_KEY = "dfs.web.authentication.filter";
|
||||
/* Phrased as below to avoid javac inlining as a constant, to match the behavior when
|
||||
this was AuthFilter.class.getName(). Note that if you change the import for AuthFilter, you
|
||||
|
|
|
@ -447,6 +447,11 @@ public class BlockManager implements BlockStatsMXBean {
|
|||
* from ENTERING_MAINTENANCE to IN_MAINTENANCE.
|
||||
*/
|
||||
private final short minReplicationToBeInMaintenance;
|
||||
/**
|
||||
* Whether to delete corrupt replica immediately irrespective of other
|
||||
* replicas available on stale storages.
|
||||
*/
|
||||
private final boolean deleteCorruptReplicaImmediately;
|
||||
|
||||
/** Storages accessible from multiple DNs. */
|
||||
private final ProvidedStorageMap providedStorageMap;
|
||||
|
@ -597,6 +602,10 @@ public class BlockManager implements BlockStatsMXBean {
|
|||
DFSConfigKeys.DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT);
|
||||
blockReportThread = new BlockReportProcessingThread(queueSize);
|
||||
|
||||
this.deleteCorruptReplicaImmediately =
|
||||
conf.getBoolean(DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED,
|
||||
DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED_DEFAULT);
|
||||
|
||||
LOG.info("defaultReplication = {}", defaultReplication);
|
||||
LOG.info("maxReplication = {}", maxReplication);
|
||||
LOG.info("minReplication = {}", minReplication);
|
||||
|
@ -1851,7 +1860,7 @@ public class BlockManager implements BlockStatsMXBean {
|
|||
}
|
||||
|
||||
// Check how many copies we have of the block
|
||||
if (nr.replicasOnStaleNodes() > 0) {
|
||||
if (nr.replicasOnStaleNodes() > 0 && !deleteCorruptReplicaImmediately) {
|
||||
blockLog.debug("BLOCK* invalidateBlocks: postponing " +
|
||||
"invalidation of {} on {} because {} replica(s) are located on " +
|
||||
"nodes with potentially out-of-date block reports", b, dn,
|
||||
|
|
|
@ -5246,6 +5246,15 @@
|
|||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.corrupt.block.delete.immediately.enabled</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
Whether the corrupt replicas should be deleted immediately, irrespective
|
||||
of other replicas on stale storages..
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.journalnode.edits.dir.perm</name>
|
||||
<value>700</value>
|
||||
|
|
|
@ -23,6 +23,7 @@ import com.google.common.collect.LinkedListMultimap;
|
|||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
||||
import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.CreateFlag;
|
||||
|
@ -50,6 +51,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTar
|
|||
import org.apache.hadoop.hdfs.server.common.blockaliasmap.BlockAliasMap;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
|
||||
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
|
||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.TestProvidedImpl;
|
||||
import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils;
|
||||
|
@ -503,6 +505,40 @@ public class TestBlockManager {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 60000)
|
||||
public void testDeleteCorruptReplicaWithStatleStorages() throws Exception {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
|
||||
MIN_REPLICATION, 2);
|
||||
Path file = new Path("/test-file");
|
||||
MiniDFSCluster cluster =
|
||||
new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
|
||||
try {
|
||||
cluster.waitActive();
|
||||
BlockManager blockManager = cluster.getNamesystem().getBlockManager();
|
||||
blockManager.getDatanodeManager().markAllDatanodesStale();
|
||||
FileSystem fs = cluster.getFileSystem();
|
||||
FSDataOutputStream out = fs.create(file);
|
||||
for (int i = 0; i < 1024 * 1024 * 1; i++) {
|
||||
out.write(i);
|
||||
}
|
||||
out.hflush();
|
||||
MiniDFSCluster.DataNodeProperties datanode = cluster.stopDataNode(0);
|
||||
for (int i = 0; i < 1024 * 1024 * 1; i++) {
|
||||
out.write(i);
|
||||
}
|
||||
out.close();
|
||||
cluster.restartDataNode(datanode);
|
||||
cluster.triggerBlockReports();
|
||||
DataNodeTestUtils.triggerBlockReport(datanode.getDatanode());
|
||||
assertEquals(0, blockManager.getCorruptBlocks());
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tell the block manager that replication is completed for the given
|
||||
* pipeline.
|
||||
|
|
|
@ -27,6 +27,8 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
|||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
|
||||
|
||||
/**
|
||||
* Tests corruption of replicas in case of failover.
|
||||
*/
|
||||
|
@ -35,6 +37,8 @@ public class TestCorruptionWithFailover {
|
|||
@Test
|
||||
public void testCorruptReplicaAfterFailover() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setBoolean(DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED,
|
||||
false);
|
||||
// Enable data to be written, to less replicas in case of pipeline failure.
|
||||
conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
|
||||
MIN_REPLICATION, 2);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
|
||||
import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
@ -187,6 +188,8 @@ public class TestFsck {
|
|||
@Before
|
||||
public void setUp() throws Exception {
|
||||
conf = new Configuration();
|
||||
conf.setBoolean(DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED,
|
||||
false);
|
||||
}
|
||||
|
||||
@After
|
||||
|
|
Loading…
Reference in New Issue