HDFS-13770. dfsadmin -report does not always decrease "missing blocks (with replication factor 1)" metrics when file is deleted. Contributed by Kitti Nanasi.

Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
This commit is contained in:
Kitti Nanasi 2019-06-18 00:24:44 -07:00 committed by Wei-Chiu Chuang
parent ce49f8a5df
commit abd11f9db1
2 changed files with 109 additions and 6 deletions

View File

@ -213,7 +213,7 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
return false;
}
/** remove a block from a under replication queue */
/** remove a block from the under replication queues. */
synchronized boolean remove(BlockInfo block,
int oldReplicas,
int oldReadOnlyReplicas,
@ -221,11 +221,10 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
int oldExpectedReplicas) {
final int priLevel = getPriority(oldReplicas, oldReadOnlyReplicas,
decommissionedReplicas, oldExpectedReplicas);
boolean removedBlock = remove(block, priLevel);
boolean removedBlock = remove(block, priLevel, oldExpectedReplicas);
if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
oldExpectedReplicas == 1 &&
removedBlock) {
corruptReplOneBlocks--;
assert corruptReplOneBlocks >= 0 :
"Number of corrupt blocks with replication factor 1 " +
"should be non-negative";
@ -238,7 +237,7 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
*
* The priLevel parameter is a hint of which queue to query
* first: if negative or &gt;= {@link #LEVEL} this shortcutting
* is not attmpted.
* is not attempted.
*
* If the block is not found in the nominated queue, an attempt is made to
* remove it from all queues.
@ -246,14 +245,34 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
* <i>Warning:</i> This is not a synchronized method.
* @param block block to remove
* @param priLevel expected privilege level
* @return true if the block was found and removed from one of the priority queues
* @return true if the block was found and removed from one of the priority
* queues
*/
boolean remove(BlockInfo block, int priLevel) {
// Delegate to the three-argument overload, using the block's current
// replication factor as the "old" expected replica count so the
// corruptReplOneBlocks stat can be adjusted there.
return remove(block, priLevel, block.getReplication());
}
/**
* Remove a block from the under replication queues.
* For details, see {@link #remove(BlockInfo, int)}.
*
* CorruptReplOneBlocks stat is decremented if the previous replication
* factor was one and the block is removed from the corrupt blocks queue.
*
* @param block block to remove
* @param priLevel expected privilege level
* @param oldExpectedReplicas old replication factor
* @return true if the block was found and removed from one of the priority
* queues
*/
private boolean remove(BlockInfo block, int priLevel,
int oldExpectedReplicas) {
if(priLevel >= 0 && priLevel < LEVEL
&& priorityQueues.get(priLevel).remove(block)) {
NameNode.blockStateChangeLog.debug(
"BLOCK* NameSystem.UnderReplicationBlock.remove: Removing block {}" +
" from priority queue {}", block, priLevel);
decrementBlockStat(priLevel, oldExpectedReplicas);
return true;
} else {
// Try to remove the block from all queues if the block was
@ -263,6 +282,7 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
NameNode.blockStateChangeLog.debug(
"BLOCK* NameSystem.UnderReplicationBlock.remove: Removing block" +
" {} from priority queue {}", block, i);
decrementBlockStat(i, oldExpectedReplicas);
return true;
}
}
@ -270,6 +290,12 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
return false;
}
/**
 * Adjust per-queue statistics after a block has been removed.
 *
 * Only the corrupt-with-replication-factor-one counter is tracked here:
 * it is decremented when the block left the corrupt queue and its
 * replication factor before removal was one.
 *
 * @param priLevel priority queue the block was removed from
 * @param oldExpectedReplicas replication factor the block had before removal
 */
private void decrementBlockStat(int priLevel, int oldExpectedReplicas) {
  if (priLevel != QUEUE_WITH_CORRUPT_BLOCKS || oldExpectedReplicas != 1) {
    return;
  }
  corruptReplOneBlocks--;
}
/**
* Recalculate and potentially update the priority level of a block.
*

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@ -28,6 +29,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.junit.Test;
@ -137,7 +139,82 @@ public class TestUnderReplicatedBlocks {
} finally {
cluster.shutdown();
}
}
/**
 * The test verifies the following scenarios:
 *
 * 1. Missing one replication blocks count is increased
 * when a block with replication factor one becomes corrupt
 * and is decreased if the file is removed.
 *
 * 2. When a file with replication factor two becomes corrupt,
 * then missing one replication blocks count does not change.
 *
 * 3. When a file with replication factor 1 only has read only replicas,
 * then missing one replication blocks count does not change.
 */
@Test(timeout = 60000)
public void testOneReplicaMissingBlocksCountIsReported() {
  UnderReplicatedBlocks underReplicatedBlocks = new UnderReplicatedBlocks();
  BlockInfo blockInfo = new BlockInfoContiguous(new Block(1), (short) 1);

  // 1. Corrupt block with replication factor 1: counted both as a corrupt
  // block and as a corrupt block with replication factor 1; both counters
  // drop back to zero on removal.
  verifyAddAndRemove(underReplicatedBlocks, blockInfo, 0, 1,
      UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS, 1, 1);

  // 2. Same block with replication factor 2: still corrupt, but the
  // replication-factor-one counter must not change.
  blockInfo.setReplication((short) 2);
  verifyAddAndRemove(underReplicatedBlocks, blockInfo, 0, 2,
      UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS, 1, 0);

  // 3. Replication factor 1 with only a read-only replica: queued at
  // highest priority, not corrupt, so neither corrupt counter moves.
  blockInfo.setReplication((short) 1);
  verifyAddAndRemove(underReplicatedBlocks, blockInfo, 1, 1,
      UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY, 0, 0);
}

/**
 * Add {@code blockInfo} to {@code queues}, verify it landed in
 * {@code expectedLevel} with the expected corrupt-block statistics, then
 * remove it and verify both corrupt counters are back to zero.
 *
 * @param queues under replicated block queues under test
 * @param blockInfo block to add and remove
 * @param readOnlyReplicas number of read only replicas passed to add()
 * @param expectedReplicas replication factor passed to add()
 * @param expectedLevel priority queue the block is expected to be placed in
 * @param expectedCorrupt expected corrupt block count after add()
 * @param expectedCorruptReplOne expected corrupt-with-replication-factor-one
 *                               count after add()
 */
private void verifyAddAndRemove(UnderReplicatedBlocks queues,
    BlockInfo blockInfo, int readOnlyReplicas, int expectedReplicas,
    int expectedLevel, int expectedCorrupt, int expectedCorruptReplOne) {
  assertTrue(queues.add(blockInfo, 0, readOnlyReplicas, 0,
      expectedReplicas));
  assertInLevel(queues, blockInfo, expectedLevel);
  assertEquals(expectedCorrupt, queues.getCorruptBlockSize());
  assertEquals(expectedCorruptReplOne,
      queues.getCorruptReplOneBlockSize());

  queues.remove(blockInfo, UnderReplicatedBlocks.LEVEL);
  assertEquals(0, queues.getCorruptBlockSize());
  assertEquals(0, queues.getCorruptReplOneBlockSize());
}
/**
 * Assert that {@code block} is currently queued at {@code level}.
 *
 * Rather than widening the UnderReplicatedBlocks API, this walks the
 * per-level iterator looking for a match and fails if none is found.
 * The linear scan is inefficient, but acceptable in a test case.
 *
 * @param queues queues to scan
 * @param block block to look for
 * @param level level to select
 */
private void assertInLevel(UnderReplicatedBlocks queues,
                           Block block,
                           int level) {
  for (Iterator<BlockInfo> it = queues.iterator(level); it.hasNext();) {
    if (block.equals(it.next())) {
      return;
    }
  }
  fail("Block " + block + " not found in level " + level);
}
}