HDFS-15386. ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories (#2054)
Contributed by Toshihiro Suzuki.
This commit is contained in:
parent
14ff6171a5
commit
d1f4c8f10f
|
@ -574,7 +574,11 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
// Unlike updating the volumeMap in addVolume(), this operation does
|
||||
// not scan disks.
|
||||
for (String bpid : volumeMap.getBlockPoolList()) {
|
||||
List<ReplicaInfo> blocks = new ArrayList<>();
|
||||
List<ReplicaInfo> blocks = blkToInvalidate.get(bpid);
|
||||
if (blocks == null) {
|
||||
blocks = new ArrayList<>();
|
||||
blkToInvalidate.put(bpid, blocks);
|
||||
}
|
||||
for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
|
||||
it.hasNext(); ) {
|
||||
ReplicaInfo block = it.next();
|
||||
|
@ -585,9 +589,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
it.remove();
|
||||
}
|
||||
}
|
||||
blkToInvalidate.put(bpid, blocks);
|
||||
}
|
||||
|
||||
storageToRemove.add(sd.getStorageUuid());
|
||||
storageLocationsToRemove.remove(absRoot);
|
||||
}
|
||||
|
|
|
@ -88,12 +88,15 @@ import static org.junit.Assert.assertSame;
|
|||
import static org.junit.Assert.fail;
|
||||
import static org.mockito.Matchers.any;
|
||||
import static org.mockito.Matchers.anyListOf;
|
||||
import static org.mockito.Matchers.anyObject;
|
||||
import static org.mockito.Matchers.anyString;
|
||||
import static org.mockito.Matchers.eq;
|
||||
import static org.mockito.Mockito.doReturn;
|
||||
import static org.mockito.Mockito.doThrow;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.spy;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
|
@ -247,16 +250,23 @@ public class TestFsDatasetImpl {
|
|||
}
|
||||
|
||||
@Test(timeout = 30000)
|
||||
public void testRemoveVolumes() throws IOException {
|
||||
public void testRemoveOneVolume() throws IOException {
|
||||
// Feed FsDataset with block metadata.
|
||||
final int NUM_BLOCKS = 100;
|
||||
for (int i = 0; i < NUM_BLOCKS; i++) {
|
||||
String bpid = BLOCK_POOL_IDS[NUM_BLOCKS % BLOCK_POOL_IDS.length];
|
||||
final int numBlocks = 100;
|
||||
for (int i = 0; i < numBlocks; i++) {
|
||||
String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
|
||||
ExtendedBlock eb = new ExtendedBlock(bpid, i);
|
||||
try (ReplicaHandler replica =
|
||||
dataset.createRbw(StorageType.DEFAULT, eb, false)) {
|
||||
ReplicaHandler replica = null;
|
||||
try {
|
||||
replica = dataset.createRbw(StorageType.DEFAULT, eb, false);
|
||||
} finally {
|
||||
if (replica != null) {
|
||||
replica.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove one volume
|
||||
final String[] dataDirs =
|
||||
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
|
||||
final String volumePathToRemove = dataDirs[0];
|
||||
|
@ -271,6 +281,11 @@ public class TestFsDatasetImpl {
|
|||
assertEquals("The volume has been removed from the storageMap.",
|
||||
expectedNumVolumes, dataset.storageMap.size());
|
||||
|
||||
// DataNode.notifyNamenodeDeletedBlock() should be called 50 times
|
||||
// as we deleted one volume that has 50 blocks
|
||||
verify(datanode, times(50))
|
||||
.notifyNamenodeDeletedBlock((ExtendedBlock) anyObject(), anyString());
|
||||
|
||||
try {
|
||||
dataset.asyncDiskService.execute(volumesToRemove.iterator().next(),
|
||||
new Runnable() {
|
||||
|
@ -288,10 +303,70 @@ public class TestFsDatasetImpl {
|
|||
totalNumReplicas += dataset.volumeMap.size(bpid);
|
||||
}
|
||||
assertEquals("The replica infos on this volume has been removed from the "
|
||||
+ "volumeMap.", NUM_BLOCKS / NUM_INIT_VOLUMES,
|
||||
+ "volumeMap.", numBlocks / NUM_INIT_VOLUMES,
|
||||
totalNumReplicas);
|
||||
}
|
||||
|
||||
@Test(timeout = 30000)
|
||||
public void testRemoveTwoVolumes() throws IOException {
|
||||
// Feed FsDataset with block metadata.
|
||||
final int numBlocks = 100;
|
||||
for (int i = 0; i < numBlocks; i++) {
|
||||
String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
|
||||
ExtendedBlock eb = new ExtendedBlock(bpid, i);
|
||||
ReplicaHandler replica = null;
|
||||
try {
|
||||
replica = dataset.createRbw(StorageType.DEFAULT, eb, false);
|
||||
} finally {
|
||||
if (replica != null) {
|
||||
replica.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove two volumes
|
||||
final String[] dataDirs =
|
||||
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
|
||||
Set<File> volumesToRemove = new HashSet<>();
|
||||
volumesToRemove.add(StorageLocation.parse(dataDirs[0]).getFile()
|
||||
.getAbsoluteFile());
|
||||
volumesToRemove.add(StorageLocation.parse(dataDirs[1]).getFile()
|
||||
.getAbsoluteFile());
|
||||
|
||||
dataset.removeVolumes(volumesToRemove, true);
|
||||
int expectedNumVolumes = dataDirs.length - 2;
|
||||
assertEquals("The volume has been removed from the volumeList.",
|
||||
expectedNumVolumes, getNumVolumes());
|
||||
assertEquals("The volume has been removed from the storageMap.",
|
||||
expectedNumVolumes, dataset.storageMap.size());
|
||||
|
||||
// DataNode.notifyNamenodeDeletedBlock() should be called 100 times
|
||||
// as we deleted 2 volumes that have 100 blocks totally
|
||||
verify(datanode, times(100))
|
||||
.notifyNamenodeDeletedBlock((ExtendedBlock) anyObject(), anyString());
|
||||
|
||||
for (File volume : volumesToRemove) {
|
||||
try {
|
||||
dataset.asyncDiskService.execute(volume,
|
||||
new Runnable() {
|
||||
@Override
|
||||
public void run() {}
|
||||
});
|
||||
fail("Expect RuntimeException: the volume has been removed from the "
|
||||
+ "AsyncDiskService.");
|
||||
} catch (RuntimeException e) {
|
||||
GenericTestUtils.assertExceptionContains("Cannot find root", e);
|
||||
}
|
||||
}
|
||||
|
||||
int totalNumReplicas = 0;
|
||||
for (String bpid : dataset.volumeMap.getBlockPoolList()) {
|
||||
totalNumReplicas += dataset.volumeMap.size(bpid);
|
||||
}
|
||||
assertEquals("The replica infos on this volume has been removed from the "
|
||||
+ "volumeMap.", 0, totalNumReplicas);
|
||||
}
|
||||
|
||||
@Test(timeout = 5000)
|
||||
public void testRemoveNewlyAddedVolume() throws IOException {
|
||||
final int numExistingVolumes = getNumVolumes();
|
||||
|
|
Loading…
Reference in New Issue