HDFS-15386. ReplicaNotFoundException keeps happening in DN after removing multiple DN's data directories (#2054)
Contributed by Toshihiro Suzuki.
This commit is contained in:
parent
14ff6171a5
commit
d1f4c8f10f
|
@ -574,7 +574,11 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
||||||
// Unlike updating the volumeMap in addVolume(), this operation does
|
// Unlike updating the volumeMap in addVolume(), this operation does
|
||||||
// not scan disks.
|
// not scan disks.
|
||||||
for (String bpid : volumeMap.getBlockPoolList()) {
|
for (String bpid : volumeMap.getBlockPoolList()) {
|
||||||
List<ReplicaInfo> blocks = new ArrayList<>();
|
List<ReplicaInfo> blocks = blkToInvalidate.get(bpid);
|
||||||
|
if (blocks == null) {
|
||||||
|
blocks = new ArrayList<>();
|
||||||
|
blkToInvalidate.put(bpid, blocks);
|
||||||
|
}
|
||||||
for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
|
for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
|
||||||
it.hasNext(); ) {
|
it.hasNext(); ) {
|
||||||
ReplicaInfo block = it.next();
|
ReplicaInfo block = it.next();
|
||||||
|
@ -585,9 +589,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
||||||
it.remove();
|
it.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
blkToInvalidate.put(bpid, blocks);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
storageToRemove.add(sd.getStorageUuid());
|
storageToRemove.add(sd.getStorageUuid());
|
||||||
storageLocationsToRemove.remove(absRoot);
|
storageLocationsToRemove.remove(absRoot);
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,12 +88,15 @@ import static org.junit.Assert.assertSame;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Matchers.anyListOf;
|
import static org.mockito.Matchers.anyListOf;
|
||||||
|
import static org.mockito.Matchers.anyObject;
|
||||||
import static org.mockito.Matchers.anyString;
|
import static org.mockito.Matchers.anyString;
|
||||||
import static org.mockito.Matchers.eq;
|
import static org.mockito.Matchers.eq;
|
||||||
import static org.mockito.Mockito.doReturn;
|
import static org.mockito.Mockito.doReturn;
|
||||||
import static org.mockito.Mockito.doThrow;
|
import static org.mockito.Mockito.doThrow;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.spy;
|
import static org.mockito.Mockito.spy;
|
||||||
|
import static org.mockito.Mockito.times;
|
||||||
|
import static org.mockito.Mockito.verify;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -247,16 +250,23 @@ public class TestFsDatasetImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 30000)
|
@Test(timeout = 30000)
|
||||||
public void testRemoveVolumes() throws IOException {
|
public void testRemoveOneVolume() throws IOException {
|
||||||
// Feed FsDataset with block metadata.
|
// Feed FsDataset with block metadata.
|
||||||
final int NUM_BLOCKS = 100;
|
final int numBlocks = 100;
|
||||||
for (int i = 0; i < NUM_BLOCKS; i++) {
|
for (int i = 0; i < numBlocks; i++) {
|
||||||
String bpid = BLOCK_POOL_IDS[NUM_BLOCKS % BLOCK_POOL_IDS.length];
|
String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
|
||||||
ExtendedBlock eb = new ExtendedBlock(bpid, i);
|
ExtendedBlock eb = new ExtendedBlock(bpid, i);
|
||||||
try (ReplicaHandler replica =
|
ReplicaHandler replica = null;
|
||||||
dataset.createRbw(StorageType.DEFAULT, eb, false)) {
|
try {
|
||||||
|
replica = dataset.createRbw(StorageType.DEFAULT, eb, false);
|
||||||
|
} finally {
|
||||||
|
if (replica != null) {
|
||||||
|
replica.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove one volume
|
||||||
final String[] dataDirs =
|
final String[] dataDirs =
|
||||||
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
|
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
|
||||||
final String volumePathToRemove = dataDirs[0];
|
final String volumePathToRemove = dataDirs[0];
|
||||||
|
@ -271,6 +281,11 @@ public class TestFsDatasetImpl {
|
||||||
assertEquals("The volume has been removed from the storageMap.",
|
assertEquals("The volume has been removed from the storageMap.",
|
||||||
expectedNumVolumes, dataset.storageMap.size());
|
expectedNumVolumes, dataset.storageMap.size());
|
||||||
|
|
||||||
|
// DataNode.notifyNamenodeDeletedBlock() should be called 50 times
|
||||||
|
// as we deleted one volume that has 50 blocks
|
||||||
|
verify(datanode, times(50))
|
||||||
|
.notifyNamenodeDeletedBlock((ExtendedBlock) anyObject(), anyString());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
dataset.asyncDiskService.execute(volumesToRemove.iterator().next(),
|
dataset.asyncDiskService.execute(volumesToRemove.iterator().next(),
|
||||||
new Runnable() {
|
new Runnable() {
|
||||||
|
@ -288,10 +303,70 @@ public class TestFsDatasetImpl {
|
||||||
totalNumReplicas += dataset.volumeMap.size(bpid);
|
totalNumReplicas += dataset.volumeMap.size(bpid);
|
||||||
}
|
}
|
||||||
assertEquals("The replica infos on this volume has been removed from the "
|
assertEquals("The replica infos on this volume has been removed from the "
|
||||||
+ "volumeMap.", NUM_BLOCKS / NUM_INIT_VOLUMES,
|
+ "volumeMap.", numBlocks / NUM_INIT_VOLUMES,
|
||||||
totalNumReplicas);
|
totalNumReplicas);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testRemoveTwoVolumes() throws IOException {
|
||||||
|
// Feed FsDataset with block metadata.
|
||||||
|
final int numBlocks = 100;
|
||||||
|
for (int i = 0; i < numBlocks; i++) {
|
||||||
|
String bpid = BLOCK_POOL_IDS[numBlocks % BLOCK_POOL_IDS.length];
|
||||||
|
ExtendedBlock eb = new ExtendedBlock(bpid, i);
|
||||||
|
ReplicaHandler replica = null;
|
||||||
|
try {
|
||||||
|
replica = dataset.createRbw(StorageType.DEFAULT, eb, false);
|
||||||
|
} finally {
|
||||||
|
if (replica != null) {
|
||||||
|
replica.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove two volumes
|
||||||
|
final String[] dataDirs =
|
||||||
|
conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
|
||||||
|
Set<File> volumesToRemove = new HashSet<>();
|
||||||
|
volumesToRemove.add(StorageLocation.parse(dataDirs[0]).getFile()
|
||||||
|
.getAbsoluteFile());
|
||||||
|
volumesToRemove.add(StorageLocation.parse(dataDirs[1]).getFile()
|
||||||
|
.getAbsoluteFile());
|
||||||
|
|
||||||
|
dataset.removeVolumes(volumesToRemove, true);
|
||||||
|
int expectedNumVolumes = dataDirs.length - 2;
|
||||||
|
assertEquals("The volume has been removed from the volumeList.",
|
||||||
|
expectedNumVolumes, getNumVolumes());
|
||||||
|
assertEquals("The volume has been removed from the storageMap.",
|
||||||
|
expectedNumVolumes, dataset.storageMap.size());
|
||||||
|
|
||||||
|
// DataNode.notifyNamenodeDeletedBlock() should be called 100 times
|
||||||
|
// as we deleted 2 volumes that have 100 blocks in total
|
||||||
|
verify(datanode, times(100))
|
||||||
|
.notifyNamenodeDeletedBlock((ExtendedBlock) anyObject(), anyString());
|
||||||
|
|
||||||
|
for (File volume : volumesToRemove) {
|
||||||
|
try {
|
||||||
|
dataset.asyncDiskService.execute(volume,
|
||||||
|
new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {}
|
||||||
|
});
|
||||||
|
fail("Expect RuntimeException: the volume has been removed from the "
|
||||||
|
+ "AsyncDiskService.");
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
GenericTestUtils.assertExceptionContains("Cannot find root", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int totalNumReplicas = 0;
|
||||||
|
for (String bpid : dataset.volumeMap.getBlockPoolList()) {
|
||||||
|
totalNumReplicas += dataset.volumeMap.size(bpid);
|
||||||
|
}
|
||||||
|
assertEquals("The replica infos on this volume has been removed from the "
|
||||||
|
+ "volumeMap.", 0, totalNumReplicas);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout = 5000)
|
@Test(timeout = 5000)
|
||||||
public void testRemoveNewlyAddedVolume() throws IOException {
|
public void testRemoveNewlyAddedVolume() throws IOException {
|
||||||
final int numExistingVolumes = getNumVolumes();
|
final int numExistingVolumes = getNumVolumes();
|
||||||
|
|
Loading…
Reference in New Issue