HDDS-1025. Handle replication of closed containers in DeadNodeHanlder. Contributed by Bharat Viswanadham.

This commit is contained in:
Yiqun Lin 2019-02-01 11:34:51 +08:00
parent 0ab7fc9200
commit 16195eaee1
3 changed files with 77 additions and 17 deletions

View File

@ -21,6 +21,7 @@ package org.apache.hadoop.hdds.scm.node;
import java.util.Set; import java.util.Set;
import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerException; import org.apache.hadoop.hdds.scm.container.ContainerException;
import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerInfo;
@ -81,8 +82,6 @@ public class DeadNodeHandler implements EventHandler<DatanodeDetails> {
try { try {
final ContainerInfo container = containerManager.getContainer(id); final ContainerInfo container = containerManager.getContainer(id);
// TODO: For open containers, trigger close on other nodes // TODO: For open containers, trigger close on other nodes
// TODO: Check replica count and call replication manager
// on these containers.
if (!container.isOpen()) { if (!container.isOpen()) {
Set<ContainerReplica> replicas = containerManager Set<ContainerReplica> replicas = containerManager
.getContainerReplicas(id); .getContainerReplicas(id);
@ -92,6 +91,9 @@ public class DeadNodeHandler implements EventHandler<DatanodeDetails> {
.ifPresent(replica -> { .ifPresent(replica -> {
try { try {
containerManager.removeContainerReplica(id, replica); containerManager.removeContainerReplica(id, replica);
ContainerInfo containerInfo =
containerManager.getContainer(id);
replicateIfNeeded(containerInfo, publisher);
} catch (ContainerException ex) { } catch (ContainerException ex) {
LOG.warn("Exception while removing container replica #{} " + LOG.warn("Exception while removing container replica #{} " +
"for container #{}.", replica, container, ex); "for container #{}.", replica, container, ex);
@ -109,13 +111,21 @@ public class DeadNodeHandler implements EventHandler<DatanodeDetails> {
*/ */
private void replicateIfNeeded(ContainerInfo container, private void replicateIfNeeded(ContainerInfo container,
EventPublisher publisher) throws ContainerNotFoundException { EventPublisher publisher) throws ContainerNotFoundException {
final int existingReplicas = containerManager // Replicate only closed and Quasi closed containers
.getContainerReplicas(container.containerID()).size(); if (container.getState() == HddsProtos.LifeCycleState.CLOSED ||
final int expectedReplicas = container.getReplicationFactor().getNumber(); container.getState() == HddsProtos.LifeCycleState.QUASI_CLOSED) {
if (existingReplicas != expectedReplicas) { final int existingReplicas = containerManager
publisher.fireEvent(SCMEvents.REPLICATE_CONTAINER, .getContainerReplicas(container.containerID()).size();
new ReplicationRequest( final int expectedReplicas = container.getReplicationFactor().getNumber();
container.getContainerID(), existingReplicas, expectedReplicas)); if (existingReplicas != expectedReplicas) {
LOG.debug("Replicate Request fired for container {}, exisiting " +
"replica count {}, expected replica count {}",
container.getContainerID(), existingReplicas, expectedReplicas);
publisher.fireEvent(SCMEvents.REPLICATE_CONTAINER,
new ReplicationRequest(
container.getContainerID(), existingReplicas,
expectedReplicas));
}
} }
} }

View File

@ -446,4 +446,19 @@ public final class TestUtils {
id, HddsProtos.LifeCycleEvent.CLOSE); id, HddsProtos.LifeCycleEvent.CLOSE);
} }
/**
* Move the container to Quaise close state.
* @param containerManager
* @param id
* @throws IOException
*/
public static void quasiCloseContainer(ContainerManager containerManager,
ContainerID id) throws IOException {
containerManager.updateContainerState(
id, HddsProtos.LifeCycleEvent.FINALIZE);
containerManager.updateContainerState(
id, HddsProtos.LifeCycleEvent.QUASI_CLOSE);
}
} }

View File

@ -59,6 +59,7 @@ import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.slf4j.event.Level;
/** /**
* Test DeadNodeHandler. * Test DeadNodeHandler.
@ -140,17 +141,25 @@ public class TestDeadNodeHandler {
TestUtils.allocateContainer(containerManager); TestUtils.allocateContainer(containerManager);
ContainerInfo container3 = ContainerInfo container3 =
TestUtils.allocateContainer(containerManager); TestUtils.allocateContainer(containerManager);
ContainerInfo container4 =
TestUtils.allocateContainer(containerManager);
registerReplicas(datanode1, container1, container2); registerContainers(datanode1, container1, container2, container4);
registerReplicas(datanode2, container1, container3); registerContainers(datanode2, container1, container2);
registerContainers(datanode3, container3);
registerReplicas(containerManager, container1, datanode1, datanode2); registerReplicas(containerManager, container1, datanode1, datanode2);
registerReplicas(containerManager, container2, datanode1); registerReplicas(containerManager, container2, datanode1, datanode2);
registerReplicas(containerManager, container3, datanode2); registerReplicas(containerManager, container3, datanode3);
registerReplicas(containerManager, container4, datanode1);
TestUtils.closeContainer(containerManager, container1.containerID()); TestUtils.closeContainer(containerManager, container1.containerID());
TestUtils.closeContainer(containerManager, container2.containerID()); TestUtils.closeContainer(containerManager, container2.containerID());
TestUtils.closeContainer(containerManager, container3.containerID()); TestUtils.quasiCloseContainer(containerManager, container3.containerID());
GenericTestUtils.setLogLevel(DeadNodeHandler.getLogger(), Level.DEBUG);
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
.captureLogs(DeadNodeHandler.getLogger());
deadNodeHandler.onMessage(datanode1, publisher); deadNodeHandler.onMessage(datanode1, publisher);
@ -162,13 +171,33 @@ public class TestDeadNodeHandler {
Set<ContainerReplica> container2Replicas = containerManager Set<ContainerReplica> container2Replicas = containerManager
.getContainerReplicas(new ContainerID(container2.getContainerID())); .getContainerReplicas(new ContainerID(container2.getContainerID()));
Assert.assertEquals(0, container2Replicas.size()); Assert.assertEquals(1, container2Replicas.size());
Assert.assertEquals(datanode2,
container2Replicas.iterator().next().getDatanodeDetails());
Set<ContainerReplica> container3Replicas = containerManager Set<ContainerReplica> container3Replicas = containerManager
.getContainerReplicas(new ContainerID(container3.getContainerID())); .getContainerReplicas(new ContainerID(container3.getContainerID()));
Assert.assertEquals(1, container3Replicas.size()); Assert.assertEquals(1, container3Replicas.size());
Assert.assertEquals(datanode2, Assert.assertEquals(datanode3,
container3Replicas.iterator().next().getDatanodeDetails()); container3Replicas.iterator().next().getDatanodeDetails());
// Replicate should be fired for container 1 and container 2 as now
// datanode 1 is dead, these 2 will not match with expected replica count
// and their state is one of CLOSED/QUASI_CLOSE.
Assert.assertTrue(logCapturer.getOutput().contains(
"Replicate Request fired for container " +
container1.getContainerID()));
Assert.assertTrue(logCapturer.getOutput().contains(
"Replicate Request fired for container " +
container2.getContainerID()));
// as container4 is still in open state, replicate event should not have
// fired for this.
Assert.assertFalse(logCapturer.getOutput().contains(
"Replicate Request fired for container " +
container4.getContainerID()));
} }
@Test @Test
@ -272,7 +301,13 @@ public class TestDeadNodeHandler {
} }
} }
private void registerReplicas(DatanodeDetails datanode, /**
* Update containers available on the datanode.
* @param datanode
* @param containers
* @throws NodeNotFoundException
*/
private void registerContainers(DatanodeDetails datanode,
ContainerInfo... containers) ContainerInfo... containers)
throws NodeNotFoundException { throws NodeNotFoundException {
nodeManager nodeManager