From 2172399c55b481ea0da8cf2e2cb91ea6d8140b27 Mon Sep 17 00:00:00 2001 From: Nanda kumar Date: Tue, 28 Aug 2018 22:19:52 +0530 Subject: [PATCH] HDDS-381. Fix TestKeys#testPutAndGetKeyWithDnRestart. Contributed by Mukul Kumar Singh. --- .../common/transport/server/GrpcXceiverService.java | 8 +++----- .../org/apache/hadoop/ozone/MiniOzoneCluster.java | 3 ++- .../apache/hadoop/ozone/MiniOzoneClusterImpl.java | 13 +++++++++++-- .../commandhandler/TestBlockDeletion.java | 9 +++++++-- .../apache/hadoop/ozone/web/client/TestKeys.java | 11 ++++------- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java index df6220cec7d..db4a86aa8c5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java @@ -56,10 +56,8 @@ public class GrpcXceiverService extends ContainerCommandResponseProto resp = dispatcher.dispatch(request); responseObserver.onNext(resp); } catch (Throwable e) { - if (LOG.isDebugEnabled()) { - LOG.debug("{} got exception when processing" + LOG.error("{} got exception when processing" + " ContainerCommandRequestProto {}: {}", request, e); - } responseObserver.onError(e); } } @@ -67,13 +65,13 @@ public class GrpcXceiverService extends @Override public void onError(Throwable t) { // for now we just log a msg - LOG.info("{}: ContainerCommand send on error. Exception: {}", t); + LOG.error("{}: ContainerCommand send on error. Exception: {}", t); } @Override public void onCompleted() { if (isClosed.compareAndSet(false, true)) { - LOG.info("{}: ContainerCommand send completed"); + LOG.debug("{}: ContainerCommand send completed"); responseObserver.onCompleted(); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java index b568672db34..ae6a91ebbe0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java @@ -152,7 +152,8 @@ public interface MiniOzoneCluster { * * @param i index of HddsDatanode in the MiniOzoneCluster */ - void restartHddsDatanode(int i); + void restartHddsDatanode(int i) throws InterruptedException, + TimeoutException; /** * Shutdown a particular HddsDatanode. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 9b7e399efd1..e06e2f6c7c2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -216,7 +216,8 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster { } @Override - public void restartHddsDatanode(int i) { + public void restartHddsDatanode(int i) throws InterruptedException, + TimeoutException { HddsDatanodeService datanodeService = hddsDatanodes.get(i); datanodeService.stop(); datanodeService.join(); @@ -230,7 +231,15 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster { .getPort(DatanodeDetails.Port.Name.RATIS).getValue(); conf.setInt(DFS_CONTAINER_RATIS_IPC_PORT, ratisPort); conf.setBoolean(DFS_CONTAINER_RATIS_IPC_RANDOM_PORT, false); - datanodeService.start(null); + hddsDatanodes.remove(i); + // wait for node to be removed from SCM healthy node list. + waitForClusterToBeReady(); + HddsDatanodeService service = + HddsDatanodeService.createHddsDatanodeService(conf); + hddsDatanodes.add(i, service); + service.start(null); + // wait for the node to be identified as a healthy node again. + waitForClusterToBeReady(); } @Override diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java index 57941515e9c..08bc5563307 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java @@ -57,8 +57,13 @@ import org.junit.Test; import java.io.File; import java.io.IOException; -import java.util.*; +import java.util.Set; +import java.util.List; +import java.util.HashSet; +import java.util.ArrayList; +import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import static org.apache.hadoop.hdds .HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; @@ -109,7 +114,7 @@ public class TestBlockDeletion { @Test(timeout = 60000) @Ignore("Until delete background service is fixed.") public void testBlockDeletion() - throws IOException, InterruptedException { + throws IOException, InterruptedException, TimeoutException { String volumeName = UUID.randomUUID().toString(); String bucketName = UUID.randomUUID().toString(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/web/client/TestKeys.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/web/client/TestKeys.java index a87f3f65006..21887beb3b9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/web/client/TestKeys.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/web/client/TestKeys.java @@ -77,7 +77,6 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.InputStream; import java.io.IOException; -import java.net.URISyntaxException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; @@ -90,6 +89,8 @@ import java.util.stream.Collectors; import static org.apache.hadoop.hdds .HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys + .OZONE_SCM_STALENODE_INTERVAL; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -127,6 +128,7 @@ public class TestKeys { conf.setTimeDuration(OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1000, TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); + conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 30, TimeUnit.SECONDS); path = GenericTestUtils.getTempPath(TestKeys.class.getSimpleName()); Logger.getLogger("log4j.logger.org.apache.http").setLevel(Level.DEBUG); @@ -322,7 +324,7 @@ public class TestKeys { } private static void restartDatanode(MiniOzoneCluster cluster, int datanodeIdx) - throws OzoneException, URISyntaxException { + throws Exception { cluster.restartHddsDatanode(datanodeIdx); } @@ -344,11 +346,6 @@ public class TestKeys { // restart the datanode restartDatanode(cluster, 0); - // TODO: Try removing sleep and adding a join for the MiniOzoneCluster start - // The ozoneContainer is not started and its metrics are not initialized - // which leads to NullPointerException in Dispatcher. - Thread.sleep(1000); - ozoneCluster.waitForClusterToBeReady(); // verify getKey after the datanode restart String newFileName = helper.dir + "/" + OzoneUtils.getRequestID().toLowerCase();