From 66bd1681f8eec7c637a5deb0348f7b7901db64df Mon Sep 17 00:00:00 2001 From: Vivek Ratnavel Subramanian Date: Mon, 16 Sep 2019 12:58:16 -0700 Subject: [PATCH] HDDS-2107. Datanodes should retry forever to connect to SCM in an unsecure environment (#1424) --- .../common/statemachine/SCMConnectionManager.java | 11 +++++++++-- .../TestOzoneManagerDoubleBufferWithOMResponse.java | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java index f6f64a46119..ce31ebdf4d6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java @@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.container.common.statemachine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.metrics2.util.MBeans; @@ -38,6 +40,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -139,10 +142,14 @@ public class SCMConnectionManager long version = RPC.getProtocolVersion(StorageContainerDatanodeProtocolPB.class); - StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProxy( + RetryPolicy retryPolicy = + RetryPolicies.retryForeverWithFixedSleep( + 1000, TimeUnit.MILLISECONDS); + StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProtocolProxy( StorageContainerDatanodeProtocolPB.class, version, address, UserGroupInformation.getCurrentUser(), conf, - NetUtils.getDefaultSocketFactory(conf), getRpcTimeout()); + NetUtils.getDefaultSocketFactory(conf), getRpcTimeout(), + retryPolicy).getProxy(); StorageContainerDatanodeProtocolClientSideTranslatorPB rpcClient = new StorageContainerDatanodeProtocolClientSideTranslatorPB(rpcProxy); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java index d0ee3c5f70b..468668c2c3c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java @@ -119,7 +119,7 @@ public class TestOzoneManagerDoubleBufferWithOMResponse { * transactions or not. * @throws Exception */ - @Test(timeout = 300_000) + @Test(timeout = 500_000) public void testDoubleBuffer() throws Exception { // This test checks whether count in tables are correct or not. testDoubleBuffer(1, 10); @@ -397,7 +397,7 @@ public class TestOzoneManagerDoubleBufferWithOMResponse { } return count == iterations; - }, 300, 40000); + }, 300, 300000); GenericTestUtils.waitFor(() -> { @@ -409,7 +409,7 @@ public class TestOzoneManagerDoubleBufferWithOMResponse { fail("testDoubleBuffer failed"); } return count == bucketCount * iterations; - }, 300, 40000); + }, 300, 300000); Assert.assertTrue(doubleBuffer.getFlushIterations() > 0); } finally {