HDDS-1211. Test SCMChillMode failing randomly in Jenkins run (#543)

This commit is contained in:
Bharat Viswanadham 2019-04-03 15:02:00 -07:00 committed by GitHub
parent 8ff41d6243
commit 358f7f9b99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 44 additions and 67 deletions

View File

@ -20,7 +20,6 @@ package org.apache.hadoop.ozone.om;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@ -38,7 +37,6 @@ import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneClusterImpl;
import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.TestStorageContainerManagerHelper; import org.apache.hadoop.ozone.TestStorageContainerManagerHelper;
import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
@ -116,10 +114,8 @@ public class TestScmChillMode {
} }
} }
@Test @Test(timeout = 300_000)
public void testChillModeOperations() throws Exception { public void testChillModeOperations() throws Exception {
final AtomicReference<MiniOzoneCluster> miniCluster =
new AtomicReference<>();
// Create {numKeys} random names keys. // Create {numKeys} random names keys.
TestStorageContainerManagerHelper helper = TestStorageContainerManagerHelper helper =
new TestStorageContainerManagerHelper(cluster, conf); new TestStorageContainerManagerHelper(cluster, conf);
@ -158,25 +154,21 @@ public class TestScmChillMode {
cluster.stop(); cluster.stop();
new Thread(() -> { try {
try { cluster = builder.build();
miniCluster.set(builder.build()); } catch (IOException e) {
} catch (IOException e) { fail("failed");
fail("failed"); }
}
}).start();
StorageContainerManager scm; StorageContainerManager scm;
GenericTestUtils.waitFor(() -> {
return miniCluster.get() != null;
}, 100, 1000 * 3);
cluster = miniCluster.get();
scm = cluster.getStorageContainerManager(); scm = cluster.getStorageContainerManager();
Assert.assertTrue(scm.isInChillMode()); Assert.assertTrue(scm.isInChillMode());
om = miniCluster.get().getOzoneManager(); om = cluster.getOzoneManager();
// As cluster is restarted with out datanodes restart
LambdaTestUtils.intercept(IOException.class, LambdaTestUtils.intercept(IOException.class,
"ChillModePrecheck failed for allocateBlock", "ChillModePrecheck failed for allocateBlock",
() -> om.openKey(keyArgs)); () -> om.openKey(keyArgs));
@ -185,25 +177,18 @@ public class TestScmChillMode {
/** /**
* Tests inChillMode & forceExitChillMode api calls. * Tests inChillMode & forceExitChillMode api calls.
*/ */
@Test @Test(timeout = 300_000)
public void testIsScmInChillModeAndForceExit() throws Exception { public void testIsScmInChillModeAndForceExit() throws Exception {
final AtomicReference<MiniOzoneCluster> miniCluster =
new AtomicReference<>();
// Test 1: SCM should be out of chill mode. // Test 1: SCM should be out of chill mode.
Assert.assertFalse(storageContainerLocationClient.inChillMode()); Assert.assertFalse(storageContainerLocationClient.inChillMode());
cluster.stop(); cluster.stop();
// Restart the cluster with same metadata dir. // Restart the cluster with same metadata dir.
new Thread(() -> {
try { try {
miniCluster.set(builder.build()); cluster = builder.build();
} catch (IOException e) { } catch (IOException e) {
Assert.fail("Cluster startup failed."); Assert.fail("Cluster startup failed.");
} }
}).start();
GenericTestUtils.waitFor(() -> {
return miniCluster.get() != null;
}, 10, 1000 * 3);
cluster = miniCluster.get();
// Test 2: Scm should be in chill mode as datanodes are not started yet. // Test 2: Scm should be in chill mode as datanodes are not started yet.
storageContainerLocationClient = cluster storageContainerLocationClient = cluster
@ -225,36 +210,36 @@ public class TestScmChillMode {
} }
@Test(timeout=300_000) @Test(timeout = 300_000)
public void testSCMChillMode() throws Exception { public void testSCMChillMode() throws Exception {
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
.setHbInterval(1000)
.setNumDatanodes(3)
.setStartDataNodes(false)
.setHbProcessorInterval(500);
MiniOzoneClusterImpl miniCluster = (MiniOzoneClusterImpl) clusterBuilder
.build();
// Test1: Test chill mode when there are no containers in system. // Test1: Test chill mode when there are no containers in system.
assertTrue(miniCluster.getStorageContainerManager().isInChillMode()); cluster.stop();
miniCluster.startHddsDatanodes();
miniCluster.waitForClusterToBeReady(); try {
assertFalse(miniCluster.getStorageContainerManager().isInChillMode()); cluster = builder.build();
} catch (IOException e) {
Assert.fail("Cluster startup failed.");
}
assertTrue(cluster.getStorageContainerManager().isInChillMode());
cluster.startHddsDatanodes();
cluster.waitForClusterToBeReady();
assertFalse(cluster.getStorageContainerManager().isInChillMode());
// Test2: Test chill mode when containers are there in system. // Test2: Test chill mode when containers are there in system.
// Create {numKeys} random names keys. // Create {numKeys} random names keys.
TestStorageContainerManagerHelper helper = TestStorageContainerManagerHelper helper =
new TestStorageContainerManagerHelper(miniCluster, conf); new TestStorageContainerManagerHelper(cluster, conf);
Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096); Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096);
final List<ContainerInfo> containers = miniCluster final List<ContainerInfo> containers = cluster
.getStorageContainerManager().getContainerManager().getContainers(); .getStorageContainerManager().getContainerManager().getContainers();
GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 2); GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 30);
// Removing some container to keep them open. // Removing some container to keep them open.
containers.remove(0); containers.remove(0);
containers.remove(0); containers.remove(0);
// Close remaining containers // Close remaining containers
SCMContainerManager mapping = (SCMContainerManager) miniCluster SCMContainerManager mapping = (SCMContainerManager) cluster
.getStorageContainerManager().getContainerManager(); .getStorageContainerManager().getContainerManager();
containers.forEach(c -> { containers.forEach(c -> {
try { try {
@ -266,38 +251,30 @@ public class TestScmChillMode {
LOG.info("Failed to change state of open containers.", e); LOG.info("Failed to change state of open containers.", e);
} }
}); });
miniCluster.stop(); cluster.stop();
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
.captureLogs(SCMChillModeManager.getLogger()); .captureLogs(SCMChillModeManager.getLogger());
logCapturer.clearOutput(); logCapturer.clearOutput();
AtomicReference<MiniOzoneCluster> miniClusterOzone
= new AtomicReference<>(); try {
new Thread(() -> { cluster = builder.build();
try { } catch (IOException ex) {
miniClusterOzone.set(clusterBuilder.setStartDataNodes(false).build()); fail("failed");
} catch (IOException e) { }
fail("failed");
}
}).start();
StorageContainerManager scm; StorageContainerManager scm;
GenericTestUtils.waitFor(() -> {
return miniClusterOzone.get() != null;
}, 100, 1000 * 3);
miniCluster = (MiniOzoneClusterImpl) miniClusterOzone.get(); scm = cluster.getStorageContainerManager();
scm = miniCluster.getStorageContainerManager();
assertTrue(scm.isInChillMode()); assertTrue(scm.isInChillMode());
assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode.")); assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode."));
assertTrue(scm.getCurrentContainerThreshold() == 0); assertTrue(scm.getCurrentContainerThreshold() == 0);
for (HddsDatanodeService dn : miniCluster.getHddsDatanodes()) { for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
dn.start(null); dn.start(null);
} }
GenericTestUtils GenericTestUtils
.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000); .waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
cluster = miniCluster;
double chillModeCutoff = conf double chillModeCutoff = conf
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT, .getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT); HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
@ -306,7 +283,7 @@ public class TestScmChillMode {
assertFalse(scm.isInChillMode()); assertFalse(scm.isInChillMode());
} }
@Test @Test(timeout = 300_000)
public void testSCMChillModeRestrictedOp() throws Exception { public void testSCMChillModeRestrictedOp() throws Exception {
conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL,
OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB); OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB);
@ -338,7 +315,7 @@ public class TestScmChillMode {
new SCMChillModeManager.ChillModeStatus(true)); new SCMChillModeManager.ChillModeStatus(true));
GenericTestUtils.waitFor(() -> { GenericTestUtils.waitFor(() -> {
return clientProtocolServer.getChillModeStatus(); return clientProtocolServer.getChillModeStatus();
}, 50, 1000 * 5); }, 50, 1000 * 30);
assertTrue(clientProtocolServer.getChillModeStatus()); assertTrue(clientProtocolServer.getChillModeStatus());
LambdaTestUtils.intercept(SCMException.class, LambdaTestUtils.intercept(SCMException.class,