HDDS-1211. Test SCMChillMode failing randomly in Jenkins run (#543)
This commit is contained in:
parent
8ff41d6243
commit
358f7f9b99
|
@ -20,7 +20,6 @@ package org.apache.hadoop.ozone.om;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
|
||||||
import org.apache.commons.lang3.RandomStringUtils;
|
import org.apache.commons.lang3.RandomStringUtils;
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
|
@ -38,7 +37,6 @@ import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
|
||||||
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
|
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
|
||||||
import org.apache.hadoop.ozone.HddsDatanodeService;
|
import org.apache.hadoop.ozone.HddsDatanodeService;
|
||||||
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
||||||
import org.apache.hadoop.ozone.MiniOzoneClusterImpl;
|
|
||||||
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
import org.apache.hadoop.ozone.TestStorageContainerManagerHelper;
|
import org.apache.hadoop.ozone.TestStorageContainerManagerHelper;
|
||||||
import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
|
import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
|
||||||
|
@ -116,10 +114,8 @@ public class TestScmChillMode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(timeout = 300_000)
|
||||||
public void testChillModeOperations() throws Exception {
|
public void testChillModeOperations() throws Exception {
|
||||||
final AtomicReference<MiniOzoneCluster> miniCluster =
|
|
||||||
new AtomicReference<>();
|
|
||||||
// Create {numKeys} random names keys.
|
// Create {numKeys} random names keys.
|
||||||
TestStorageContainerManagerHelper helper =
|
TestStorageContainerManagerHelper helper =
|
||||||
new TestStorageContainerManagerHelper(cluster, conf);
|
new TestStorageContainerManagerHelper(cluster, conf);
|
||||||
|
@ -158,25 +154,21 @@ public class TestScmChillMode {
|
||||||
|
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
|
|
||||||
new Thread(() -> {
|
|
||||||
try {
|
try {
|
||||||
miniCluster.set(builder.build());
|
cluster = builder.build();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
fail("failed");
|
fail("failed");
|
||||||
}
|
}
|
||||||
}).start();
|
|
||||||
|
|
||||||
StorageContainerManager scm;
|
StorageContainerManager scm;
|
||||||
GenericTestUtils.waitFor(() -> {
|
|
||||||
return miniCluster.get() != null;
|
|
||||||
}, 100, 1000 * 3);
|
|
||||||
cluster = miniCluster.get();
|
|
||||||
|
|
||||||
scm = cluster.getStorageContainerManager();
|
scm = cluster.getStorageContainerManager();
|
||||||
Assert.assertTrue(scm.isInChillMode());
|
Assert.assertTrue(scm.isInChillMode());
|
||||||
|
|
||||||
om = miniCluster.get().getOzoneManager();
|
om = cluster.getOzoneManager();
|
||||||
|
|
||||||
|
// As cluster is restarted with out datanodes restart
|
||||||
LambdaTestUtils.intercept(IOException.class,
|
LambdaTestUtils.intercept(IOException.class,
|
||||||
"ChillModePrecheck failed for allocateBlock",
|
"ChillModePrecheck failed for allocateBlock",
|
||||||
() -> om.openKey(keyArgs));
|
() -> om.openKey(keyArgs));
|
||||||
|
@ -185,25 +177,18 @@ public class TestScmChillMode {
|
||||||
/**
|
/**
|
||||||
* Tests inChillMode & forceExitChillMode api calls.
|
* Tests inChillMode & forceExitChillMode api calls.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test(timeout = 300_000)
|
||||||
public void testIsScmInChillModeAndForceExit() throws Exception {
|
public void testIsScmInChillModeAndForceExit() throws Exception {
|
||||||
final AtomicReference<MiniOzoneCluster> miniCluster =
|
|
||||||
new AtomicReference<>();
|
|
||||||
// Test 1: SCM should be out of chill mode.
|
// Test 1: SCM should be out of chill mode.
|
||||||
Assert.assertFalse(storageContainerLocationClient.inChillMode());
|
Assert.assertFalse(storageContainerLocationClient.inChillMode());
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
// Restart the cluster with same metadata dir.
|
// Restart the cluster with same metadata dir.
|
||||||
new Thread(() -> {
|
|
||||||
try {
|
try {
|
||||||
miniCluster.set(builder.build());
|
cluster = builder.build();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Assert.fail("Cluster startup failed.");
|
Assert.fail("Cluster startup failed.");
|
||||||
}
|
}
|
||||||
}).start();
|
|
||||||
GenericTestUtils.waitFor(() -> {
|
|
||||||
return miniCluster.get() != null;
|
|
||||||
}, 10, 1000 * 3);
|
|
||||||
cluster = miniCluster.get();
|
|
||||||
|
|
||||||
// Test 2: Scm should be in chill mode as datanodes are not started yet.
|
// Test 2: Scm should be in chill mode as datanodes are not started yet.
|
||||||
storageContainerLocationClient = cluster
|
storageContainerLocationClient = cluster
|
||||||
|
@ -227,34 +212,34 @@ public class TestScmChillMode {
|
||||||
|
|
||||||
@Test(timeout = 300_000)
|
@Test(timeout = 300_000)
|
||||||
public void testSCMChillMode() throws Exception {
|
public void testSCMChillMode() throws Exception {
|
||||||
MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
|
|
||||||
.setHbInterval(1000)
|
|
||||||
.setNumDatanodes(3)
|
|
||||||
.setStartDataNodes(false)
|
|
||||||
.setHbProcessorInterval(500);
|
|
||||||
MiniOzoneClusterImpl miniCluster = (MiniOzoneClusterImpl) clusterBuilder
|
|
||||||
.build();
|
|
||||||
// Test1: Test chill mode when there are no containers in system.
|
// Test1: Test chill mode when there are no containers in system.
|
||||||
assertTrue(miniCluster.getStorageContainerManager().isInChillMode());
|
cluster.stop();
|
||||||
miniCluster.startHddsDatanodes();
|
|
||||||
miniCluster.waitForClusterToBeReady();
|
try {
|
||||||
assertFalse(miniCluster.getStorageContainerManager().isInChillMode());
|
cluster = builder.build();
|
||||||
|
} catch (IOException e) {
|
||||||
|
Assert.fail("Cluster startup failed.");
|
||||||
|
}
|
||||||
|
assertTrue(cluster.getStorageContainerManager().isInChillMode());
|
||||||
|
cluster.startHddsDatanodes();
|
||||||
|
cluster.waitForClusterToBeReady();
|
||||||
|
assertFalse(cluster.getStorageContainerManager().isInChillMode());
|
||||||
|
|
||||||
// Test2: Test chill mode when containers are there in system.
|
// Test2: Test chill mode when containers are there in system.
|
||||||
// Create {numKeys} random names keys.
|
// Create {numKeys} random names keys.
|
||||||
TestStorageContainerManagerHelper helper =
|
TestStorageContainerManagerHelper helper =
|
||||||
new TestStorageContainerManagerHelper(miniCluster, conf);
|
new TestStorageContainerManagerHelper(cluster, conf);
|
||||||
Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096);
|
Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096);
|
||||||
final List<ContainerInfo> containers = miniCluster
|
final List<ContainerInfo> containers = cluster
|
||||||
.getStorageContainerManager().getContainerManager().getContainers();
|
.getStorageContainerManager().getContainerManager().getContainers();
|
||||||
GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 2);
|
GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 30);
|
||||||
|
|
||||||
// Removing some container to keep them open.
|
// Removing some container to keep them open.
|
||||||
containers.remove(0);
|
containers.remove(0);
|
||||||
containers.remove(0);
|
containers.remove(0);
|
||||||
|
|
||||||
// Close remaining containers
|
// Close remaining containers
|
||||||
SCMContainerManager mapping = (SCMContainerManager) miniCluster
|
SCMContainerManager mapping = (SCMContainerManager) cluster
|
||||||
.getStorageContainerManager().getContainerManager();
|
.getStorageContainerManager().getContainerManager();
|
||||||
containers.forEach(c -> {
|
containers.forEach(c -> {
|
||||||
try {
|
try {
|
||||||
|
@ -266,38 +251,30 @@ public class TestScmChillMode {
|
||||||
LOG.info("Failed to change state of open containers.", e);
|
LOG.info("Failed to change state of open containers.", e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
miniCluster.stop();
|
cluster.stop();
|
||||||
|
|
||||||
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
|
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
|
||||||
.captureLogs(SCMChillModeManager.getLogger());
|
.captureLogs(SCMChillModeManager.getLogger());
|
||||||
logCapturer.clearOutput();
|
logCapturer.clearOutput();
|
||||||
AtomicReference<MiniOzoneCluster> miniClusterOzone
|
|
||||||
= new AtomicReference<>();
|
|
||||||
new Thread(() -> {
|
|
||||||
try {
|
try {
|
||||||
miniClusterOzone.set(clusterBuilder.setStartDataNodes(false).build());
|
cluster = builder.build();
|
||||||
} catch (IOException e) {
|
} catch (IOException ex) {
|
||||||
fail("failed");
|
fail("failed");
|
||||||
}
|
}
|
||||||
}).start();
|
|
||||||
|
|
||||||
StorageContainerManager scm;
|
StorageContainerManager scm;
|
||||||
GenericTestUtils.waitFor(() -> {
|
|
||||||
return miniClusterOzone.get() != null;
|
|
||||||
}, 100, 1000 * 3);
|
|
||||||
|
|
||||||
miniCluster = (MiniOzoneClusterImpl) miniClusterOzone.get();
|
scm = cluster.getStorageContainerManager();
|
||||||
|
|
||||||
scm = miniCluster.getStorageContainerManager();
|
|
||||||
assertTrue(scm.isInChillMode());
|
assertTrue(scm.isInChillMode());
|
||||||
assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode."));
|
assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode."));
|
||||||
assertTrue(scm.getCurrentContainerThreshold() == 0);
|
assertTrue(scm.getCurrentContainerThreshold() == 0);
|
||||||
for (HddsDatanodeService dn : miniCluster.getHddsDatanodes()) {
|
for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
|
||||||
dn.start(null);
|
dn.start(null);
|
||||||
}
|
}
|
||||||
GenericTestUtils
|
GenericTestUtils
|
||||||
.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
|
.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
|
||||||
cluster = miniCluster;
|
|
||||||
double chillModeCutoff = conf
|
double chillModeCutoff = conf
|
||||||
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
|
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
|
||||||
|
@ -306,7 +283,7 @@ public class TestScmChillMode {
|
||||||
assertFalse(scm.isInChillMode());
|
assertFalse(scm.isInChillMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(timeout = 300_000)
|
||||||
public void testSCMChillModeRestrictedOp() throws Exception {
|
public void testSCMChillModeRestrictedOp() throws Exception {
|
||||||
conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL,
|
conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL,
|
||||||
OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB);
|
OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB);
|
||||||
|
@ -338,7 +315,7 @@ public class TestScmChillMode {
|
||||||
new SCMChillModeManager.ChillModeStatus(true));
|
new SCMChillModeManager.ChillModeStatus(true));
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return clientProtocolServer.getChillModeStatus();
|
return clientProtocolServer.getChillModeStatus();
|
||||||
}, 50, 1000 * 5);
|
}, 50, 1000 * 30);
|
||||||
assertTrue(clientProtocolServer.getChillModeStatus());
|
assertTrue(clientProtocolServer.getChillModeStatus());
|
||||||
|
|
||||||
LambdaTestUtils.intercept(SCMException.class,
|
LambdaTestUtils.intercept(SCMException.class,
|
||||||
|
|
Loading…
Reference in New Issue