HDDS-351. Add chill mode state to SCM.
Contributed by Ajay Kumar.
(cherry picked from commit ff64d35716
)
This commit is contained in:
parent
be1ec005f1
commit
48bcebc080
|
@ -75,4 +75,10 @@ public final class HddsConfigKeys {
|
||||||
"hdds.container.close.threshold";
|
"hdds.container.close.threshold";
|
||||||
public static final float HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT = 0.9f;
|
public static final float HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT = 0.9f;
|
||||||
|
|
||||||
|
// % of containers which should have at least one reported replica
|
||||||
|
// before SCM comes out of chill mode.
|
||||||
|
public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT =
|
||||||
|
"hdds.scm.chillmode.threshold.pct";
|
||||||
|
public static final double HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT = 0.99;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1112,6 +1112,15 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hdds.scm.chillmode.threshold.pct</name>
|
||||||
|
<value>0.99</value>
|
||||||
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
|
<description> % of containers which should have at least one
|
||||||
|
reported replica before SCM comes out of chill mode.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.container.action.max.limit</name>
|
<name>hdds.container.action.max.limit</name>
|
||||||
<value>20</value>
|
<value>20</value>
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
|
||||||
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
||||||
import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector;
|
import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
|
@ -210,6 +211,10 @@ public class ContainerMapping implements Mapping {
|
||||||
// For close containers create pipeline from datanodes with replicas
|
// For close containers create pipeline from datanodes with replicas
|
||||||
Set<DatanodeDetails> dnWithReplicas = containerStateManager
|
Set<DatanodeDetails> dnWithReplicas = containerStateManager
|
||||||
.getContainerReplicas(contInfo.containerID());
|
.getContainerReplicas(contInfo.containerID());
|
||||||
|
if (dnWithReplicas.size() == 0) {
|
||||||
|
throw new SCMException("Can't create a pipeline for container with "
|
||||||
|
+ "no replica.", ResultCodes.NO_REPLICA_FOUND);
|
||||||
|
}
|
||||||
pipeline =
|
pipeline =
|
||||||
new Pipeline(dnWithReplicas.iterator().next().getUuidString(),
|
new Pipeline(dnWithReplicas.iterator().next().getUuidString(),
|
||||||
contInfo.getState(), ReplicationType.STAND_ALONE,
|
contInfo.getState(), ReplicationType.STAND_ALONE,
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager
|
||||||
.ReplicationCompleted;
|
.ReplicationCompleted;
|
||||||
import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest;
|
import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
|
||||||
import org.apache.hadoop.hdds.server.events.Event;
|
import org.apache.hadoop.hdds.server.events.Event;
|
||||||
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
||||||
import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
|
import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
|
||||||
|
@ -60,6 +61,15 @@ public final class SCMEvents {
|
||||||
*/
|
*/
|
||||||
public static final TypedEvent<NodeReportFromDatanode> NODE_REPORT =
|
public static final TypedEvent<NodeReportFromDatanode> NODE_REPORT =
|
||||||
new TypedEvent<>(NodeReportFromDatanode.class, "Node_Report");
|
new TypedEvent<>(NodeReportFromDatanode.class, "Node_Report");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Event generated on DataNode registration.
|
||||||
|
*/
|
||||||
|
public static final TypedEvent<NodeRegistrationContainerReport>
|
||||||
|
NODE_REGISTRATION_CONT_REPORT = new TypedEvent<>(
|
||||||
|
NodeRegistrationContainerReport.class,
|
||||||
|
"Node_Registration_Container_Report");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ContainerReports are send out by Datanodes. This report is received by
|
* ContainerReports are send out by Datanodes. This report is received by
|
||||||
* SCMDatanodeHeartbeatDispatcher and Container_Report Event
|
* SCMDatanodeHeartbeatDispatcher and Container_Report Event
|
||||||
|
|
|
@ -0,0 +1,198 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdds.scm.server;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||||
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer
|
||||||
|
.NodeRegistrationContainerReport;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* StorageContainerManager enters chill mode on startup to allow system to
|
||||||
|
* reach a stable state before becoming fully functional. SCM will wait
|
||||||
|
* for certain resources to be reported before coming out of chill mode.
|
||||||
|
*
|
||||||
|
* ChillModeExitRule defines format to define new rules which must be satisfied
|
||||||
|
* to exit Chill mode.
|
||||||
|
* ContainerChillModeRule defines the only exit criteria right now.
|
||||||
|
* On every new datanode registration event this class adds replicas
|
||||||
|
* for reported containers and validates if cutoff threshold for
|
||||||
|
* containers is meet.
|
||||||
|
*/
|
||||||
|
public class SCMChillModeManager implements
|
||||||
|
EventHandler<NodeRegistrationContainerReport> {
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(SCMChillModeManager.class);
|
||||||
|
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
||||||
|
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
|
||||||
|
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
||||||
|
private Configuration config;
|
||||||
|
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
|
||||||
|
|
||||||
|
SCMChillModeManager(Configuration conf, List<ContainerInfo> allContainers) {
|
||||||
|
this.config = conf;
|
||||||
|
exitRules
|
||||||
|
.put(CONT_EXIT_RULE, new ContainerChillModeRule(config, allContainers));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateChillModeExitRules(EventPublisher eventQueue) {
|
||||||
|
for (ChillModeExitRule exitRule : exitRules.values()) {
|
||||||
|
if (!exitRule.validate()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exitChillMode(eventQueue);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void exitChillMode(EventPublisher eventQueue) {
|
||||||
|
LOG.info("SCM exiting chill mode.");
|
||||||
|
setInChillMode(false);
|
||||||
|
// Emit event to ReplicationManager to start replication.
|
||||||
|
eventQueue.fireEvent(SCMEvents.START_REPLICATION, true);
|
||||||
|
|
||||||
|
// TODO: Remove handler registration as there is no need to listen to
|
||||||
|
// register events anymore.
|
||||||
|
|
||||||
|
for (ChillModeExitRule e : exitRules.values()) {
|
||||||
|
e.cleanup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onMessage(
|
||||||
|
NodeRegistrationContainerReport nodeRegistrationContainerReport,
|
||||||
|
EventPublisher publisher) {
|
||||||
|
if (getInChillMode()) {
|
||||||
|
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
|
||||||
|
validateChillModeExitRules(publisher);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getInChillMode() {
|
||||||
|
return inChillMode.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInChillMode(boolean inChillMode) {
|
||||||
|
this.inChillMode.set(inChillMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for defining chill mode exit rules.
|
||||||
|
*
|
||||||
|
* @param <T>
|
||||||
|
*/
|
||||||
|
public interface ChillModeExitRule<T> {
|
||||||
|
|
||||||
|
boolean validate();
|
||||||
|
|
||||||
|
void process(T report);
|
||||||
|
|
||||||
|
void cleanup();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class defining Chill mode exit criteria for Containers.
|
||||||
|
*/
|
||||||
|
public class ContainerChillModeRule implements
|
||||||
|
ChillModeExitRule<NodeRegistrationContainerReport> {
|
||||||
|
|
||||||
|
// Required cutoff % for containers with at least 1 reported replica.
|
||||||
|
private double chillModeCutoff;
|
||||||
|
// Containers read from scm db.
|
||||||
|
private Map<Long, ContainerInfo> containerMap;
|
||||||
|
private double maxContainer;
|
||||||
|
|
||||||
|
public ContainerChillModeRule(Configuration conf,
|
||||||
|
List<ContainerInfo> containers) {
|
||||||
|
chillModeCutoff = conf
|
||||||
|
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
|
||||||
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
|
||||||
|
containerMap = new ConcurrentHashMap<>();
|
||||||
|
if(containers != null) {
|
||||||
|
containers.forEach(c -> {
|
||||||
|
if (c != null) {
|
||||||
|
containerMap.put(c.getContainerID(), c);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
maxContainer = containers.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean validate() {
|
||||||
|
if (maxContainer == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return getCurrentContainerThreshold() >= chillModeCutoff;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getCurrentContainerThreshold() {
|
||||||
|
return (containerWithMinReplicas.doubleValue() / maxContainer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void process(NodeRegistrationContainerReport reportsProto) {
|
||||||
|
if (maxContainer == 0) {
|
||||||
|
// No container to check.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
reportsProto.getReport().getReportsList().forEach(c -> {
|
||||||
|
if (containerMap.containsKey(c.getContainerID())) {
|
||||||
|
if(containerMap.remove(c.getContainerID()) != null) {
|
||||||
|
containerWithMinReplicas.getAndAdd(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
LOG.info("SCM in chill mode. {} % containers have at least one reported "
|
||||||
|
+ "replica.", (containerWithMinReplicas.get() / maxContainer) * 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void cleanup() {
|
||||||
|
containerMap.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public static Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getCurrentContainerThreshold() {
|
||||||
|
return ((ContainerChillModeRule) exitRules.get(CONT_EXIT_RULE))
|
||||||
|
.getCurrentContainerThreshold();
|
||||||
|
}
|
||||||
|
}
|
|
@ -73,6 +73,8 @@ import static org.apache.hadoop.hdds.protocol.proto
|
||||||
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.scm.HddsServerUtil;
|
import org.apache.hadoop.hdds.scm.HddsServerUtil;
|
||||||
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ReportFromDatanode;
|
||||||
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.ipc.ProtobufRpcEngine;
|
import org.apache.hadoop.ipc.ProtobufRpcEngine;
|
||||||
|
@ -120,6 +122,7 @@ public class SCMDatanodeProtocolServer implements
|
||||||
private final StorageContainerManager scm;
|
private final StorageContainerManager scm;
|
||||||
private final InetSocketAddress datanodeRpcAddress;
|
private final InetSocketAddress datanodeRpcAddress;
|
||||||
private final SCMDatanodeHeartbeatDispatcher heartbeatDispatcher;
|
private final SCMDatanodeHeartbeatDispatcher heartbeatDispatcher;
|
||||||
|
private final EventPublisher eventPublisher;
|
||||||
|
|
||||||
public SCMDatanodeProtocolServer(final OzoneConfiguration conf,
|
public SCMDatanodeProtocolServer(final OzoneConfiguration conf,
|
||||||
StorageContainerManager scm, EventPublisher eventPublisher)
|
StorageContainerManager scm, EventPublisher eventPublisher)
|
||||||
|
@ -129,6 +132,7 @@ public class SCMDatanodeProtocolServer implements
|
||||||
Preconditions.checkNotNull(eventPublisher, "EventPublisher cannot be null");
|
Preconditions.checkNotNull(eventPublisher, "EventPublisher cannot be null");
|
||||||
|
|
||||||
this.scm = scm;
|
this.scm = scm;
|
||||||
|
this.eventPublisher = eventPublisher;
|
||||||
final int handlerCount =
|
final int handlerCount =
|
||||||
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
||||||
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
||||||
|
@ -197,6 +201,9 @@ public class SCMDatanodeProtocolServer implements
|
||||||
== SCMRegisteredResponseProto.ErrorCode.success) {
|
== SCMRegisteredResponseProto.ErrorCode.success) {
|
||||||
scm.getScmContainerManager().processContainerReports(datanodeDetails,
|
scm.getScmContainerManager().processContainerReports(datanodeDetails,
|
||||||
containerReportsProto, true);
|
containerReportsProto, true);
|
||||||
|
eventPublisher.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
new NodeRegistrationContainerReport(datanodeDetails,
|
||||||
|
containerReportsProto));
|
||||||
}
|
}
|
||||||
return getRegisteredResponse(registeredCommand);
|
return getRegisteredResponse(registeredCommand);
|
||||||
}
|
}
|
||||||
|
@ -305,4 +312,16 @@ public class SCMDatanodeProtocolServer implements
|
||||||
IOUtils.cleanupWithLogger(LOG, scm.getScmNodeManager());
|
IOUtils.cleanupWithLogger(LOG, scm.getScmNodeManager());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper class for events with the datanode origin.
|
||||||
|
*/
|
||||||
|
public static class NodeRegistrationContainerReport extends
|
||||||
|
ReportFromDatanode<ContainerReportsProto> {
|
||||||
|
|
||||||
|
public NodeRegistrationContainerReport(DatanodeDetails datanodeDetails,
|
||||||
|
ContainerReportsProto report) {
|
||||||
|
super(datanodeDetails, report);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -176,6 +176,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
private final LeaseManager<Long> commandWatcherLeaseManager;
|
private final LeaseManager<Long> commandWatcherLeaseManager;
|
||||||
|
|
||||||
private final ReplicationActivityStatus replicationStatus;
|
private final ReplicationActivityStatus replicationStatus;
|
||||||
|
private final SCMChillModeManager scmChillModeManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new StorageContainerManager. Configuration will be updated
|
* Creates a new StorageContainerManager. Configuration will be updated
|
||||||
|
@ -231,7 +232,8 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
ContainerReportHandler containerReportHandler =
|
ContainerReportHandler containerReportHandler =
|
||||||
new ContainerReportHandler(scmContainerManager, node2ContainerMap,
|
new ContainerReportHandler(scmContainerManager, node2ContainerMap,
|
||||||
replicationStatus);
|
replicationStatus);
|
||||||
|
scmChillModeManager = new SCMChillModeManager(conf,
|
||||||
|
getScmContainerManager().getStateManager().getAllContainers());
|
||||||
PipelineActionEventHandler pipelineActionEventHandler =
|
PipelineActionEventHandler pipelineActionEventHandler =
|
||||||
new PipelineActionEventHandler();
|
new PipelineActionEventHandler();
|
||||||
|
|
||||||
|
@ -253,6 +255,8 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
|
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
|
||||||
pipelineActionEventHandler);
|
pipelineActionEventHandler);
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
|
eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
|
||||||
|
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
scmChillModeManager);
|
||||||
|
|
||||||
long watcherTimeout =
|
long watcherTimeout =
|
||||||
conf.getTimeDuration(ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT,
|
conf.getTimeDuration(ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT,
|
||||||
|
@ -619,9 +623,9 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
"server", getDatanodeProtocolServer().getDatanodeRpcAddress()));
|
"server", getDatanodeProtocolServer().getDatanodeRpcAddress()));
|
||||||
getDatanodeProtocolServer().start();
|
getDatanodeProtocolServer().start();
|
||||||
|
|
||||||
replicationStatus.start();
|
|
||||||
httpServer.start();
|
httpServer.start();
|
||||||
scmBlockManager.start();
|
scmBlockManager.start();
|
||||||
|
replicationStatus.start();
|
||||||
replicationManager.start();
|
replicationManager.start();
|
||||||
setStartTime();
|
setStartTime();
|
||||||
}
|
}
|
||||||
|
@ -809,6 +813,15 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
return id2StatMap;
|
return id2StatMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isInChillMode() {
|
||||||
|
return scmChillModeManager.getInChillMode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getCurrentContainerThreshold() {
|
||||||
|
return scmChillModeManager.getCurrentContainerThreshold();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Startup options.
|
* Startup options.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdds.scm;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos;
|
||||||
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer
|
||||||
|
.NodeRegistrationContainerReport;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stateless helper functions for Hdds tests.
|
||||||
|
*/
|
||||||
|
public final class HddsTestUtils {
|
||||||
|
|
||||||
|
private HddsTestUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Command Status report object.
|
||||||
|
*
|
||||||
|
* @param numOfContainers number of containers to be included in report.
|
||||||
|
* @return CommandStatusReportsProto
|
||||||
|
*/
|
||||||
|
public static NodeRegistrationContainerReport
|
||||||
|
createNodeRegistrationContainerReport(int numOfContainers) {
|
||||||
|
return new NodeRegistrationContainerReport(
|
||||||
|
TestUtils.randomDatanodeDetails(),
|
||||||
|
TestUtils.getRandomContainerReports(numOfContainers));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create NodeRegistrationContainerReport object.
|
||||||
|
*
|
||||||
|
* @param dnContainers List of containers to be included in report
|
||||||
|
* @return NodeRegistrationContainerReport
|
||||||
|
*/
|
||||||
|
public static NodeRegistrationContainerReport
|
||||||
|
createNodeRegistrationContainerReport(List<ContainerInfo> dnContainers) {
|
||||||
|
List<StorageContainerDatanodeProtocolProtos.ContainerInfo>
|
||||||
|
containers = new ArrayList<>();
|
||||||
|
dnContainers.forEach(c -> {
|
||||||
|
containers.add(TestUtils.getRandomContainerInfo(c.getContainerID()));
|
||||||
|
});
|
||||||
|
return new NodeRegistrationContainerReport(
|
||||||
|
TestUtils.randomDatanodeDetails(),
|
||||||
|
TestUtils.getContainerReports(containers));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates list of ContainerInfo.
|
||||||
|
*
|
||||||
|
* @param numContainers number of ContainerInfo to be included in list.
|
||||||
|
* @return List<ContainerInfo>
|
||||||
|
*/
|
||||||
|
public static List<ContainerInfo> getContainerInfo(int numContainers) {
|
||||||
|
List<ContainerInfo> containerInfoList = new ArrayList<>();
|
||||||
|
for (int i = 0; i < numContainers; i++) {
|
||||||
|
ContainerInfo.Builder builder = new ContainerInfo.Builder();
|
||||||
|
containerInfoList.add(builder
|
||||||
|
.setContainerID(RandomUtils.nextLong())
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
return containerInfoList;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,115 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdds.scm.server;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
|
import org.apache.hadoop.hdds.scm.HddsTestUtils;
|
||||||
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||||
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
|
import org.apache.hadoop.hdds.server.events.EventQueue;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.Timeout;
|
||||||
|
|
||||||
|
/** Test class for SCMChillModeManager.
|
||||||
|
*/
|
||||||
|
public class TestSCMChillModeManager {
|
||||||
|
|
||||||
|
private static EventQueue queue;
|
||||||
|
private SCMChillModeManager scmChillModeManager;
|
||||||
|
private static Configuration config;
|
||||||
|
private List<ContainerInfo> containers;
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public Timeout timeout = new Timeout(1000 * 20);
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() {
|
||||||
|
queue = new EventQueue();
|
||||||
|
config = new OzoneConfiguration();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChillModeState() throws Exception {
|
||||||
|
// Test 1: test for 0 containers
|
||||||
|
testChillMode(0);
|
||||||
|
|
||||||
|
// Test 2: test for 20 containers
|
||||||
|
testChillMode(20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChillModeStateWithNullContainers() {
|
||||||
|
new SCMChillModeManager(config, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testChillMode(int numContainers) throws Exception {
|
||||||
|
containers = new ArrayList<>();
|
||||||
|
containers.addAll(HddsTestUtils.getContainerInfo(numContainers));
|
||||||
|
scmChillModeManager = new SCMChillModeManager(config, containers);
|
||||||
|
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
scmChillModeManager);
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
return !scmChillModeManager.getInChillMode();
|
||||||
|
}, 100, 1000 * 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChillModeExitRule() throws Exception {
|
||||||
|
containers = new ArrayList<>();
|
||||||
|
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||||
|
scmChillModeManager = new SCMChillModeManager(config, containers);
|
||||||
|
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
scmChillModeManager);
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
|
||||||
|
testContainerThreshold(containers.subList(0, 25), 0.25);
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
testContainerThreshold(containers.subList(25, 50), 0.50);
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
testContainerThreshold(containers.subList(50, 75), 0.75);
|
||||||
|
assertTrue(scmChillModeManager.getInChillMode());
|
||||||
|
testContainerThreshold(containers.subList(75, 100), 1.0);
|
||||||
|
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
return !scmChillModeManager.getInChillMode();
|
||||||
|
}, 100, 1000 * 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testContainerThreshold(List<ContainerInfo> dnContainers,
|
||||||
|
double expectedThreshold)
|
||||||
|
throws Exception {
|
||||||
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
|
HddsTestUtils.createNodeRegistrationContainerReport(dnContainers));
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
double threshold = scmChillModeManager.getCurrentContainerThreshold();
|
||||||
|
return threshold == expectedThreshold;
|
||||||
|
}, 100, 2000 * 9);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -178,10 +178,25 @@ public interface MiniOzoneCluster {
|
||||||
void shutdownHddsDatanode(DatanodeDetails dn) throws IOException;
|
void shutdownHddsDatanode(DatanodeDetails dn) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shutdown the MiniOzoneCluster.
|
* Shutdown the MiniOzoneCluster and delete the storage dirs.
|
||||||
*/
|
*/
|
||||||
void shutdown();
|
void shutdown();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop the MiniOzoneCluster without any cleanup.
|
||||||
|
*/
|
||||||
|
void stop();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start Scm.
|
||||||
|
*/
|
||||||
|
void startScm() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start DataNodes.
|
||||||
|
*/
|
||||||
|
void startHddsDatanodes();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builder class for MiniOzoneCluster.
|
* Builder class for MiniOzoneCluster.
|
||||||
*/
|
*/
|
||||||
|
@ -209,6 +224,7 @@ public interface MiniOzoneCluster {
|
||||||
protected int numOfOmHandlers = 20;
|
protected int numOfOmHandlers = 20;
|
||||||
protected int numOfScmHandlers = 20;
|
protected int numOfScmHandlers = 20;
|
||||||
protected int numOfDatanodes = 1;
|
protected int numOfDatanodes = 1;
|
||||||
|
protected boolean startDataNodes = true;
|
||||||
|
|
||||||
protected Builder(OzoneConfiguration conf) {
|
protected Builder(OzoneConfiguration conf) {
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
|
@ -229,6 +245,11 @@ public interface MiniOzoneCluster {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Builder setStartDataNodes(boolean startDataNodes) {
|
||||||
|
this.startDataNodes = startDataNodes;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the SCM id.
|
* Sets the SCM id.
|
||||||
*
|
*
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.ozone.client.OzoneClient;
|
import org.apache.hadoop.ozone.client.OzoneClient;
|
||||||
import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
||||||
import org.apache.hadoop.ozone.client.rest.OzoneException;
|
import org.apache.hadoop.ozone.client.rest.OzoneException;
|
||||||
|
import org.apache.hadoop.ozone.common.Storage.StorageState;
|
||||||
import org.apache.hadoop.ozone.om.OMConfigKeys;
|
import org.apache.hadoop.ozone.om.OMConfigKeys;
|
||||||
import org.apache.hadoop.ozone.om.OzoneManager;
|
import org.apache.hadoop.ozone.om.OzoneManager;
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMStorage;
|
import org.apache.hadoop.hdds.scm.server.SCMStorage;
|
||||||
|
@ -276,32 +277,54 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster {
|
||||||
File baseDir = new File(GenericTestUtils.getTempPath(
|
File baseDir = new File(GenericTestUtils.getTempPath(
|
||||||
MiniOzoneClusterImpl.class.getSimpleName() + "-" +
|
MiniOzoneClusterImpl.class.getSimpleName() + "-" +
|
||||||
scm.getClientProtocolServer().getScmInfo().getClusterId()));
|
scm.getClientProtocolServer().getScmInfo().getClusterId()));
|
||||||
|
stop();
|
||||||
FileUtils.deleteDirectory(baseDir);
|
FileUtils.deleteDirectory(baseDir);
|
||||||
|
|
||||||
if (ozoneManager != null) {
|
|
||||||
LOG.info("Shutting down the OzoneManager");
|
|
||||||
ozoneManager.stop();
|
|
||||||
ozoneManager.join();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (scm != null) {
|
|
||||||
LOG.info("Shutting down the StorageContainerManager");
|
|
||||||
scm.stop();
|
|
||||||
scm.join();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!hddsDatanodes.isEmpty()) {
|
|
||||||
LOG.info("Shutting down the HddsDatanodes");
|
|
||||||
for (HddsDatanodeService hddsDatanode : hddsDatanodes) {
|
|
||||||
hddsDatanode.stop();
|
|
||||||
hddsDatanode.join();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("Exception while shutting down the cluster.", e);
|
LOG.error("Exception while shutting down the cluster.", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
LOG.info("Stopping the Mini Ozone Cluster");
|
||||||
|
if (ozoneManager != null) {
|
||||||
|
LOG.info("Stopping the OzoneManager");
|
||||||
|
ozoneManager.stop();
|
||||||
|
ozoneManager.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scm != null) {
|
||||||
|
LOG.info("Stopping the StorageContainerManager");
|
||||||
|
scm.stop();
|
||||||
|
scm.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hddsDatanodes.isEmpty()) {
|
||||||
|
LOG.info("Shutting the HddsDatanodes");
|
||||||
|
for (HddsDatanodeService hddsDatanode : hddsDatanodes) {
|
||||||
|
hddsDatanode.stop();
|
||||||
|
hddsDatanode.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start Scm.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void startScm() throws IOException {
|
||||||
|
scm.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start DataNodes.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void startHddsDatanodes() {
|
||||||
|
hddsDatanodes.forEach((datanode) -> datanode.start(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builder for configuring the MiniOzoneCluster to run.
|
* Builder for configuring the MiniOzoneCluster to run.
|
||||||
*/
|
*/
|
||||||
|
@ -324,9 +347,13 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster {
|
||||||
scm.start();
|
scm.start();
|
||||||
OzoneManager om = createOM();
|
OzoneManager om = createOM();
|
||||||
om.start();
|
om.start();
|
||||||
List<HddsDatanodeService> hddsDatanodes = createHddsDatanodes(scm);
|
final List<HddsDatanodeService> hddsDatanodes = createHddsDatanodes(scm);
|
||||||
hddsDatanodes.forEach((datanode) -> datanode.start(null));
|
MiniOzoneClusterImpl cluster = new MiniOzoneClusterImpl(conf, om, scm,
|
||||||
return new MiniOzoneClusterImpl(conf, om, scm, hddsDatanodes);
|
hddsDatanodes);
|
||||||
|
if (startDataNodes) {
|
||||||
|
cluster.startHddsDatanodes();
|
||||||
|
}
|
||||||
|
return cluster;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -352,13 +379,30 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster {
|
||||||
private StorageContainerManager createSCM() throws IOException {
|
private StorageContainerManager createSCM() throws IOException {
|
||||||
configureSCM();
|
configureSCM();
|
||||||
SCMStorage scmStore = new SCMStorage(conf);
|
SCMStorage scmStore = new SCMStorage(conf);
|
||||||
|
initializeScmStorage(scmStore);
|
||||||
|
return StorageContainerManager.createSCM(null, conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeScmStorage(SCMStorage scmStore) throws IOException {
|
||||||
|
if (scmStore.getState() == StorageState.INITIALIZED) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
scmStore.setClusterId(clusterId);
|
scmStore.setClusterId(clusterId);
|
||||||
if (!scmId.isPresent()) {
|
if (!scmId.isPresent()) {
|
||||||
scmId = Optional.of(UUID.randomUUID().toString());
|
scmId = Optional.of(UUID.randomUUID().toString());
|
||||||
}
|
}
|
||||||
scmStore.setScmId(scmId.get());
|
scmStore.setScmId(scmId.get());
|
||||||
scmStore.initialize();
|
scmStore.initialize();
|
||||||
return StorageContainerManager.createSCM(null, conf);
|
}
|
||||||
|
|
||||||
|
private void initializeOmStorage(OMStorage omStorage) throws IOException{
|
||||||
|
if (omStorage.getState() == StorageState.INITIALIZED) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
omStorage.setClusterId(clusterId);
|
||||||
|
omStorage.setScmId(scmId.get());
|
||||||
|
omStorage.setOmId(omId.orElse(UUID.randomUUID().toString()));
|
||||||
|
omStorage.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -371,10 +415,7 @@ public final class MiniOzoneClusterImpl implements MiniOzoneCluster {
|
||||||
private OzoneManager createOM() throws IOException {
|
private OzoneManager createOM() throws IOException {
|
||||||
configureOM();
|
configureOM();
|
||||||
OMStorage omStore = new OMStorage(conf);
|
OMStorage omStore = new OMStorage(conf);
|
||||||
omStore.setClusterId(clusterId);
|
initializeOmStorage(omStore);
|
||||||
omStore.setScmId(scmId.get());
|
|
||||||
omStore.setOmId(omId.orElse(UUID.randomUUID().toString()));
|
|
||||||
omStore.initialize();
|
|
||||||
return OzoneManager.createOm(null, conf);
|
return OzoneManager.createOm(null, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,59 +17,63 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.ozone;
|
package org.apache.hadoop.ozone;
|
||||||
|
|
||||||
import java.io.IOException;
|
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import static org.junit.Assert.assertTrue;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import static org.junit.Assert.fail;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
|
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
|
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMStorage;
|
|
||||||
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
|
||||||
import org.apache.hadoop.ozone.container.ContainerTestHelper;
|
|
||||||
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
|
|
||||||
import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand;
|
|
||||||
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
|
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
|
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
|
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
|
||||||
.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
|
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
|
||||||
.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
|
|
||||||
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
|
|
||||||
import org.apache.hadoop.hdds.scm.server.StorageContainerManager.StartupOption;
|
|
||||||
import org.apache.hadoop.hdds.scm.block.DeletedBlockLog;
|
|
||||||
import org.apache.hadoop.hdds.scm.block.SCMBlockDeletingService;
|
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
|
||||||
import org.apache.hadoop.hdds.scm.XceiverClientManager;
|
|
||||||
import org.apache.hadoop.hdds.scm.ScmInfo;
|
|
||||||
import org.junit.Rule;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.rules.ExpectedException;
|
|
||||||
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
|
import com.google.common.util.concurrent.AtomicDouble;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeType;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
|
||||||
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
|
||||||
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
||||||
|
import org.apache.hadoop.hdds.scm.ScmInfo;
|
||||||
|
import org.apache.hadoop.hdds.scm.XceiverClientManager;
|
||||||
|
import org.apache.hadoop.hdds.scm.block.DeletedBlockLog;
|
||||||
|
import org.apache.hadoop.hdds.scm.block.SCMBlockDeletingService;
|
||||||
|
import org.apache.hadoop.hdds.scm.container.ContainerMapping;
|
||||||
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||||
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
|
||||||
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
|
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMChillModeManager;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.SCMStorage;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
|
||||||
|
import org.apache.hadoop.hdds.scm.server.StorageContainerManager.StartupOption;
|
||||||
|
import org.apache.hadoop.ozone.container.ContainerTestHelper;
|
||||||
|
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
|
||||||
|
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
|
||||||
|
import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand;
|
||||||
|
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.ExpectedException;
|
||||||
import org.junit.rules.Timeout;
|
import org.junit.rules.Timeout;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import static org.apache.hadoop.hdds
|
|
||||||
.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
|
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test class that exercises the StorageContainerManager.
|
* Test class that exercises the StorageContainerManager.
|
||||||
|
@ -78,6 +82,8 @@ public class TestStorageContainerManager {
|
||||||
private static XceiverClientManager xceiverClientManager =
|
private static XceiverClientManager xceiverClientManager =
|
||||||
new XceiverClientManager(
|
new XceiverClientManager(
|
||||||
new OzoneConfiguration());
|
new OzoneConfiguration());
|
||||||
|
private static final Logger LOG = LoggerFactory
|
||||||
|
.getLogger(TestStorageContainerManager.class);
|
||||||
/**
|
/**
|
||||||
* Set the timeout for every test.
|
* Set the timeout for every test.
|
||||||
*/
|
*/
|
||||||
|
@ -457,4 +463,92 @@ public class TestStorageContainerManager {
|
||||||
Assert.assertEquals(clusterId, scmInfo.getClusterId());
|
Assert.assertEquals(clusterId, scmInfo.getClusterId());
|
||||||
Assert.assertEquals(scmId, scmInfo.getScmId());
|
Assert.assertEquals(scmId, scmInfo.getScmId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSCMChillMode() throws Exception {
|
||||||
|
OzoneConfiguration conf = new OzoneConfiguration();
|
||||||
|
MiniOzoneCluster.Builder builder = MiniOzoneCluster.newBuilder(conf)
|
||||||
|
.setHbInterval(1000)
|
||||||
|
.setNumDatanodes(3)
|
||||||
|
.setStartDataNodes(false)
|
||||||
|
.setHbProcessorInterval(500);
|
||||||
|
MiniOzoneClusterImpl cluster = (MiniOzoneClusterImpl) builder.build();
|
||||||
|
// Test1: Test chill mode when there are no containers in system.
|
||||||
|
assertTrue(cluster.getStorageContainerManager().isInChillMode());
|
||||||
|
cluster.startHddsDatanodes();
|
||||||
|
cluster.waitForClusterToBeReady();
|
||||||
|
assertFalse(cluster.getStorageContainerManager().isInChillMode());
|
||||||
|
|
||||||
|
// Test2: Test chill mode when containers are there in system.
|
||||||
|
// Create {numKeys} random names keys.
|
||||||
|
TestStorageContainerManagerHelper helper =
|
||||||
|
new TestStorageContainerManagerHelper(cluster, conf);
|
||||||
|
Map<String, OmKeyInfo> keyLocations = helper.createKeys(100*2, 4096);
|
||||||
|
final List<ContainerInfo> containers = cluster.getStorageContainerManager()
|
||||||
|
.getScmContainerManager().getStateManager().getAllContainers();
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
return containers.size() > 10;
|
||||||
|
}, 100, 1000);
|
||||||
|
|
||||||
|
// Removing some container to keep them open.
|
||||||
|
containers.remove(0);
|
||||||
|
containers.remove(1);
|
||||||
|
containers.remove(2);
|
||||||
|
containers.remove(3);
|
||||||
|
|
||||||
|
// Close remaining containers
|
||||||
|
ContainerMapping mapping = (ContainerMapping) cluster
|
||||||
|
.getStorageContainerManager().getScmContainerManager();
|
||||||
|
containers.forEach(c -> {
|
||||||
|
try {
|
||||||
|
mapping.updateContainerState(c.getContainerID(),
|
||||||
|
HddsProtos.LifeCycleEvent.FINALIZE);
|
||||||
|
mapping.updateContainerState(c.getContainerID(),
|
||||||
|
LifeCycleEvent.CLOSE);
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.info("Failed to change state of open containers.", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
cluster.stop();
|
||||||
|
|
||||||
|
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
|
||||||
|
.captureLogs(SCMChillModeManager.getLogger());
|
||||||
|
logCapturer.clearOutput();
|
||||||
|
AtomicReference<MiniOzoneCluster> miniCluster = new AtomicReference<>();
|
||||||
|
new Thread(() -> {
|
||||||
|
try {
|
||||||
|
miniCluster.set(builder.setStartDataNodes(false).build());
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("failed");
|
||||||
|
}
|
||||||
|
}).start();
|
||||||
|
|
||||||
|
StorageContainerManager scm;
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
return miniCluster.get() != null;
|
||||||
|
}, 100, 1000 * 3);
|
||||||
|
|
||||||
|
scm = miniCluster.get().getStorageContainerManager();
|
||||||
|
assertTrue(scm.isInChillMode());
|
||||||
|
assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode."));
|
||||||
|
assertTrue(scm.getCurrentContainerThreshold() == 0);
|
||||||
|
AtomicDouble curThreshold = new AtomicDouble();
|
||||||
|
AtomicDouble lastReportedThreshold = new AtomicDouble();
|
||||||
|
for(HddsDatanodeService dn:miniCluster.get().getHddsDatanodes()){
|
||||||
|
dn.start(null);
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
curThreshold.set(scm.getCurrentContainerThreshold());
|
||||||
|
return curThreshold.get() > lastReportedThreshold.get();
|
||||||
|
}, 100, 1000 * 5);
|
||||||
|
lastReportedThreshold.set(curThreshold.get());
|
||||||
|
}
|
||||||
|
double chillModeCutoff = conf
|
||||||
|
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
|
||||||
|
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
|
||||||
|
assertTrue(scm.getCurrentContainerThreshold() >= chillModeCutoff);
|
||||||
|
assertTrue(logCapturer.getOutput().contains("SCM exiting chill mode."));
|
||||||
|
assertFalse(scm.isInChillMode());
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue