HDDS-809. Refactor SCMChillModeManager.

Yiqun Lin 2018-11-07 13:53:28 +08:00
parent 482716e5a4
commit addec29297
15 changed files with 447 additions and 326 deletions


@@ -19,8 +19,8 @@
package org.apache.hadoop.hdds.scm;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
+import org.apache.hadoop.hdds.scm.chillmode.Precheck;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
-import org.apache.hadoop.hdds.scm.server.Precheck;
/**
 * SCM utility class.


@@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.client.ContainerBlockID;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.ScmUtils;
+import org.apache.hadoop.hdds.scm.chillmode.ChillModePrecheck;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock;
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
@@ -32,7 +33,6 @@ import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
-import org.apache.hadoop.hdds.scm.server.ChillModePrecheck;
import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.metrics2.util.MBeans;


@@ -0,0 +1,32 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.chillmode;
/**
* Interface for defining chill mode exit rules.
*
* @param <T> the type of report the rule processes.
*/
public interface ChillModeExitRule<T> {
boolean validate();
void process(T report);
void cleanup();
}
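The interface above is the extension point this refactoring introduces: a rule accumulates state from reports via process(), signals readiness via validate(), and releases its state via cleanup(). As a rough illustration (not part of this patch; the ExampleHealthyNodeRule class and its HealthyNodeReport type are invented for this sketch), a custom rule could look like this:

// Illustrative sketch only -- not part of HDDS-809.
public class ExampleHealthyNodeRule
    implements ChillModeExitRule<ExampleHealthyNodeRule.HealthyNodeReport> {

  /** Minimal report type used only by this example. */
  public static final class HealthyNodeReport {
    private final java.util.UUID nodeId;

    public HealthyNodeReport(java.util.UUID nodeId) {
      this.nodeId = nodeId;
    }

    public java.util.UUID getNodeId() {
      return nodeId;
    }
  }

  private final int requiredNodes;
  private final java.util.Set<java.util.UUID> seenNodes =
      new java.util.HashSet<>();

  public ExampleHealthyNodeRule(int requiredNodes) {
    this.requiredNodes = requiredNodes;
  }

  @Override
  public boolean validate() {
    // Satisfied once enough distinct nodes have reported.
    return seenNodes.size() >= requiredNodes;
  }

  @Override
  public void process(HealthyNodeReport report) {
    // Accumulate state from every incoming report.
    seenNodes.add(report.getNodeId());
  }

  @Override
  public void cleanup() {
    // Drop per-rule state once chill mode is exited.
    seenNodes.clear();
  }
}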


@@ -16,7 +16,7 @@
 * limitations under the License.
 */
-package org.apache.hadoop.hdds.scm.server;
+package org.apache.hadoop.hdds.scm.chillmode;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
@@ -24,7+24,6 @@ import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
-import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestrictedOps;
/**
 * Chill mode pre-check for SCM operations.


@@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.chillmode;
import java.util.EnumSet;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
/**
* Operations restricted in SCM chill mode.
*/
public final class ChillModeRestrictedOps {
private static EnumSet restrictedOps = EnumSet.noneOf(ScmOps.class);
private ChillModeRestrictedOps() {
}
static {
restrictedOps.add(ScmOps.allocateBlock);
restrictedOps.add(ScmOps.allocateContainer);
}
public static boolean isRestrictedInChillMode(ScmOps opName) {
return restrictedOps.contains(opName);
}
}
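In SCM these checks are driven through ChillModePrecheck; as a simplified illustration, a guard that consults ChillModeRestrictedOps directly could look like the sketch below. The ChillModeGuardExample class and its guardOperation helper are invented for this example, and it assumes SCMException.ResultCodes.CHILL_MODE_EXCEPTION is available.

// Illustrative sketch only -- not part of HDDS-809.
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
import org.apache.hadoop.hdds.scm.chillmode.ChillModeRestrictedOps;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;

public final class ChillModeGuardExample {

  private ChillModeGuardExample() {
  }

  /** Rejects restricted operations while SCM is still in chill mode. */
  public static void guardOperation(ScmOps op, boolean inChillMode)
      throws SCMException {
    if (inChillMode && ChillModeRestrictedOps.isRestrictedInChillMode(op)) {
      throw new SCMException("Operation " + op
          + " is not allowed while SCM is in chill mode",
          ResultCodes.CHILL_MODE_EXCEPTION);
    }
  }
}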


@@ -0,0 +1,112 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.chillmode;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
import com.google.common.annotations.VisibleForTesting;
/**
* Class defining Chill mode exit criteria for Containers.
*/
public class ContainerChillModeRule implements
ChillModeExitRule<NodeRegistrationContainerReport> {
// Required cutoff % for containers with at least 1 reported replica.
private double chillModeCutoff;
// Containers read from scm db (excluding containers in ALLOCATED state).
private Map<Long, ContainerInfo> containerMap;
private double maxContainer;
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
private final SCMChillModeManager chillModeManager;
public ContainerChillModeRule(Configuration conf,
List<ContainerInfo> containers, SCMChillModeManager manager) {
chillModeCutoff = conf.getDouble(
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
chillModeManager = manager;
containerMap = new ConcurrentHashMap<>();
if(containers != null) {
containers.forEach(c -> {
// Containers in ALLOCATED state should not be included while
// calculating the total number of containers here. They are not
// reported by DNs and hence should not affect the chill mode exit
// rule.
if (c != null && c.getState() != null &&
!c.getState().equals(HddsProtos.LifeCycleState.ALLOCATED)) {
containerMap.put(c.getContainerID(), c);
}
});
maxContainer = containerMap.size();
}
}
@Override
public boolean validate() {
if (maxContainer == 0) {
return true;
}
return getCurrentContainerThreshold() >= chillModeCutoff;
}
@VisibleForTesting
public double getCurrentContainerThreshold() {
if (maxContainer == 0) {
return 1;
}
return (containerWithMinReplicas.doubleValue() / maxContainer);
}
@Override
public void process(NodeRegistrationContainerReport reportsProto) {
if (maxContainer == 0) {
// No container to check.
return;
}
reportsProto.getReport().getReportsList().forEach(c -> {
if (containerMap.containsKey(c.getContainerID())) {
if(containerMap.remove(c.getContainerID()) != null) {
containerWithMinReplicas.getAndAdd(1);
}
}
});
if(chillModeManager.getInChillMode()) {
SCMChillModeManager.getLogger().info(
"SCM in chill mode. {} % containers have at least one"
+ " reported replica.",
(containerWithMinReplicas.get() / maxContainer) * 100);
}
}
@Override
public void cleanup() {
containerMap.clear();
}
}
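To make the exit arithmetic concrete, here is a small worked example. The numbers are illustrative, and 0.99 is assumed to be the default value of HDDS_SCM_CHILLMODE_THRESHOLD_PCT.

// Illustrative calculation only -- not part of HDDS-809.
public final class ContainerCutoffExample {

  private ContainerCutoffExample() {
  }

  public static void main(String[] args) {
    double chillModeCutoff = 0.99; // assumed default cutoff percentage
    double maxContainer = 200;     // non-ALLOCATED containers loaded from the SCM DB
    long reported = 198;           // containers with at least one reported replica

    double currentThreshold = reported / maxContainer;          // 0.99
    boolean exitAllowed = currentThreshold >= chillModeCutoff;  // true
    System.out.println("current=" + currentThreshold + " exit=" + exitAllowed);
  }
}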


@@ -0,0 +1,83 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.chillmode;
import java.util.HashSet;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
import com.google.common.annotations.VisibleForTesting;
/**
* Class defining Chill mode exit criteria according to number of DataNodes
* registered with SCM.
*/
public class DataNodeChillModeRule implements
ChillModeExitRule<NodeRegistrationContainerReport> {
// Min DataNodes required to exit chill mode.
private int requiredDns;
private int registeredDns = 0;
// Set to track registered DataNodes.
private HashSet<UUID> registeredDnSet;
private final SCMChillModeManager chillModeManager;
public DataNodeChillModeRule(Configuration conf,
SCMChillModeManager manager) {
requiredDns = conf.getInt(
HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE,
HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT);
registeredDnSet = new HashSet<>(requiredDns * 2);
chillModeManager = manager;
}
@Override
public boolean validate() {
return registeredDns >= requiredDns;
}
@VisibleForTesting
public double getRegisteredDataNodes() {
return registeredDns;
}
@Override
public void process(NodeRegistrationContainerReport reportsProto) {
if (requiredDns == 0) {
// No dn check required.
return;
}
if(chillModeManager.getInChillMode()) {
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
registeredDns = registeredDnSet.size();
SCMChillModeManager.getLogger().info(
"SCM in chill mode. {} DataNodes registered, {} required.",
registeredDns, requiredDns);
}
}
@Override
public void cleanup() {
registeredDnSet.clear();
}
}
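The datanode rule is driven entirely by configuration. A minimal sketch of tightening its threshold follows; the value 3 and the ChillModeDnConfigExample class are invented for illustration.

// Illustrative sketch only -- not part of HDDS-809.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;

public final class ChillModeDnConfigExample {

  private ChillModeDnConfigExample() {
  }

  /** Builds a configuration that requires 3 registered DataNodes. */
  public static Configuration buildConf() {
    Configuration conf = new Configuration();
    conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
    return conf;
  }
}

DataNodeChillModeRule reads this key in its constructor and keeps SCM in chill mode until at least that many distinct DataNodes have registered.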


@@ -16,7 +16,7 @@
 * limitations under the License.
 */
-package org.apache.hadoop.hdds.scm.server;
+package org.apache.hadoop.hdds.scm.chillmode;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;


@@ -0,0 +1,153 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.chillmode;
import com.google.common.annotations.VisibleForTesting;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer
.NodeRegistrationContainerReport;
import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* StorageContainerManager enters chill mode on startup to allow the system
* to reach a stable state before becoming fully functional. SCM will wait
* for certain resources to be reported before coming out of chill mode.
*
* ChillModeExitRule defines the contract a new rule must satisfy before SCM
* can exit chill mode.
* ContainerChillModeRule and DataNodeChillModeRule are the current exit
* criteria. On every datanode registration event this class feeds the
* reported containers to its exit rules and checks whether the cutoff
* threshold for containers has been met.
*/
public class SCMChillModeManager implements
EventHandler<NodeRegistrationContainerReport> {
private static final Logger LOG =
LoggerFactory.getLogger(SCMChillModeManager.class);
private final boolean isChillModeEnabled;
private AtomicBoolean inChillMode = new AtomicBoolean(true);
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
private Configuration config;
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
private final EventQueue eventPublisher;
public SCMChillModeManager(Configuration conf,
List<ContainerInfo> allContainers, EventQueue eventQueue) {
this.config = conf;
this.eventPublisher = eventQueue;
this.isChillModeEnabled = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
if (isChillModeEnabled) {
exitRules.put(CONT_EXIT_RULE,
new ContainerChillModeRule(config, allContainers, this));
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config, this));
emitChillModeStatus();
} else {
exitChillMode(eventQueue);
}
}
/**
* Emit Chill mode status.
*/
@VisibleForTesting
public void emitChillModeStatus() {
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
}
private void validateChillModeExitRules(EventPublisher eventQueue) {
for (ChillModeExitRule exitRule : exitRules.values()) {
if (!exitRule.validate()) {
return;
}
}
exitChillMode(eventQueue);
}
/**
* Exit chill mode. It performs the following actions:
* 1. Sets the chill mode status to false.
* 2. Emits START_REPLICATION for ReplicationManager.
* 3. Cleans up the exit rules' resources.
* 4. Emits the chill mode status.
* @param eventQueue event publisher used to emit the status event.
*/
@VisibleForTesting
public void exitChillMode(EventPublisher eventQueue) {
LOG.info("SCM exiting chill mode.");
setInChillMode(false);
// TODO: Remove handler registration as there is no need to listen to
// register events anymore.
for (ChillModeExitRule e : exitRules.values()) {
e.cleanup();
}
emitChillModeStatus();
}
@Override
public void onMessage(
NodeRegistrationContainerReport nodeRegistrationContainerReport,
EventPublisher publisher) {
if (getInChillMode()) {
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport);
validateChillModeExitRules(publisher);
}
}
public boolean getInChillMode() {
if (!isChillModeEnabled) {
return false;
}
return inChillMode.get();
}
/**
* Set chill mode status.
*/
public void setInChillMode(boolean inChillMode) {
this.inChillMode.set(inChillMode);
}
public static Logger getLogger() {
return LOG;
}
@VisibleForTesting
public double getCurrentContainerThreshold() {
return ((ContainerChillModeRule) exitRules.get(CONT_EXIT_RULE))
.getCurrentContainerThreshold();
}
}
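Because the refactored manager is now a public class in the chillmode package, it can be wired up outside StorageContainerManager as well. Below is a minimal sketch, assuming SCMEvents.NODE_REGISTRATION_CONT_REPORT carries the NodeRegistrationContainerReport payload and SCMEvents.CHILL_MODE_STATUS carries a Boolean, as the code above suggests; the ChillModeWiringExample class is invented for illustration.

// Illustrative sketch only -- not part of HDDS-809.
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;

public final class ChillModeWiringExample {

  private ChillModeWiringExample() {
  }

  public static SCMChillModeManager wire(Configuration conf,
      List<ContainerInfo> containers) {
    EventQueue eventQueue = new EventQueue();

    // Observe status changes; the manager fires CHILL_MODE_STATUS from its
    // constructor, so register this listener first.
    eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
        (Boolean inChillMode, EventPublisher publisher) ->
            System.out.println("SCM chill mode status: " + inChillMode));

    SCMChillModeManager chillModeManager =
        new SCMChillModeManager(conf, containers, eventQueue);

    // Route every datanode registration report to the manager, which feeds
    // it to the container and datanode exit rules.
    eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
        chillModeManager);
    return chillModeManager;
  }
}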


@@ -0,0 +1,18 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.chillmode;


@@ -1,319 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.server;
import com.google.common.annotations.VisibleForTesting;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer
.NodeRegistrationContainerReport;
import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* StorageContainerManager enters chill mode on startup to allow system to
* reach a stable state before becoming fully functional. SCM will wait
* for certain resources to be reported before coming out of chill mode.
*
* ChillModeExitRule defines format to define new rules which must be satisfied
* to exit Chill mode.
* ContainerChillModeRule defines the only exit criteria right now.
* On every new datanode registration event this class adds replicas
* for reported containers and validates if cutoff threshold for
* containers is meet.
*/
public class SCMChillModeManager implements
EventHandler<NodeRegistrationContainerReport> {
private static final Logger LOG =
LoggerFactory.getLogger(SCMChillModeManager.class);
private final boolean isChillModeEnabled;
private AtomicBoolean inChillMode = new AtomicBoolean(true);
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
private Configuration config;
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
private final EventQueue eventPublisher;
SCMChillModeManager(Configuration conf, List<ContainerInfo> allContainers,
EventQueue eventQueue) {
this.config = conf;
this.eventPublisher = eventQueue;
this.isChillModeEnabled = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
if (isChillModeEnabled) {
exitRules.put(CONT_EXIT_RULE,
new ContainerChillModeRule(config, allContainers));
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
emitChillModeStatus();
} else {
exitChillMode(eventQueue);
}
}
/**
* Emit Chill mode status.
*/
@VisibleForTesting
public void emitChillModeStatus() {
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
}
private void validateChillModeExitRules(EventPublisher eventQueue) {
for (ChillModeExitRule exitRule : exitRules.values()) {
if (!exitRule.validate()) {
return;
}
}
exitChillMode(eventQueue);
}
/**
* Exit chill mode. It does following actions:
* 1. Set chill mode status to false.
* 2. Emits START_REPLICATION for ReplicationManager.
* 3. Cleanup resources.
* 4. Emit chill mode status.
* @param eventQueue
*/
@VisibleForTesting
public void exitChillMode(EventPublisher eventQueue) {
LOG.info("SCM exiting chill mode.");
setInChillMode(false);
// TODO: Remove handler registration as there is no need to listen to
// register events anymore.
for (ChillModeExitRule e : exitRules.values()) {
e.cleanup();
}
emitChillModeStatus();
}
@Override
public void onMessage(
NodeRegistrationContainerReport nodeRegistrationContainerReport,
EventPublisher publisher) {
if (getInChillMode()) {
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport);
validateChillModeExitRules(publisher);
}
}
public boolean getInChillMode() {
if (!isChillModeEnabled) {
return false;
}
return inChillMode.get();
}
/**
* Set chill mode status.
*/
public void setInChillMode(boolean inChillMode) {
this.inChillMode.set(inChillMode);
}
/**
* Interface for defining chill mode exit rules.
*
* @param <T>
*/
public interface ChillModeExitRule<T> {
boolean validate();
void process(T report);
void cleanup();
}
/**
* Class defining Chill mode exit criteria for Containers.
*/
public class ContainerChillModeRule implements
ChillModeExitRule<NodeRegistrationContainerReport> {
// Required cutoff % for containers with at least 1 reported replica.
private double chillModeCutoff;
// Containers read from scm db (excluding containers in ALLOCATED state).
private Map<Long, ContainerInfo> containerMap;
private double maxContainer;
public ContainerChillModeRule(Configuration conf,
List<ContainerInfo> containers) {
chillModeCutoff = conf
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
containerMap = new ConcurrentHashMap<>();
if(containers != null) {
containers.forEach(c -> {
// Containers in ALLOCATED state should not be included while
// calculating the total number of containers here. They are not
// reported by DNs and hence should not affect the chill mode exit
// rule.
if (c != null && c.getState() != null &&
!c.getState().equals(HddsProtos.LifeCycleState.ALLOCATED)) {
containerMap.put(c.getContainerID(), c);
}
});
maxContainer = containerMap.size();
}
}
@Override
public boolean validate() {
if (maxContainer == 0) {
return true;
}
return getCurrentContainerThreshold() >= chillModeCutoff;
}
@VisibleForTesting
public double getCurrentContainerThreshold() {
if (maxContainer == 0) {
return 1;
}
return (containerWithMinReplicas.doubleValue() / maxContainer);
}
@Override
public void process(NodeRegistrationContainerReport reportsProto) {
if (maxContainer == 0) {
// No container to check.
return;
}
reportsProto.getReport().getReportsList().forEach(c -> {
if (containerMap.containsKey(c.getContainerID())) {
if(containerMap.remove(c.getContainerID()) != null) {
containerWithMinReplicas.getAndAdd(1);
}
}
});
if(getInChillMode()) {
LOG.info("SCM in chill mode. {} % containers have at least one"
+ " reported replica.",
(containerWithMinReplicas.get() / maxContainer) * 100);
}
}
@Override
public void cleanup() {
containerMap.clear();
}
}
/**
* Class defining Chill mode exit criteria according to number of DataNodes
* registered with SCM.
*/
public class DataNodeChillModeRule implements
ChillModeExitRule<NodeRegistrationContainerReport> {
// Min DataNodes required to exit chill mode.
private int requiredDns;
private int registeredDns = 0;
// Set to track registered DataNodes.
private HashSet<UUID> registeredDnSet;
public DataNodeChillModeRule(Configuration conf) {
requiredDns = conf
.getInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE,
HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT);
registeredDnSet = new HashSet<>(requiredDns * 2);
}
@Override
public boolean validate() {
return registeredDns >= requiredDns;
}
@VisibleForTesting
public double getRegisteredDataNodes() {
return registeredDns;
}
@Override
public void process(NodeRegistrationContainerReport reportsProto) {
if (requiredDns == 0) {
// No dn check required.
return;
}
if(getInChillMode()) {
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
registeredDns = registeredDnSet.size();
LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
registeredDns, requiredDns);
}
}
@Override
public void cleanup() {
registeredDnSet.clear();
}
}
@VisibleForTesting
public static Logger getLogger() {
return LOG;
}
@VisibleForTesting
public double getCurrentContainerThreshold() {
return ((ContainerChillModeRule) exitRules.get(CONT_EXIT_RULE))
.getCurrentContainerThreshold();
}
/**
* Operations restricted in SCM chill mode.
*/
public static class ChillModeRestrictedOps {
private static EnumSet restrictedOps = EnumSet.noneOf(ScmOps.class);
static {
restrictedOps.add(ScmOps.allocateBlock);
restrictedOps.add(ScmOps.allocateContainer);
}
public static boolean isRestrictedInChillMode(ScmOps opName) {
return restrictedOps.contains(opName);
}
}
}


@@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.protocol.proto
import org.apache.hadoop.hdds.scm.HddsServerUtil;
import org.apache.hadoop.hdds.scm.ScmInfo;
import org.apache.hadoop.hdds.scm.ScmUtils;
+import org.apache.hadoop.hdds.scm.chillmode.ChillModePrecheck;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;


@@ -36,6 +36,7 @@ import org.apache.hadoop.hdds.scm.block.BlockManager;
import org.apache.hadoop.hdds.scm.block.BlockManagerImpl;
import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl;
import org.apache.hadoop.hdds.scm.block.PendingDeleteHandler;
+import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler;
import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler;
import org.apache.hadoop.hdds.scm.container.CloseContainerWatcher;


@@ -15,7 +15,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.hadoop.hdds.scm.server;
+package org.apache.hadoop.hdds.scm.chillmode;
import java.util.ArrayList;
import java.util.List;


@@ -30,11 +30,11 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
import org.apache.hadoop.hdds.scm.container.SCMContainerManager;
+import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
-import org.apache.hadoop.hdds.scm.server.SCMChillModeManager;
import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.HddsDatanodeService;