HDFS-11081. Ozone:SCM: Add support for registerNode in datanode. Contributed by Anu Engineer.
This commit is contained in:
parent
2b8b3754f6
commit
8bd85268e6
|
@ -913,7 +913,7 @@ public class DataNode extends ReconfigurableBase
|
||||||
* @throws UnknownHostException if the dfs.datanode.dns.interface
|
* @throws UnknownHostException if the dfs.datanode.dns.interface
|
||||||
* option is used and the hostname can not be determined
|
* option is used and the hostname can not be determined
|
||||||
*/
|
*/
|
||||||
private static String getHostName(Configuration config)
|
public static String getHostName(Configuration config)
|
||||||
throws UnknownHostException {
|
throws UnknownHostException {
|
||||||
String name = config.get(DFS_DATANODE_HOST_NAME_KEY);
|
String name = config.get(DFS_DATANODE_HOST_NAME_KEY);
|
||||||
if (name == null) {
|
if (name == null) {
|
||||||
|
|
|
@ -1,43 +1,61 @@
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
* distributed with this work for additional information
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
* to you under the Apache License, Version 2.0 (the
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
* "License"); you may not use this file except in compliance
|
* You may obtain a copy of the License at
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
* <p>
|
* <p>
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
* <p>
|
* <p>
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
* See the License for the specific language governing permissions and
|
* License for the specific language governing permissions and limitations under
|
||||||
* limitations under the License.
|
* the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.ozone;
|
package org.apache.hadoop.ozone;
|
||||||
|
|
||||||
import com.google.common.base.Optional;
|
import com.google.common.base.Optional;
|
||||||
|
|
||||||
|
import com.google.common.net.HostAndPort;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.scm.ScmConfigKeys;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.*;
|
import static org.apache.hadoop.ozone.OzoneConfigKeys.*;
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_DEADNODE_INTERVAL_DEFAULT;
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_DEADNODE_INTERVAL_MS;
|
.OZONE_SCM_DEADNODE_INTERVAL_DEFAULT;
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_HEARTBEAT_INTERVAL_SECONDS;
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS;
|
.OZONE_SCM_DEADNODE_INTERVAL_MS;
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_STALENODE_INTERVAL_DEFAULT;
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_STALENODE_INTERVAL_MS;
|
.OZONE_SCM_HEARTBEAT_INTERVAL_SECONDS;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_HEARTBEAT_LOG_WARN_DEFAULT;
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_HEARTBEAT_LOG_WARN_INTERVAL_COUNT;
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_HEARTBEAT_RPC_TIMEOUT;
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_HEARTBEAT_RPC_TIMEOUT_DEFAULT;
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_STALENODE_INTERVAL_DEFAULT;
|
||||||
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
||||||
|
.OZONE_SCM_STALENODE_INTERVAL_MS;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility methods for Ozone and Container Clients.
|
* Utility methods for Ozone and Container Clients.
|
||||||
|
@ -51,6 +69,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_STALENODE_INTERV
|
||||||
public final class OzoneClientUtils {
|
public final class OzoneClientUtils {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(
|
private static final Logger LOG = LoggerFactory.getLogger(
|
||||||
OzoneClientUtils.class);
|
OzoneClientUtils.class);
|
||||||
|
private static final int NO_PORT = -1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The service ID of the solitary Ozone SCM service.
|
* The service ID of the solitary Ozone SCM service.
|
||||||
|
@ -168,7 +187,7 @@ public final class OzoneClientUtils {
|
||||||
* Each config value may be absent, or if present in the format
|
* Each config value may be absent, or if present in the format
|
||||||
* host:port (the :port part is optional).
|
* host:port (the :port part is optional).
|
||||||
*
|
*
|
||||||
* @param conf
|
* @param conf - Conf
|
||||||
* @param keys a list of configuration key names.
|
* @param keys a list of configuration key names.
|
||||||
*
|
*
|
||||||
* @return first hostname component found from the given keys, or absent.
|
* @return first hostname component found from the given keys, or absent.
|
||||||
|
@ -176,51 +195,65 @@ public final class OzoneClientUtils {
|
||||||
* or host:port format.
|
* or host:port format.
|
||||||
*/
|
*/
|
||||||
static Optional<String> getHostNameFromConfigKeys(
|
static Optional<String> getHostNameFromConfigKeys(
|
||||||
Configuration conf, String ... keys) {
|
Configuration conf, String... keys) {
|
||||||
for (final String key : keys) {
|
for (final String key : keys) {
|
||||||
final String value = conf.getTrimmed(key);
|
final String value = conf.getTrimmed(key);
|
||||||
if (value != null && !value.isEmpty()) {
|
final Optional<String> hostName = getHostName(value);
|
||||||
String[] splits = value.split(":");
|
if (hostName.isPresent()) {
|
||||||
|
return hostName;
|
||||||
if(splits.length < 1 || splits.length > 2) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"Invalid value " + value + " for config key " + key +
|
|
||||||
". It should be in 'host' or 'host:port' format");
|
|
||||||
}
|
|
||||||
return Optional.of(splits[0]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Optional.absent();
|
return Optional.absent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the hostname or Indicates that it is absent.
|
||||||
|
* @param value host or host:port
|
||||||
|
* @return hostname
|
||||||
|
*/
|
||||||
|
public static Optional<String> getHostName(String value) {
|
||||||
|
if ((value == null) || value.isEmpty()) {
|
||||||
|
return Optional.absent();
|
||||||
|
}
|
||||||
|
return Optional.of(HostAndPort.fromString(value).getHostText());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the port if there is one, throws otherwise.
|
||||||
|
* @param value String in host:port format.
|
||||||
|
* @return Port
|
||||||
|
*/
|
||||||
|
public static Optional<Integer> getHostPort(String value) {
|
||||||
|
if((value == null) || value.isEmpty()) {
|
||||||
|
return Optional.absent();
|
||||||
|
}
|
||||||
|
int port = HostAndPort.fromString(value).getPortOrDefault(NO_PORT);
|
||||||
|
if (port == NO_PORT) {
|
||||||
|
return Optional.absent();
|
||||||
|
} else {
|
||||||
|
return Optional.of(port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve the port number, trying the supplied config keys in order.
|
* Retrieve the port number, trying the supplied config keys in order.
|
||||||
* Each config value may be absent, or if present in the format
|
* Each config value may be absent, or if present in the format
|
||||||
* host:port (the :port part is optional).
|
* host:port (the :port part is optional).
|
||||||
*
|
*
|
||||||
* @param conf
|
* @param conf Conf
|
||||||
* @param keys a list of configuration key names.
|
* @param keys a list of configuration key names.
|
||||||
*
|
*
|
||||||
* @return first port number component found from the given keys, or absent.
|
* @return first port number component found from the given keys, or absent.
|
||||||
* @throws IllegalArgumentException if any values are not in the 'host'
|
* @throws IllegalArgumentException if any values are not in the 'host'
|
||||||
* or host:port format.
|
* or host:port format.
|
||||||
*/
|
*/
|
||||||
static Optional<Integer> getPortNumberFromConfigKeys(
|
public static Optional<Integer> getPortNumberFromConfigKeys(
|
||||||
Configuration conf, String ... keys) {
|
Configuration conf, String... keys) {
|
||||||
for (final String key : keys) {
|
for (final String key : keys) {
|
||||||
final String value = conf.getTrimmed(key);
|
final String value = conf.getTrimmed(key);
|
||||||
if (value != null && !value.isEmpty()) {
|
final Optional<Integer> hostPort = getHostPort(value);
|
||||||
String[] splits = value.split(":");
|
if (hostPort.isPresent()) {
|
||||||
|
return hostPort;
|
||||||
if(splits.length < 1 || splits.length > 2) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"Invalid value " + value + " for config key " + key +
|
|
||||||
". It should be in 'host' or 'host:port' format");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (splits.length == 2) {
|
|
||||||
return Optional.of(Integer.parseInt(splits[1]));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Optional.absent();
|
return Optional.absent();
|
||||||
|
@ -270,7 +303,6 @@ public final class OzoneClientUtils {
|
||||||
throw new IllegalArgumentException(errMsg);
|
throw new IllegalArgumentException(errMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the interval in which the heartbeat processor thread runs.
|
* Returns the interval in which the heartbeat processor thread runs.
|
||||||
*
|
*
|
||||||
|
@ -282,7 +314,6 @@ public final class OzoneClientUtils {
|
||||||
OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS_DEFAULT);
|
OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Heartbeat Interval - Defines the heartbeat frequency from a datanode to
|
* Heartbeat Interval - Defines the heartbeat frequency from a datanode to
|
||||||
* SCM.
|
* SCM.
|
||||||
|
@ -295,7 +326,6 @@ public final class OzoneClientUtils {
|
||||||
OZONE_SCM_HEARBEAT_INTERVAL_SECONDS_DEFAULT);
|
OZONE_SCM_HEARBEAT_INTERVAL_SECONDS_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the Stale Node interval, which is used by SCM to flag a datanode as
|
* Get the Stale Node interval, which is used by SCM to flag a datanode as
|
||||||
* stale, if the heartbeat from that node has been missing for this duration.
|
* stale, if the heartbeat from that node has been missing for this duration.
|
||||||
|
@ -340,7 +370,6 @@ public final class OzoneClientUtils {
|
||||||
return staleNodeIntevalMs;
|
return staleNodeIntevalMs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the interval for dead node flagging. This has to be a value that is
|
* Gets the interval for dead node flagging. This has to be a value that is
|
||||||
* greater than stale node value, and by transitive relation we also know
|
* greater than stale node value, and by transitive relation we also know
|
||||||
|
@ -374,8 +403,42 @@ public final class OzoneClientUtils {
|
||||||
* @param conf Configration
|
* @param conf Configration
|
||||||
* @return - int -- Number of HBs to process
|
* @return - int -- Number of HBs to process
|
||||||
*/
|
*/
|
||||||
public static int getMaxHBToProcessPerLoop(Configuration conf){
|
public static int getMaxHBToProcessPerLoop(Configuration conf) {
|
||||||
return conf.getInt(OZONE_SCM_MAX_HB_COUNT_TO_PROCESS,
|
return conf.getInt(OZONE_SCM_MAX_HB_COUNT_TO_PROCESS,
|
||||||
OZONE_SCM_MAX_HB_COUNT_TO_PROCESS_DEFAULT);
|
OZONE_SCM_MAX_HB_COUNT_TO_PROCESS_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timeout value for the RPC from Datanode to SCM, primarily used for
|
||||||
|
* Heartbeats and container reports.
|
||||||
|
*
|
||||||
|
* @param conf - Ozone Config
|
||||||
|
* @return - Rpc timeout in Milliseconds.
|
||||||
|
*/
|
||||||
|
public static long getScmRpcTimeOutInMilliseconds(Configuration conf) {
|
||||||
|
return conf.getTimeDuration(OZONE_SCM_HEARTBEAT_RPC_TIMEOUT,
|
||||||
|
OZONE_SCM_HEARTBEAT_RPC_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log Warn interval.
|
||||||
|
*
|
||||||
|
* @param conf - Ozone Config
|
||||||
|
* @return - Log warn interval.
|
||||||
|
*/
|
||||||
|
public static int getLogWarnInterval(Configuration conf) {
|
||||||
|
return conf.getInt(OZONE_SCM_HEARTBEAT_LOG_WARN_INTERVAL_COUNT,
|
||||||
|
OZONE_SCM_HEARTBEAT_LOG_WARN_DEFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* returns the Container port.
|
||||||
|
* @param conf - Conf
|
||||||
|
* @return port number.
|
||||||
|
*/
|
||||||
|
public static int getContainerPort(Configuration conf) {
|
||||||
|
return conf.getInt(ScmConfigKeys.DFS_CONTAINER_IPC_PORT, ScmConfigKeys
|
||||||
|
.DFS_CONTAINER_IPC_PORT_DEFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,8 +43,8 @@ public final class OzoneConfigKeys {
|
||||||
"ozone.trace.enabled";
|
"ozone.trace.enabled";
|
||||||
public static final boolean OZONE_TRACE_ENABLED_DEFAULT = false;
|
public static final boolean OZONE_TRACE_ENABLED_DEFAULT = false;
|
||||||
|
|
||||||
public static final String OZONE_METADATA_DIRS =
|
public static final String OZONE_CONTAINER_METADATA_DIRS =
|
||||||
"ozone.metadata.dirs";
|
"ozone.container.metadata.dirs";
|
||||||
|
|
||||||
public static final String OZONE_KEY_CACHE = "ozone.key.cache.size";
|
public static final String OZONE_KEY_CACHE = "ozone.key.cache.size";
|
||||||
public static final int OZONE_KEY_CACHE_DEFAULT = 1024;
|
public static final int OZONE_KEY_CACHE_DEFAULT = 1024;
|
||||||
|
@ -94,6 +94,54 @@ public final class OzoneConfigKeys {
|
||||||
public static final long OZONE_SCM_STALENODE_INTERVAL_DEFAULT =
|
public static final long OZONE_SCM_STALENODE_INTERVAL_DEFAULT =
|
||||||
OZONE_SCM_HEARBEAT_INTERVAL_SECONDS_DEFAULT * 1000L * 3L;
|
OZONE_SCM_HEARBEAT_INTERVAL_SECONDS_DEFAULT * 1000L * 3L;
|
||||||
|
|
||||||
|
public static final String OZONE_SCM_CONTAINER_THREADS =
|
||||||
|
"ozone.scm.container.threads";
|
||||||
|
public static final int OZONE_SCM_CONTAINER_THREADS_DEFAULT =
|
||||||
|
Runtime.getRuntime().availableProcessors() * 2;
|
||||||
|
|
||||||
|
public static final String OZONE_SCM_HEARTBEAT_RPC_TIMEOUT =
|
||||||
|
"ozone.scm.heartbeat.rpc-timeout";
|
||||||
|
public static final long OZONE_SCM_HEARTBEAT_RPC_TIMEOUT_DEFAULT =
|
||||||
|
100;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Defines how frequently we will log the missing of heartbeat to a specific
|
||||||
|
* SCM. In the default case we will write a warning message for each 10
|
||||||
|
* sequential heart beats that we miss to a specific SCM. This is to avoid
|
||||||
|
* overrunning the log with lots of HB missed Log statements.
|
||||||
|
*/
|
||||||
|
public static final String OZONE_SCM_HEARTBEAT_LOG_WARN_INTERVAL_COUNT =
|
||||||
|
"ozone.scm.heartbeat.log.warn.interval.count";
|
||||||
|
public static final int OZONE_SCM_HEARTBEAT_LOG_WARN_DEFAULT =
|
||||||
|
10;
|
||||||
|
|
||||||
|
public static final String OZONE_CONTAINER_TASK_WAIT =
|
||||||
|
"ozone.container.task.wait.seconds";
|
||||||
|
public static final long OZONE_CONTAINER_TASK_WAIT_DEFAULT = 5;
|
||||||
|
|
||||||
|
|
||||||
|
// ozone.scm.names key is a set of DNS | DNS:PORT | IP Address | IP:PORT.
|
||||||
|
// Written as a comma separated string. e.g. scm1, scm2:8020, 7.7.7.7:7777
|
||||||
|
//
|
||||||
|
// If this key is not specified datanodes will not be able to find
|
||||||
|
// SCM. The SCM membership can be dynamic, so this key should contain
|
||||||
|
// all possible SCM names. Once the SCM leader is discovered datanodes will
|
||||||
|
// get the right list of SCMs to heartbeat to from the leader.
|
||||||
|
// While it is good for the datanodes to know the names of all SCM nodes,
|
||||||
|
// it is sufficient to actually know the name of one working SCM. That SCM
|
||||||
|
// will be able to return the information about other SCMs that are part of
|
||||||
|
// the SCM replicated Log.
|
||||||
|
//
|
||||||
|
// In case of a membership change, any one of the SCM machines will be
|
||||||
|
// able to send back a new list to the datanodes.
|
||||||
|
public static final String OZONE_SCM_NAMES = "ozone.scm.names";
|
||||||
|
|
||||||
|
public static final int OZONE_SCM_DEFAULT_PORT = 9862;
|
||||||
|
// File name and path where the datanode ID is to be written.
|
||||||
|
// if this value is not set then container startup will fail.
|
||||||
|
public static final String OZONE_SCM_DATANODE_ID = "ozone.scm.datanode.id";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* There is no need to instantiate this class.
|
* There is no need to instantiate this class.
|
||||||
|
|
|
@ -0,0 +1,213 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.statemachine;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ozone.OzoneClientUtils;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State Machine Class.
|
||||||
|
*/
|
||||||
|
public class DatanodeStateMachine implements Closeable {
|
||||||
|
@VisibleForTesting
|
||||||
|
static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(DatanodeStateMachine.class);
|
||||||
|
private final ExecutorService executorService;
|
||||||
|
private final Configuration conf;
|
||||||
|
private final SCMConnectionManager connectionManager;
|
||||||
|
private final long taskWaitTime;
|
||||||
|
private final long heartbeatFrequency;
|
||||||
|
private StateContext context;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a container state machine.
|
||||||
|
*
|
||||||
|
* @param conf - Configration.
|
||||||
|
*/
|
||||||
|
public DatanodeStateMachine(Configuration conf) {
|
||||||
|
this.conf = conf;
|
||||||
|
executorService = HadoopExecutors.newScheduledThreadPool(
|
||||||
|
this.conf.getInt(OzoneConfigKeys.OZONE_SCM_CONTAINER_THREADS,
|
||||||
|
OzoneConfigKeys.OZONE_SCM_CONTAINER_THREADS_DEFAULT),
|
||||||
|
new ThreadFactoryBuilder().setDaemon(true)
|
||||||
|
.setNameFormat("Container State Machine Thread - %d").build());
|
||||||
|
connectionManager = new SCMConnectionManager(conf);
|
||||||
|
context = new StateContext(this.conf, DatanodeStates.getInitState(), this);
|
||||||
|
taskWaitTime = this.conf.getLong(OzoneConfigKeys.OZONE_CONTAINER_TASK_WAIT,
|
||||||
|
OzoneConfigKeys.OZONE_CONTAINER_TASK_WAIT_DEFAULT);
|
||||||
|
heartbeatFrequency = OzoneClientUtils.getScmHeartbeatInterval(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the Connection manager for this state machine.
|
||||||
|
*
|
||||||
|
* @return - SCMConnectionManager.
|
||||||
|
*/
|
||||||
|
public SCMConnectionManager getConnectionManager() {
|
||||||
|
return connectionManager;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs the state machine at a fixed frequency.
|
||||||
|
*/
|
||||||
|
public void start() throws IOException {
|
||||||
|
long now = 0;
|
||||||
|
long nextHB = 0;
|
||||||
|
while (context.getState() != DatanodeStates.SHUTDOWN) {
|
||||||
|
try {
|
||||||
|
nextHB = Time.monotonicNow() + heartbeatFrequency;
|
||||||
|
context.execute(executorService, taskWaitTime, TimeUnit.SECONDS);
|
||||||
|
now = Time.monotonicNow();
|
||||||
|
if (now < nextHB) {
|
||||||
|
Thread.sleep(nextHB - now);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException | ExecutionException | TimeoutException e) {
|
||||||
|
LOG.error("Unable to finish the execution", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current context.
|
||||||
|
*
|
||||||
|
* @return StateContext
|
||||||
|
*/
|
||||||
|
public StateContext getContext() {
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the current context.
|
||||||
|
*
|
||||||
|
* @param context - Context
|
||||||
|
*/
|
||||||
|
public void setContext(StateContext context) {
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes this stream and releases any system resources associated with it. If
|
||||||
|
* the stream is already closed then invoking this method has no effect.
|
||||||
|
* <p>
|
||||||
|
* <p> As noted in {@link AutoCloseable#close()}, cases where the close may
|
||||||
|
* fail require careful attention. It is strongly advised to relinquish the
|
||||||
|
* underlying resources and to internally <em>mark</em> the {@code Closeable}
|
||||||
|
* as closed, prior to throwing the {@code IOException}.
|
||||||
|
*
|
||||||
|
* @throws IOException if an I/O error occurs
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
executorService.shutdown();
|
||||||
|
try {
|
||||||
|
if (!executorService.awaitTermination(5, TimeUnit.SECONDS)) {
|
||||||
|
executorService.shutdownNow();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!executorService.awaitTermination(5, TimeUnit.SECONDS)) {
|
||||||
|
LOG.error("Unable to shutdown statemachine properly.");
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.error("Error attempting to shutdown.", e);
|
||||||
|
executorService.shutdownNow();
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (EndpointStateMachine endPoint : connectionManager.getValues()) {
|
||||||
|
endPoint.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* States that a datanode can be in. GetNextState will move this enum from
|
||||||
|
* getInitState to getLastState.
|
||||||
|
*/
|
||||||
|
public enum DatanodeStates {
|
||||||
|
INIT(1),
|
||||||
|
RUNNING(2),
|
||||||
|
SHUTDOWN(3);
|
||||||
|
private final int value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs ContainerStates.
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
*/
|
||||||
|
DatanodeStates(int value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the first State.
|
||||||
|
*
|
||||||
|
* @return First State.
|
||||||
|
*/
|
||||||
|
public static DatanodeStates getInitState() {
|
||||||
|
return INIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The last state of endpoint states.
|
||||||
|
*
|
||||||
|
* @return last state.
|
||||||
|
*/
|
||||||
|
public static DatanodeStates getLastState() {
|
||||||
|
return SHUTDOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* returns the numeric value associated with the endPoint.
|
||||||
|
*
|
||||||
|
* @return int.
|
||||||
|
*/
|
||||||
|
public int getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next logical state that endPoint should move to. This
|
||||||
|
* function assumes the States are sequentially numbered.
|
||||||
|
*
|
||||||
|
* @return NextState.
|
||||||
|
*/
|
||||||
|
public DatanodeStates getNextState() {
|
||||||
|
if (this.value < getLastState().getValue()) {
|
||||||
|
int stateValue = this.getValue() + 1;
|
||||||
|
for (DatanodeStates iter : values()) {
|
||||||
|
if (stateValue == iter.getValue()) {
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return getLastState();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,265 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.statemachine;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ozone.OzoneClientUtils;
|
||||||
|
import org.apache.hadoop.ozone.protocol.VersionResponse;
|
||||||
|
import org.apache.hadoop.ozone.protocolPB
|
||||||
|
.StorageContainerDatanodeProtocolClientSideTranslatorPB;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import java.util.concurrent.locks.Lock;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Endpoint is used as holder class that keeps state around the RPC endpoint.
|
||||||
|
*/
|
||||||
|
public class EndpointStateMachine implements Closeable {
|
||||||
|
static final Logger
|
||||||
|
LOG = LoggerFactory.getLogger(EndpointStateMachine.class);
|
||||||
|
private final StorageContainerDatanodeProtocolClientSideTranslatorPB endPoint;
|
||||||
|
private final AtomicLong missedCount;
|
||||||
|
private final InetSocketAddress address;
|
||||||
|
private final Lock lock;
|
||||||
|
private final Configuration conf;
|
||||||
|
private EndPointStates state;
|
||||||
|
private VersionResponse version;
|
||||||
|
|
||||||
|
/**
 * Creates the holder for one RPC endpoint. The endpoint starts in the
 * initial {@code EndPointStates} state with a missed count of zero.
 *
 * @param address  - socket address of the endpoint.
 * @param endPoint - RPC endPoint.
 * @param conf     - configuration consulted when logging missed calls.
 */
public EndpointStateMachine(InetSocketAddress address,
    StorageContainerDatanodeProtocolClientSideTranslatorPB endPoint,
    Configuration conf) {
  this.address = address;
  this.endPoint = endPoint;
  this.conf = conf;
  this.lock = new ReentrantLock();
  this.missedCount = new AtomicLong(0);
  this.state = EndPointStates.getInitState();
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a lock on this EndPoint so that other threads don't use this while we
|
||||||
|
* are trying to communicate via this endpoint.
|
||||||
|
*/
|
||||||
|
public void lock() {
|
||||||
|
lock.lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unlocks this endpoint.
|
||||||
|
*/
|
||||||
|
public void unlock() {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the version that we read from the server if anyone asks .
|
||||||
|
*
|
||||||
|
* @return - Version Response.
|
||||||
|
*/
|
||||||
|
public VersionResponse getVersion() {
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the Version reponse we recieved from the SCM.
|
||||||
|
*
|
||||||
|
* @param version VersionResponse
|
||||||
|
*/
|
||||||
|
public void setVersion(VersionResponse version) {
|
||||||
|
this.version = version;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current State this end point is in.
|
||||||
|
*
|
||||||
|
* @return - getState.
|
||||||
|
*/
|
||||||
|
public EndPointStates getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the endpoint state.
|
||||||
|
*
|
||||||
|
* @param state - state.
|
||||||
|
*/
|
||||||
|
public EndPointStates setState(EndPointStates state) {
|
||||||
|
this.state = state;
|
||||||
|
return this.state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the connection.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (endPoint != null) {
|
||||||
|
endPoint.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We maintain a count of how many times we missed communicating with a
|
||||||
|
* specific SCM. This is not made atomic since the access to this is always
|
||||||
|
* guarded by the read or write lock. That is, it is serialized.
|
||||||
|
*/
|
||||||
|
public void incMissed() {
|
||||||
|
this.missedCount.incrementAndGet();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value of the missed count.
|
||||||
|
*
|
||||||
|
* @return int
|
||||||
|
*/
|
||||||
|
public long getMissedCount() {
|
||||||
|
return this.missedCount.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void zeroMissedCount() {
|
||||||
|
this.missedCount.set(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the InetAddress of the endPoint.
|
||||||
|
*
|
||||||
|
* @return - EndPoint.
|
||||||
|
*/
|
||||||
|
public InetSocketAddress getAddress() {
|
||||||
|
return this.address;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns real RPC endPoint.
|
||||||
|
*
|
||||||
|
* @return rpc client.
|
||||||
|
*/
|
||||||
|
public StorageContainerDatanodeProtocolClientSideTranslatorPB
|
||||||
|
getEndPoint() {
|
||||||
|
return endPoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the string that represents this endpoint.
|
||||||
|
*
|
||||||
|
* @return - String
|
||||||
|
*/
|
||||||
|
public String toString() {
|
||||||
|
return address.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Logs exception if needed.
|
||||||
|
* @param ex - Exception
|
||||||
|
*/
|
||||||
|
public void logIfNeeded(Exception ex) {
|
||||||
|
LOG.trace("Incrementing the Missed count. Ex : {}", ex);
|
||||||
|
this.incMissed();
|
||||||
|
if (this.getMissedCount() % OzoneClientUtils.getLogWarnInterval(conf) ==
|
||||||
|
0) {
|
||||||
|
LOG.warn("Unable to communicate to SCM server at {}. We have not been " +
|
||||||
|
"able to communicate to this SCM server for past {} seconds.",
|
||||||
|
this.getAddress().getHostString() + ":" + this.getAddress().getPort(),
|
||||||
|
this.getMissedCount() * OzoneClientUtils.getScmHeartbeatInterval(
|
||||||
|
this.conf));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * States that an Endpoint can be in.
 * <p>
 * The constants are listed in the order the EndPoint traverses them and
 * carry increasing numeric values.
 * <p>
 * {@link #getNextState()} moves from {@link #getInitState()} towards
 * {@link #getLastState()}.
 */
public enum EndPointStates {
  GETVERSION(1),
  REGISTER(2),
  HEARTBEAT(3),
  SHUTDOWN(4); // if you add value after this please edit getLastState too.

  private final int value;

  /**
   * Constructs endPointStates.
   *
   * @param value numeric value of the state.
   */
  EndPointStates(int value) {
    this.value = value;
  }

  /**
   * Returns the first state.
   *
   * @return First State.
   */
  public static EndPointStates getInitState() {
    return GETVERSION;
  }

  /**
   * Returns the last state of the endpoint states.
   *
   * @return last state.
   */
  public static EndPointStates getLastState() {
    return SHUTDOWN;
  }

  /**
   * Returns the numeric value associated with this state.
   *
   * @return int.
   */
  public int getValue() {
    return value;
  }

  /**
   * Returns the next logical state that the endPoint should move to, i.e.
   * the state whose value is one higher than the current one. Once the last
   * state is reached (or no state carries the next value) the last state is
   * returned.
   *
   * @return NextState.
   */
  public EndPointStates getNextState() {
    final EndPointStates last = getLastState();
    if (this.getValue() >= last.getValue()) {
      return last;
    }
    final int wanted = this.getValue() + 1;
    for (EndPointStates candidate : values()) {
      if (candidate.getValue() == wanted) {
        return candidate;
      }
    }
    return last;
  }
}
|
||||||
|
}
|
|
@ -0,0 +1,174 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.statemachine;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ipc.ProtobufRpcEngine;
|
||||||
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.ozone.OzoneClientUtils;
|
||||||
|
import org.apache.hadoop.ozone.protocolPB
|
||||||
|
.StorageContainerDatanodeProtocolClientSideTranslatorPB;
|
||||||
|
import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolPB;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
|
||||||
|
import java.util.concurrent.locks.ReadWriteLock;
|
||||||
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SCMConnectionManager - Acts as a class that manages the membership
|
||||||
|
* information of the SCMs that we are working with.
|
||||||
|
*/
|
||||||
|
public class SCMConnectionManager {
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(SCMConnectionManager.class);
|
||||||
|
|
||||||
|
private final ReadWriteLock mapLock;
|
||||||
|
private final Map<InetSocketAddress, EndpointStateMachine> scmMachines;
|
||||||
|
|
||||||
|
private final int rpcTimeout;
|
||||||
|
private final Configuration conf;
|
||||||
|
|
||||||
|
|
||||||
|
public SCMConnectionManager(Configuration conf) {
|
||||||
|
this.mapLock = new ReentrantReadWriteLock();
|
||||||
|
Long timeOut = OzoneClientUtils.getScmRpcTimeOutInMilliseconds(conf);
|
||||||
|
this.rpcTimeout = timeOut.intValue();
|
||||||
|
this.scmMachines = new HashMap<>();
|
||||||
|
this.conf = conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns Config.
|
||||||
|
*
|
||||||
|
* @return ozoneConfig.
|
||||||
|
*/
|
||||||
|
public Configuration getConf() {
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get RpcTimeout.
|
||||||
|
*
|
||||||
|
* @return - Return RPC timeout.
|
||||||
|
*/
|
||||||
|
public long getRpcTimeout() {
|
||||||
|
return rpcTimeout;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a read lock.
|
||||||
|
*/
|
||||||
|
public void readLock() {
|
||||||
|
this.mapLock.readLock().lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases the read lock.
|
||||||
|
*/
|
||||||
|
public void readUnlock() {
|
||||||
|
this.mapLock.readLock().unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the write lock.
|
||||||
|
*/
|
||||||
|
public void writeLock() {
|
||||||
|
this.mapLock.writeLock().lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases the write lock.
|
||||||
|
*/
|
||||||
|
public void writeUnlock() {
|
||||||
|
this.mapLock.writeLock().unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* adds a new SCM machine to the target set.
|
||||||
|
*
|
||||||
|
* @param address - Address of the SCM machine to send heatbeat to.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void addSCMServer(InetSocketAddress address) throws IOException {
|
||||||
|
writeLock();
|
||||||
|
try {
|
||||||
|
if (scmMachines.containsKey(address)) {
|
||||||
|
LOG.warn("Trying to add an existing SCM Machine to Machines group. " +
|
||||||
|
"Ignoring the request.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
RPC.setProtocolEngine(conf, StorageContainerDatanodeProtocolPB.class,
|
||||||
|
ProtobufRpcEngine.class);
|
||||||
|
long version =
|
||||||
|
RPC.getProtocolVersion(StorageContainerDatanodeProtocolPB.class);
|
||||||
|
|
||||||
|
StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProxy(
|
||||||
|
StorageContainerDatanodeProtocolPB.class, version,
|
||||||
|
address, UserGroupInformation.getCurrentUser(), conf,
|
||||||
|
NetUtils.getDefaultSocketFactory(conf), rpcTimeout);
|
||||||
|
|
||||||
|
StorageContainerDatanodeProtocolClientSideTranslatorPB rpcClient =
|
||||||
|
new StorageContainerDatanodeProtocolClientSideTranslatorPB(rpcProxy);
|
||||||
|
EndpointStateMachine endPoint =
|
||||||
|
new EndpointStateMachine(address, rpcClient, conf);
|
||||||
|
scmMachines.put(address, endPoint);
|
||||||
|
} finally {
|
||||||
|
writeUnlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes a SCM machine for the target set.
|
||||||
|
*
|
||||||
|
* @param address - Address of the SCM machine to send heatbeat to.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void removeSCMServer(InetSocketAddress address) throws IOException {
|
||||||
|
writeLock();
|
||||||
|
try {
|
||||||
|
if (!scmMachines.containsKey(address)) {
|
||||||
|
LOG.warn("Trying to remove a non-existent SCM machine. " +
|
||||||
|
"Ignoring the request.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
EndpointStateMachine endPoint = scmMachines.get(address);
|
||||||
|
endPoint.close();
|
||||||
|
scmMachines.remove(address);
|
||||||
|
} finally {
|
||||||
|
writeUnlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all known RPCEndpoints.
|
||||||
|
*
|
||||||
|
* @return - List of RPC Endpoints.
|
||||||
|
*/
|
||||||
|
public Collection<EndpointStateMachine> getValues() {
|
||||||
|
return scmMachines.values();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,191 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.statemachine;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.datanode.InitDatanodeState;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.DatanodeState;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.datanode
|
||||||
|
.RunningDatanodeState;
|
||||||
|
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Queue;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import java.util.concurrent.locks.Lock;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Current Context of State Machine.
|
||||||
|
*/
|
||||||
|
public class StateContext {
|
||||||
|
private final Queue<SCMCommand> commandQueue;
|
||||||
|
private final Lock lock;
|
||||||
|
private final DatanodeStateMachine parent;
|
||||||
|
private final AtomicLong stateExecutionCount;
|
||||||
|
private final Configuration conf;
|
||||||
|
private DatanodeStateMachine.DatanodeStates state;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a StateContext.
|
||||||
|
*
|
||||||
|
* @param conf - Configration
|
||||||
|
* @param state - State
|
||||||
|
* @param parent Parent State Machine
|
||||||
|
*/
|
||||||
|
public StateContext(Configuration conf, DatanodeStateMachine.DatanodeStates
|
||||||
|
state, DatanodeStateMachine parent) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.state = state;
|
||||||
|
this.parent = parent;
|
||||||
|
commandQueue = new LinkedList<>();
|
||||||
|
lock = new ReentrantLock();
|
||||||
|
stateExecutionCount = new AtomicLong(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the ContainerStateMachine class that holds this state.
|
||||||
|
*
|
||||||
|
* @return ContainerStateMachine.
|
||||||
|
*/
|
||||||
|
public DatanodeStateMachine getParent() {
|
||||||
|
return parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if we are entering a new state.
|
||||||
|
*
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
boolean isEntering() {
|
||||||
|
return stateExecutionCount.get() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if we are exiting from the current state.
|
||||||
|
*
|
||||||
|
* @param newState - newState.
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
boolean isExiting(DatanodeStateMachine.DatanodeStates newState) {
|
||||||
|
boolean isExiting = state != newState && stateExecutionCount.get() > 0;
|
||||||
|
if(isExiting) {
|
||||||
|
stateExecutionCount.set(0);
|
||||||
|
}
|
||||||
|
return isExiting;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current state the machine is in.
|
||||||
|
*
|
||||||
|
* @return state.
|
||||||
|
*/
|
||||||
|
public DatanodeStateMachine.DatanodeStates getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the current state of the machine.
|
||||||
|
*
|
||||||
|
* @param state state.
|
||||||
|
*/
|
||||||
|
public void setState(DatanodeStateMachine.DatanodeStates state) {
|
||||||
|
this.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next task to get executed by the datanode state machine.
|
||||||
|
* @return A callable that will be executed by the
|
||||||
|
* {@link DatanodeStateMachine}
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public DatanodeState<DatanodeStateMachine.DatanodeStates> getTask() {
|
||||||
|
switch (this.state) {
|
||||||
|
case INIT:
|
||||||
|
return new InitDatanodeState(this.conf, parent.getConnectionManager(),
|
||||||
|
this);
|
||||||
|
case RUNNING:
|
||||||
|
return new RunningDatanodeState(this.conf, parent.getConnectionManager(),
|
||||||
|
this);
|
||||||
|
case SHUTDOWN:
|
||||||
|
return null;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("Not Implemented yet.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes the required state function.
|
||||||
|
*
|
||||||
|
* @param service - Executor Service
|
||||||
|
* @param time - seconds to wait
|
||||||
|
* @param unit - Seconds.
|
||||||
|
* @throws InterruptedException
|
||||||
|
* @throws ExecutionException
|
||||||
|
* @throws TimeoutException
|
||||||
|
*/
|
||||||
|
public void execute(ExecutorService service, long time, TimeUnit unit)
|
||||||
|
throws InterruptedException, ExecutionException, TimeoutException {
|
||||||
|
stateExecutionCount.incrementAndGet();
|
||||||
|
DatanodeState<DatanodeStateMachine.DatanodeStates> task = getTask();
|
||||||
|
if (this.isEntering()) {
|
||||||
|
task.onEnter();
|
||||||
|
}
|
||||||
|
task.execute(service);
|
||||||
|
DatanodeStateMachine.DatanodeStates newState = task.await(time, unit);
|
||||||
|
if (this.state != newState) {
|
||||||
|
if (isExiting(newState)) {
|
||||||
|
task.onExit();
|
||||||
|
}
|
||||||
|
this.setState(newState);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next command or null if it is empty.
|
||||||
|
*
|
||||||
|
* @return SCMCommand or Null.
|
||||||
|
*/
|
||||||
|
public SCMCommand getNextCommand() {
|
||||||
|
lock.lock();
|
||||||
|
try {
|
||||||
|
return commandQueue.poll();
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a command to the State Machine queue.
|
||||||
|
*
|
||||||
|
* @param command - SCMCommand.
|
||||||
|
*/
|
||||||
|
public void addCommand(SCMCommand command) {
|
||||||
|
lock.lock();
|
||||||
|
try {
|
||||||
|
commandQueue.add(command);
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.statemachine;
|
||||||
|
/**
|
||||||
|
|
||||||
|
State machine class is used by the container to denote various states a
|
||||||
|
container can be in and also is used for command processing.
|
||||||
|
|
||||||
|
Container has the following states.
|
||||||
|
|
||||||
|
Start -> getVersion -> Register -> Running -> Shutdown
|
||||||
|
|
||||||
|
*/
|
|
@ -0,0 +1,55 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states;
|
||||||
|
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
/**
 * State Interface that allows tasks to maintain states.
 *
 * @param <T> type of the value produced by {@link #await(long, TimeUnit)}.
 */
public interface DatanodeState<T> {

  /**
   * Hook that is called before entering this state.
   */
  void onEnter();

  /**
   * Hook that is called after exiting this state.
   */
  void onExit();

  /**
   * Executes one or more tasks that are needed by this state.
   *
   * @param executor - ExecutorService handed to the state for its work.
   */
  void execute(ExecutorService executor);

  /**
   * Waits for execute to finish.
   *
   * @param time     - Time to wait.
   * @param timeUnit - Unit of time.
   * @return the value produced by this state.
   * @throws InterruptedException if the wait is interrupted.
   * @throws ExecutionException if the executed work failed.
   * @throws TimeoutException if the work did not finish in time.
   */
  T await(long time, TimeUnit timeUnit)
      throws InterruptedException, ExecutionException, TimeoutException;
}
|
|
@ -0,0 +1,135 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.datanode;
|
||||||
|
|
||||||
|
import com.google.common.base.Optional;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.ozone.OzoneClientUtils;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.DatanodeState;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Init Datanode State is the task that gets run when we are in Init State.
|
||||||
|
*/
|
||||||
|
public class InitDatanodeState implements DatanodeState,
|
||||||
|
Callable<DatanodeStateMachine.DatanodeStates> {
|
||||||
|
static final Logger LOG = LoggerFactory.getLogger(InitDatanodeState.class);
|
||||||
|
private final SCMConnectionManager connectionManager;
|
||||||
|
private final Configuration conf;
|
||||||
|
private final StateContext context;
|
||||||
|
private Future<DatanodeStateMachine.DatanodeStates> result;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create InitDatanodeState Task.
|
||||||
|
*
|
||||||
|
* @param conf - Conf
|
||||||
|
* @param connectionManager - Connection Manager
|
||||||
|
* @param context - Current Context
|
||||||
|
*/
|
||||||
|
public InitDatanodeState(Configuration conf,
|
||||||
|
SCMConnectionManager connectionManager,
|
||||||
|
StateContext context) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.connectionManager = connectionManager;
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes a result, or throws an exception if unable to do so.
|
||||||
|
*
|
||||||
|
* @return computed result
|
||||||
|
* @throws Exception if unable to compute a result
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public DatanodeStateMachine.DatanodeStates call() throws Exception {
|
||||||
|
String[] addresses = conf.getStrings(OzoneConfigKeys.OZONE_SCM_NAMES);
|
||||||
|
final Optional<Integer> defaultPort = Optional.of(OzoneConfigKeys
|
||||||
|
.OZONE_SCM_DEFAULT_PORT);
|
||||||
|
|
||||||
|
if (addresses == null || addresses.length <= 0) {
|
||||||
|
LOG.error("SCM addresses need to be a set of valid DNS names " +
|
||||||
|
"or IP addresses. Null or empty address list found. Aborting " +
|
||||||
|
"containers.");
|
||||||
|
return DatanodeStateMachine.DatanodeStates.SHUTDOWN;
|
||||||
|
}
|
||||||
|
for (String address : addresses) {
|
||||||
|
Optional<String> hostname = OzoneClientUtils.getHostName(address);
|
||||||
|
if (!hostname.isPresent()) {
|
||||||
|
LOG.error("Invalid hostname for SCM.");
|
||||||
|
return DatanodeStateMachine.DatanodeStates.SHUTDOWN;
|
||||||
|
}
|
||||||
|
Optional<Integer> port = OzoneClientUtils.getHostPort(address);
|
||||||
|
InetSocketAddress addr = NetUtils.createSocketAddr(hostname.get(),
|
||||||
|
port.or(defaultPort.get()));
|
||||||
|
connectionManager.addSCMServer(addr);
|
||||||
|
}
|
||||||
|
return this.context.getState().getNextState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called before entering this state.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onEnter() {
|
||||||
|
LOG.trace("Entering init container state");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called After exiting this state.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onExit() {
|
||||||
|
LOG.trace("Exiting init container state");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes one or more tasks that is needed by this state.
|
||||||
|
*
|
||||||
|
* @param executor - ExecutorService
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void execute(ExecutorService executor) {
|
||||||
|
result = executor.submit(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait for execute to finish.
|
||||||
|
*
|
||||||
|
* @param time - Time
|
||||||
|
* @param timeUnit - Unit of time.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public DatanodeStateMachine.DatanodeStates await(long time,
|
||||||
|
TimeUnit timeUnit) throws InterruptedException,
|
||||||
|
ExecutionException, TimeoutException {
|
||||||
|
return result.get(time, timeUnit);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,297 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.datanode;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
|
import org.apache.hadoop.ozone.OzoneClientUtils;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.DatanodeStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.EndpointStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.SCMConnectionManager;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.DatanodeState;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.endpoint
|
||||||
|
.HeartbeatEndpointTask;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.endpoint
|
||||||
|
.RegisterEndpointTask;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.endpoint
|
||||||
|
.VersionEndpointTask;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetAddress;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.CompletionService;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorCompletionService;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class that implements handshake with SCM.
|
||||||
|
*/
|
||||||
|
public class RunningDatanodeState implements DatanodeState {
|
||||||
|
static final Logger
|
||||||
|
LOG = LoggerFactory.getLogger(RunningDatanodeState.class);
|
||||||
|
private final SCMConnectionManager connectionManager;
|
||||||
|
private final Configuration conf;
|
||||||
|
private final StateContext context;
|
||||||
|
private CompletionService<EndpointStateMachine.EndPointStates> ecs;
|
||||||
|
|
||||||
|
/**
 * Creates the RunningDatanodeState task.
 *
 * @param conf              - Configuration
 * @param connectionManager - Connection Manager
 * @param context           - Current Context
 */
public RunningDatanodeState(Configuration conf,
    SCMConnectionManager connectionManager,
    StateContext context) {
  this.conf = conf;
  this.context = context;
  this.connectionManager = connectionManager;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a datanode ID from the persisted information.
|
||||||
|
*
|
||||||
|
* @param idPath - Path to the ID File.
|
||||||
|
* @return DatanodeID
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto
|
||||||
|
readPersistedDatanodeID(Path idPath) throws IOException {
|
||||||
|
Preconditions.checkNotNull(idPath);
|
||||||
|
StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto
|
||||||
|
containerIDProto;
|
||||||
|
try (FileInputStream stream = new FileInputStream(idPath.toFile())) {
|
||||||
|
containerIDProto = StorageContainerDatanodeProtocolProtos
|
||||||
|
.ContainerNodeIDProto.parseFrom(stream);
|
||||||
|
return containerIDProto;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a DatanodeID from the datanode information.
|
||||||
|
*
|
||||||
|
* @return DatanodeID
|
||||||
|
* @throws UnknownHostException
|
||||||
|
*/
|
||||||
|
private DatanodeID createDatanodeID() throws UnknownHostException {
|
||||||
|
DatanodeID temp = new DatanodeID(
|
||||||
|
//TODO : Replace this with proper network and kerberos
|
||||||
|
// support code.
|
||||||
|
InetAddress.getLocalHost().getHostAddress().toString(),
|
||||||
|
DataNode.getHostName(conf),
|
||||||
|
UUID.randomUUID().toString(),
|
||||||
|
0, /** XferPort - SCM does not use this port */
|
||||||
|
0, /** Info port - SCM does not use this port */
|
||||||
|
0, /** Info Secure Port - SCM does not use this port */
|
||||||
|
0); /** IPC port - SCM does not use this port */
|
||||||
|
|
||||||
|
// TODO: make this dynamically discoverable. SCM can hand out this
|
||||||
|
// port number to calling applications. This makes it easy to run multiple
|
||||||
|
// container endpoints on the same machine.
|
||||||
|
temp.setContainerPort(OzoneClientUtils.getContainerPort(conf));
|
||||||
|
return temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new ContainerID that persists both DatanodeID and ClusterID.
|
||||||
|
*
|
||||||
|
* @param idPath Path to the id file.
|
||||||
|
* @return ContainerNodeIDProto
|
||||||
|
* @throws UnknownHostException
|
||||||
|
*/
|
||||||
|
private StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto
|
||||||
|
createNewContainerID(Path idPath)
|
||||||
|
throws IOException {
|
||||||
|
StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto
|
||||||
|
containerIDProto = StorageContainerDatanodeProtocolProtos
|
||||||
|
.ContainerNodeIDProto.newBuilder()
|
||||||
|
.setDatanodeID(createDatanodeID().getProtoBufMessage()).build();
|
||||||
|
try (FileOutputStream stream = new FileOutputStream(idPath.toFile())) {
|
||||||
|
stream.write(containerIDProto.toByteArray());
|
||||||
|
return containerIDProto;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns ContainerNodeIDProto or null in case of Error.
|
||||||
|
*
|
||||||
|
* @return ContainerNodeIDProto
|
||||||
|
*/
|
||||||
|
private StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto
|
||||||
|
getContainerNodeID() {
|
||||||
|
String dataNodeIDPath = conf.get(OzoneConfigKeys.OZONE_SCM_DATANODE_ID);
|
||||||
|
if (dataNodeIDPath == null || dataNodeIDPath.isEmpty()) {
|
||||||
|
LOG.error("A valid file path is needed for config setting {}",
|
||||||
|
OzoneConfigKeys.OZONE_SCM_DATANODE_ID);
|
||||||
|
|
||||||
|
// This is an unrecoverable error.
|
||||||
|
this.context.setState(DatanodeStateMachine.DatanodeStates.SHUTDOWN);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto nodeID;
|
||||||
|
// try to read an existing ContainerNode ID.
|
||||||
|
try {
|
||||||
|
nodeID = readPersistedDatanodeID(Paths.get(dataNodeIDPath));
|
||||||
|
if (nodeID != null) {
|
||||||
|
LOG.trace("Read Node ID :", nodeID.getDatanodeID().getDatanodeUuid());
|
||||||
|
return nodeID;
|
||||||
|
}
|
||||||
|
} catch (IOException ex) {
|
||||||
|
LOG.trace("Not able to find container Node ID, creating it.", ex);
|
||||||
|
}
|
||||||
|
// Not found, let us create a new datanode ID, persist it and return that
|
||||||
|
// info to SCM.
|
||||||
|
try {
|
||||||
|
nodeID = createNewContainerID(Paths.get(dataNodeIDPath));
|
||||||
|
LOG.trace("Created Node ID :", nodeID.getDatanodeID().getDatanodeUuid());
|
||||||
|
return nodeID;
|
||||||
|
} catch (IOException ex) {
|
||||||
|
LOG.error("Creating new node ID failed.", ex);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called before entering this state.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onEnter() {
|
||||||
|
LOG.trace("Entering handshake task.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called After exiting this state.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void onExit() {
|
||||||
|
LOG.trace("Exiting handshake task.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes one or more tasks that is needed by this state.
|
||||||
|
*
|
||||||
|
* @param executor - ExecutorService
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void execute(ExecutorService executor) {
|
||||||
|
ecs = new ExecutorCompletionService<>(executor);
|
||||||
|
for (EndpointStateMachine endpoint : connectionManager.getValues()) {
|
||||||
|
Callable<EndpointStateMachine.EndPointStates> endpointTask
|
||||||
|
= getEndPointTask(endpoint);
|
||||||
|
ecs.submit(endpointTask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Callable<EndpointStateMachine.EndPointStates>
|
||||||
|
getEndPointTask(EndpointStateMachine endpoint) {
|
||||||
|
switch (endpoint.getState()) {
|
||||||
|
case GETVERSION:
|
||||||
|
return new VersionEndpointTask(endpoint, conf);
|
||||||
|
case REGISTER:
|
||||||
|
return RegisterEndpointTask.newBuilder()
|
||||||
|
.setConfig(conf)
|
||||||
|
.setEndpointStateMachine(endpoint)
|
||||||
|
.setNodeID(getContainerNodeID())
|
||||||
|
.build();
|
||||||
|
case HEARTBEAT:
|
||||||
|
return HeartbeatEndpointTask.newBuilder()
|
||||||
|
.setConfig(conf)
|
||||||
|
.setEndpointStateMachine(endpoint)
|
||||||
|
.setNodeID(getContainerNodeID())
|
||||||
|
.build();
|
||||||
|
case SHUTDOWN:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("Illegal Argument.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the next state the container state machine must move to by looking
|
||||||
|
* at all the state of endpoints.
|
||||||
|
* <p>
|
||||||
|
* if any endpoint state has moved to Shutdown, either we have an
|
||||||
|
* unrecoverable error or we have been told to shutdown. Either case the
|
||||||
|
* datanode state machine should move to Shutdown state, otherwise we
|
||||||
|
* remain in the Running state.
|
||||||
|
*
|
||||||
|
* @return next container state.
|
||||||
|
*/
|
||||||
|
private DatanodeStateMachine.DatanodeStates
|
||||||
|
computeNextContainerState(
|
||||||
|
List<Future<EndpointStateMachine.EndPointStates>> results) {
|
||||||
|
for (Future<EndpointStateMachine.EndPointStates> state : results) {
|
||||||
|
try {
|
||||||
|
if (state.get() == EndpointStateMachine.EndPointStates.SHUTDOWN) {
|
||||||
|
// if any endpoint tells us to shutdown we move to shutdown state.
|
||||||
|
return DatanodeStateMachine.DatanodeStates.SHUTDOWN;
|
||||||
|
}
|
||||||
|
} catch (InterruptedException | ExecutionException e) {
|
||||||
|
LOG.error("Error in executing end point task.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return DatanodeStateMachine.DatanodeStates.RUNNING;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait for execute to finish.
|
||||||
|
*
|
||||||
|
* @param duration - Time
|
||||||
|
* @param timeUnit - Unit of duration.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public DatanodeStateMachine.DatanodeStates
|
||||||
|
await(long duration, TimeUnit timeUnit)
|
||||||
|
throws InterruptedException, ExecutionException, TimeoutException {
|
||||||
|
int count = connectionManager.getValues().size();
|
||||||
|
int returned = 0;
|
||||||
|
long timeLeft = timeUnit.toMillis(duration);
|
||||||
|
long startTime = Time.monotonicNow();
|
||||||
|
List<Future<EndpointStateMachine.EndPointStates>> results = new
|
||||||
|
LinkedList<>();
|
||||||
|
|
||||||
|
while (returned < count && timeLeft > 0) {
|
||||||
|
Future<EndpointStateMachine.EndPointStates> result =
|
||||||
|
ecs.poll(timeLeft, TimeUnit.MILLISECONDS);
|
||||||
|
if (result != null) {
|
||||||
|
results.add(result);
|
||||||
|
returned++;
|
||||||
|
}
|
||||||
|
timeLeft = timeLeft - (Time.monotonicNow() - startTime);
|
||||||
|
}
|
||||||
|
return computeNextContainerState(results);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.datanode;
|
||||||
|
/**
|
||||||
|
This package contains files that guide the state transitions from
|
||||||
|
Init->Running->Shutdown for the datanode.
|
||||||
|
*/
|
|
@ -0,0 +1,181 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.endpoint;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.EndpointStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Heartbeat class for SCMs.
|
||||||
|
*/
|
||||||
|
public class HeartbeatEndpointTask
|
||||||
|
implements Callable<EndpointStateMachine.EndPointStates> {
|
||||||
|
static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(HeartbeatEndpointTask.class);
|
||||||
|
private final EndpointStateMachine rpcEndpoint;
|
||||||
|
private final Configuration conf;
|
||||||
|
private ContainerNodeIDProto containerNodeIDProto;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a SCM heart beat.
|
||||||
|
*
|
||||||
|
* @param conf Config.
|
||||||
|
*/
|
||||||
|
public HeartbeatEndpointTask(EndpointStateMachine rpcEndpoint,
|
||||||
|
Configuration conf) {
|
||||||
|
this.rpcEndpoint = rpcEndpoint;
|
||||||
|
this.conf = conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the container Node ID proto.
|
||||||
|
*
|
||||||
|
* @return ContainerNodeIDProto
|
||||||
|
*/
|
||||||
|
public ContainerNodeIDProto getContainerNodeIDProto() {
|
||||||
|
return containerNodeIDProto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set container node ID proto.
|
||||||
|
*
|
||||||
|
* @param containerNodeIDProto - the node id.
|
||||||
|
*/
|
||||||
|
public void setContainerNodeIDProto(ContainerNodeIDProto
|
||||||
|
containerNodeIDProto) {
|
||||||
|
this.containerNodeIDProto = containerNodeIDProto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes a result, or throws an exception if unable to do so.
|
||||||
|
*
|
||||||
|
* @return computed result
|
||||||
|
* @throws Exception if unable to compute a result
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public EndpointStateMachine.EndPointStates call() throws Exception {
|
||||||
|
rpcEndpoint.lock();
|
||||||
|
try {
|
||||||
|
Preconditions.checkState(this.containerNodeIDProto != null);
|
||||||
|
DatanodeID datanodeID = DatanodeID.getFromProtoBuf(this
|
||||||
|
.containerNodeIDProto.getDatanodeID());
|
||||||
|
// TODO : Add the command to command processor queue.
|
||||||
|
rpcEndpoint.getEndPoint().sendHeartbeat(datanodeID);
|
||||||
|
rpcEndpoint.zeroMissedCount();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
rpcEndpoint.logIfNeeded(ex
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
rpcEndpoint.unlock();
|
||||||
|
}
|
||||||
|
return rpcEndpoint.getState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a builder class for HeartbeatEndpointTask task.
|
||||||
|
* @return Builder.
|
||||||
|
*/
|
||||||
|
public static Builder newBuilder() {
|
||||||
|
return new Builder();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builder class for HeartbeatEndpointTask.
|
||||||
|
*/
|
||||||
|
public static class Builder {
|
||||||
|
private EndpointStateMachine endPointStateMachine;
|
||||||
|
private Configuration conf;
|
||||||
|
private ContainerNodeIDProto containerNodeIDProto;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the builder class.
|
||||||
|
*/
|
||||||
|
public Builder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the endpoint state machine.
|
||||||
|
*
|
||||||
|
* @param rpcEndPoint - Endpoint state machine.
|
||||||
|
* @return Builder
|
||||||
|
*/
|
||||||
|
public Builder setEndpointStateMachine(EndpointStateMachine rpcEndPoint) {
|
||||||
|
this.endPointStateMachine = rpcEndPoint;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the Config.
|
||||||
|
*
|
||||||
|
* @param config - config
|
||||||
|
* @return Builder
|
||||||
|
*/
|
||||||
|
public Builder setConfig(Configuration config) {
|
||||||
|
this.conf = config;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the NodeID.
|
||||||
|
*
|
||||||
|
* @param nodeID - NodeID proto
|
||||||
|
* @return Builder
|
||||||
|
*/
|
||||||
|
public Builder setNodeID(ContainerNodeIDProto nodeID) {
|
||||||
|
this.containerNodeIDProto = nodeID;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HeartbeatEndpointTask build() {
|
||||||
|
if (endPointStateMachine == null) {
|
||||||
|
LOG.error("No endpoint specified.");
|
||||||
|
throw new IllegalArgumentException("A valid endpoint state machine is" +
|
||||||
|
" needed to construct HeartbeatEndpointTask task");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conf == null) {
|
||||||
|
LOG.error("No config specified.");
|
||||||
|
throw new IllegalArgumentException("A valid configration is needed to" +
|
||||||
|
" construct HeartbeatEndpointTask task");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (containerNodeIDProto == null) {
|
||||||
|
LOG.error("No nodeID specified.");
|
||||||
|
throw new IllegalArgumentException("A vaild Node ID is needed to " +
|
||||||
|
"construct HeartbeatEndpointTask task");
|
||||||
|
}
|
||||||
|
|
||||||
|
HeartbeatEndpointTask task = new HeartbeatEndpointTask(this
|
||||||
|
.endPointStateMachine, this.conf);
|
||||||
|
task.setContainerNodeIDProto(containerNodeIDProto);
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,198 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.endpoint;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.EndpointStateMachine;
|
||||||
|
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register a container with SCM.
|
||||||
|
*/
|
||||||
|
public final class RegisterEndpointTask implements
|
||||||
|
Callable<EndpointStateMachine.EndPointStates> {
|
||||||
|
static final Logger LOG = LoggerFactory.getLogger(RegisterEndpointTask.class);
|
||||||
|
|
||||||
|
private final EndpointStateMachine rpcEndPoint;
|
||||||
|
private final Configuration conf;
|
||||||
|
private Future<EndpointStateMachine.EndPointStates> result;
|
||||||
|
private ContainerNodeIDProto containerNodeIDProto;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a register endpoint task.
|
||||||
|
*
|
||||||
|
* @param rpcEndPoint - endpoint
|
||||||
|
* @param conf - conf
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
public RegisterEndpointTask(EndpointStateMachine rpcEndPoint,
|
||||||
|
Configuration conf) {
|
||||||
|
this.rpcEndPoint = rpcEndPoint;
|
||||||
|
this.conf = conf;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the ContainerNodeID Proto.
|
||||||
|
*
|
||||||
|
* @return ContainerNodeIDProto
|
||||||
|
*/
|
||||||
|
public ContainerNodeIDProto getContainerNodeIDProto() {
|
||||||
|
return containerNodeIDProto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the contiainerNodeID Proto.
|
||||||
|
*
|
||||||
|
* @param containerNodeIDProto - Container Node ID.
|
||||||
|
*/
|
||||||
|
public void setContainerNodeIDProto(ContainerNodeIDProto
|
||||||
|
containerNodeIDProto) {
|
||||||
|
this.containerNodeIDProto = containerNodeIDProto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes a result, or throws an exception if unable to do so.
|
||||||
|
*
|
||||||
|
* @return computed result
|
||||||
|
* @throws Exception if unable to compute a result
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public EndpointStateMachine.EndPointStates call() throws Exception {
|
||||||
|
|
||||||
|
if (getContainerNodeIDProto() == null) {
|
||||||
|
LOG.error("Container ID proto cannot be null in RegisterEndpoint task, " +
|
||||||
|
"shutting down the endpoint.");
|
||||||
|
return rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN);
|
||||||
|
}
|
||||||
|
|
||||||
|
rpcEndPoint.lock();
|
||||||
|
try {
|
||||||
|
DatanodeID dnNodeID = DatanodeID.getFromProtoBuf(
|
||||||
|
getContainerNodeIDProto().getDatanodeID());
|
||||||
|
|
||||||
|
// TODO : Add responses to the command Queue.
|
||||||
|
rpcEndPoint.getEndPoint().register(dnNodeID,
|
||||||
|
conf.getStrings(OzoneConfigKeys.OZONE_SCM_NAMES));
|
||||||
|
EndpointStateMachine.EndPointStates nextState =
|
||||||
|
rpcEndPoint.getState().getNextState();
|
||||||
|
rpcEndPoint.setState(nextState);
|
||||||
|
rpcEndPoint.zeroMissedCount();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
rpcEndPoint.logIfNeeded(ex
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
rpcEndPoint.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
return rpcEndPoint.getState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a builder class for RegisterEndPoint task.
|
||||||
|
*
|
||||||
|
* @return Builder.
|
||||||
|
*/
|
||||||
|
public static Builder newBuilder() {
|
||||||
|
return new Builder();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builder class for RegisterEndPoint task.
|
||||||
|
*/
|
||||||
|
public static class Builder {
|
||||||
|
private EndpointStateMachine endPointStateMachine;
|
||||||
|
private Configuration conf;
|
||||||
|
private ContainerNodeIDProto containerNodeIDProto;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the builder class.
|
||||||
|
*/
|
||||||
|
public Builder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the endpoint state machine.
|
||||||
|
*
|
||||||
|
* @param rpcEndPoint - Endpoint state machine.
|
||||||
|
* @return Builder
|
||||||
|
*/
|
||||||
|
public Builder setEndpointStateMachine(EndpointStateMachine rpcEndPoint) {
|
||||||
|
this.endPointStateMachine = rpcEndPoint;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the Config.
|
||||||
|
*
|
||||||
|
* @param config - config
|
||||||
|
* @return Builder.
|
||||||
|
*/
|
||||||
|
public Builder setConfig(Configuration config) {
|
||||||
|
this.conf = config;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the NodeID.
|
||||||
|
*
|
||||||
|
* @param nodeID - NodeID proto
|
||||||
|
* @return Builder
|
||||||
|
*/
|
||||||
|
public Builder setNodeID(ContainerNodeIDProto nodeID) {
|
||||||
|
this.containerNodeIDProto = nodeID;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public RegisterEndpointTask build() {
|
||||||
|
if (endPointStateMachine == null) {
|
||||||
|
LOG.error("No endpoint specified.");
|
||||||
|
throw new IllegalArgumentException("A valid endpoint state machine is" +
|
||||||
|
" needed to construct RegisterEndPoint task");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conf == null) {
|
||||||
|
LOG.error("No config specified.");
|
||||||
|
throw new IllegalArgumentException("A valid configration is needed to" +
|
||||||
|
" construct RegisterEndpoint task");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (containerNodeIDProto == null) {
|
||||||
|
LOG.error("No nodeID specified.");
|
||||||
|
throw new IllegalArgumentException("A vaild Node ID is needed to " +
|
||||||
|
"construct RegisterEndpoint task");
|
||||||
|
}
|
||||||
|
|
||||||
|
RegisterEndpointTask task = new RegisterEndpointTask(this
|
||||||
|
.endPointStateMachine, this.conf);
|
||||||
|
task.setContainerNodeIDProto(containerNodeIDProto);
|
||||||
|
return task;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.endpoint;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.protocol.VersionResponse;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Task that returns version.
|
||||||
|
*/
|
||||||
|
public class VersionEndpointTask implements
|
||||||
|
Callable<EndpointStateMachine.EndPointStates> {
|
||||||
|
private final EndpointStateMachine rpcEndPoint;
|
||||||
|
private final Configuration configuration;
|
||||||
|
|
||||||
|
public VersionEndpointTask(EndpointStateMachine rpcEndPoint,
|
||||||
|
Configuration conf) {
|
||||||
|
this.rpcEndPoint = rpcEndPoint;
|
||||||
|
this.configuration = conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes a result, or throws an exception if unable to do so.
|
||||||
|
*
|
||||||
|
* @return computed result
|
||||||
|
* @throws Exception if unable to compute a result
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public EndpointStateMachine.EndPointStates call() throws Exception {
|
||||||
|
rpcEndPoint.lock();
|
||||||
|
try{
|
||||||
|
SCMVersionResponseProto versionResponse =
|
||||||
|
rpcEndPoint.getEndPoint().getVersion();
|
||||||
|
rpcEndPoint.setVersion(VersionResponse.getFromProtobuf(versionResponse));
|
||||||
|
|
||||||
|
EndpointStateMachine.EndPointStates nextState =
|
||||||
|
rpcEndPoint.getState().getNextState();
|
||||||
|
rpcEndPoint.setState(nextState);
|
||||||
|
rpcEndPoint.zeroMissedCount();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
rpcEndPoint.logIfNeeded(ex);
|
||||||
|
} finally {
|
||||||
|
rpcEndPoint.unlock();
|
||||||
|
}
|
||||||
|
return rpcEndPoint.getState();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states.endpoint;
|
||||||
|
/**
|
||||||
|
This package contains code for RPC endpoints transitions.
|
||||||
|
*/
|
|
@ -0,0 +1,18 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common.states;
|
|
@ -64,8 +64,8 @@ public class OzoneContainer {
|
||||||
Configuration ozoneConfig,
|
Configuration ozoneConfig,
|
||||||
FsDatasetSpi<? extends FsVolumeSpi> dataSet) throws Exception {
|
FsDatasetSpi<? extends FsVolumeSpi> dataSet) throws Exception {
|
||||||
List<StorageLocation> locations = new LinkedList<>();
|
List<StorageLocation> locations = new LinkedList<>();
|
||||||
String[] paths = ozoneConfig.getStrings(OzoneConfigKeys
|
String[] paths = ozoneConfig.getStrings(
|
||||||
.OZONE_METADATA_DIRS);
|
OzoneConfigKeys.OZONE_CONTAINER_METADATA_DIRS);
|
||||||
if (paths != null && paths.length > 0) {
|
if (paths != null && paths.length > 0) {
|
||||||
for (String p : paths) {
|
for (String p : paths) {
|
||||||
locations.add(StorageLocation.parse(p));
|
locations.add(StorageLocation.parse(p));
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.protocol;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The protocol spoken between datanodes and SCM. For specifics please see the
|
||||||
|
* Protoc file that defines this protocol.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public interface StorageContainerDatanodeProtocol {
|
||||||
|
/**
|
||||||
|
* Returns SCM version.
|
||||||
|
* @return Version info.
|
||||||
|
*/
|
||||||
|
SCMVersionResponseProto getVersion() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used by data node to send a Heartbeat.
|
||||||
|
* @param datanodeID - Datanode ID.
|
||||||
|
* @return - SCMHeartbeatResponseProto
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
SCMHeartbeatResponseProto sendHeartbeat(DatanodeID datanodeID)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register Datanode.
|
||||||
|
* @param datanodeID - DatanodeID.
|
||||||
|
* @param scmAddresses - List of SCMs this datanode is configured to
|
||||||
|
* communicate.
|
||||||
|
* @return SCM Command.
|
||||||
|
*/
|
||||||
|
SCMRegisteredCmdResponseProto register(DatanodeID datanodeID,
|
||||||
|
String[] scmAddresses) throws IOException;
|
||||||
|
|
||||||
|
}
|
|
@ -17,7 +17,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.ozone.protocol.commands;
|
package org.apache.hadoop.ozone.protocol.commands;
|
||||||
|
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandResponseProto.Type;
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.Type;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.NullCmdResponseProto;
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.NullCmdResponseProto;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,15 +18,19 @@
|
||||||
package org.apache.hadoop.ozone.protocol.commands;
|
package org.apache.hadoop.ozone.protocol.commands;
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.RegisteredCmdResponseProto;
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.RegisteredCmdResponseProto.ErrorCode;
|
.StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandResponseProto.Type;
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto
|
||||||
|
.ErrorCode;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.Type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Response to Datanode Register call.
|
* Response to Datanode Register call.
|
||||||
*/
|
*/
|
||||||
public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
public class RegisteredCommand extends
|
||||||
|
SCMCommand<SCMRegisteredCmdResponseProto> {
|
||||||
private String datanodeUUID;
|
private String datanodeUUID;
|
||||||
private String clusterID;
|
private String clusterID;
|
||||||
private ErrorCode error;
|
private ErrorCode error;
|
||||||
|
@ -38,8 +42,6 @@ public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
||||||
this.error = error;
|
this.error = error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a new builder.
|
* Returns a new builder.
|
||||||
*
|
*
|
||||||
|
@ -56,11 +58,12 @@ public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
Type getType() {
|
Type getType() {
|
||||||
return Type.registeredCmd;
|
return Type.registeredCommand;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns datanode UUID.
|
* Returns datanode UUID.
|
||||||
|
*
|
||||||
* @return - Datanode ID.
|
* @return - Datanode ID.
|
||||||
*/
|
*/
|
||||||
public String getDatanodeUUID() {
|
public String getDatanodeUUID() {
|
||||||
|
@ -69,6 +72,7 @@ public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns cluster ID.
|
* Returns cluster ID.
|
||||||
|
*
|
||||||
* @return -- ClusterID
|
* @return -- ClusterID
|
||||||
*/
|
*/
|
||||||
public String getClusterID() {
|
public String getClusterID() {
|
||||||
|
@ -77,6 +81,7 @@ public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns ErrorCode.
|
* Returns ErrorCode.
|
||||||
|
*
|
||||||
* @return - ErrorCode
|
* @return - ErrorCode
|
||||||
*/
|
*/
|
||||||
public ErrorCode getError() {
|
public ErrorCode getError() {
|
||||||
|
@ -89,8 +94,8 @@ public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
||||||
* @return A protobuf message.
|
* @return A protobuf message.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
RegisteredCmdResponseProto getProtoBufMessage() {
|
SCMRegisteredCmdResponseProto getProtoBufMessage() {
|
||||||
return RegisteredCmdResponseProto.newBuilder()
|
return SCMRegisteredCmdResponseProto.newBuilder()
|
||||||
.setClusterID(this.clusterID)
|
.setClusterID(this.clusterID)
|
||||||
.setDatanodeUUID(this.datanodeUUID)
|
.setDatanodeUUID(this.datanodeUUID)
|
||||||
.setErrorCode(this.error)
|
.setErrorCode(this.error)
|
||||||
|
@ -122,7 +127,7 @@ public class RegisteredCommand extends SCMCommand<RegisteredCmdResponseProto> {
|
||||||
* @param response - RegisteredCmdResponseProto
|
* @param response - RegisteredCmdResponseProto
|
||||||
* @return RegisteredCommand
|
* @return RegisteredCommand
|
||||||
*/
|
*/
|
||||||
public RegisteredCommand getFromProtobuf(RegisteredCmdResponseProto
|
public RegisteredCommand getFromProtobuf(SCMRegisteredCmdResponseProto
|
||||||
response) {
|
response) {
|
||||||
Preconditions.checkNotNull(response);
|
Preconditions.checkNotNull(response);
|
||||||
return new RegisteredCommand(response.getErrorCode(),
|
return new RegisteredCommand(response.getErrorCode(),
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
package org.apache.hadoop.ozone.protocol.commands;
|
package org.apache.hadoop.ozone.protocol.commands;
|
||||||
|
|
||||||
import org.apache.hadoop.ozone.protocol.proto
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
.StorageContainerDatanodeProtocolProtos.SCMCommandResponseProto.Type;
|
.StorageContainerDatanodeProtocolProtos.Type;
|
||||||
import com.google.protobuf.GeneratedMessage;
|
import com.google.protobuf.GeneratedMessage;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -0,0 +1,154 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.protocolPB;
|
||||||
|
|
||||||
|
import com.google.protobuf.RpcController;
|
||||||
|
import com.google.protobuf.ServiceException;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ipc.ProtobufHelper;
|
||||||
|
import org.apache.hadoop.ipc.ProtocolTranslator;
|
||||||
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
import org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMRegisterRequestProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is the client-side translator to translate the requests made on
|
||||||
|
* the {@link StorageContainerDatanodeProtocol} interface to the RPC server
|
||||||
|
* implementing {@link StorageContainerDatanodeProtocolPB}.
|
||||||
|
*/
|
||||||
|
public class StorageContainerDatanodeProtocolClientSideTranslatorPB
|
||||||
|
implements StorageContainerDatanodeProtocol, ProtocolTranslator, Closeable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RpcController is not used and hence is set to null.
|
||||||
|
*/
|
||||||
|
private static final RpcController NULL_RPC_CONTROLLER = null;
|
||||||
|
private final StorageContainerDatanodeProtocolPB rpcProxy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a Client side interface that calls into SCM datanode protocol.
|
||||||
|
*
|
||||||
|
* @param rpcProxy - Proxy for RPC.
|
||||||
|
*/
|
||||||
|
public StorageContainerDatanodeProtocolClientSideTranslatorPB(
|
||||||
|
StorageContainerDatanodeProtocolPB rpcProxy) {
|
||||||
|
this.rpcProxy = rpcProxy;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes this stream and releases any system resources associated with it. If
|
||||||
|
* the stream is already closed then invoking this method has no effect.
|
||||||
|
* <p>
|
||||||
|
* <p> As noted in {@link AutoCloseable#close()}, cases where the close may
|
||||||
|
* fail require careful attention. It is strongly advised to relinquish the
|
||||||
|
* underlying resources and to internally <em>mark</em> the {@code Closeable}
|
||||||
|
* as closed, prior to throwing the {@code IOException}.
|
||||||
|
*
|
||||||
|
* @throws IOException if an I/O error occurs
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
RPC.stopProxy(rpcProxy);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the proxy object underlying this protocol translator.
|
||||||
|
*
|
||||||
|
* @return the proxy object underlying this protocol translator.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Object getUnderlyingProxyObject() {
|
||||||
|
return rpcProxy;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns SCM version.
|
||||||
|
*
|
||||||
|
* @return Version info.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public SCMVersionResponseProto getVersion() throws IOException {
|
||||||
|
|
||||||
|
SCMVersionRequestProto request =
|
||||||
|
SCMVersionRequestProto.newBuilder().build();
|
||||||
|
final SCMVersionResponseProto response;
|
||||||
|
try {
|
||||||
|
response = rpcProxy.getVersion(NULL_RPC_CONTROLLER, request);
|
||||||
|
} catch (ServiceException ex) {
|
||||||
|
throw ProtobufHelper.getRemoteException(ex);
|
||||||
|
}
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends a heartbeat from the datanode to SCM.
|
||||||
|
*
|
||||||
|
* @param datanodeID - DatanodeID
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SCMHeartbeatResponseProto sendHeartbeat(DatanodeID datanodeID)
|
||||||
|
throws IOException {
|
||||||
|
SCMHeartbeatRequestProto.Builder req =
|
||||||
|
SCMHeartbeatRequestProto.newBuilder();
|
||||||
|
req.setDatanodeID(datanodeID.getProtoBufMessage());
|
||||||
|
final SCMHeartbeatResponseProto resp;
|
||||||
|
try {
|
||||||
|
resp = rpcProxy.sendHeartbeat(NULL_RPC_CONTROLLER, req.build());
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
throw ProtobufHelper.getRemoteException(e);
|
||||||
|
}
|
||||||
|
return resp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register Datanode.
|
||||||
|
*
|
||||||
|
* @param datanodeID - DatanodeID.
|
||||||
|
* @return SCM Command.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public SCMRegisteredCmdResponseProto register(DatanodeID datanodeID,
|
||||||
|
String[] scmAddresses) throws IOException {
|
||||||
|
SCMRegisterRequestProto.Builder req =
|
||||||
|
SCMRegisterRequestProto.newBuilder();
|
||||||
|
req.setDatanodeID(datanodeID.getProtoBufMessage());
|
||||||
|
final SCMRegisteredCmdResponseProto response;
|
||||||
|
try {
|
||||||
|
response = rpcProxy.register(NULL_RPC_CONTROLLER, req.build());
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
throw ProtobufHelper.getRemoteException(e);
|
||||||
|
}
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p/>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p/>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.protocolPB;
|
||||||
|
|
||||||
|
import org.apache.hadoop.ipc.ProtocolInfo;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageContainerDatanodeProtocolService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Protocol used from a datanode to StorageContainerManager. This extends
|
||||||
|
* the Protocol Buffers service interface to add Hadoop-specific annotations.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@ProtocolInfo(protocolName =
|
||||||
|
"org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol",
|
||||||
|
protocolVersion = 1)
|
||||||
|
public interface StorageContainerDatanodeProtocolPB extends
|
||||||
|
StorageContainerDatanodeProtocolService.BlockingInterface {
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.protocolPB;
|
||||||
|
|
||||||
|
import com.google.protobuf.RpcController;
|
||||||
|
import com.google.protobuf.ServiceException;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is the server-side translator that forwards requests received on
|
||||||
|
* {@link StorageContainerDatanodeProtocolPB} to the {@link
|
||||||
|
* StorageContainerDatanodeProtocol} server implementation.
|
||||||
|
*/
|
||||||
|
public class StorageContainerDatanodeProtocolServerSideTranslatorPB
|
||||||
|
implements StorageContainerDatanodeProtocolPB {
|
||||||
|
|
||||||
|
private final StorageContainerDatanodeProtocol impl;
|
||||||
|
|
||||||
|
public StorageContainerDatanodeProtocolServerSideTranslatorPB(
|
||||||
|
StorageContainerDatanodeProtocol impl) {
|
||||||
|
this.impl = impl;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto
|
||||||
|
getVersion(RpcController controller,
|
||||||
|
StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto request)
|
||||||
|
throws ServiceException {
|
||||||
|
try {
|
||||||
|
return impl.getVersion();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ServiceException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto
|
||||||
|
register(RpcController controller, StorageContainerDatanodeProtocolProtos
|
||||||
|
.SCMRegisterRequestProto request) throws ServiceException {
|
||||||
|
String[] addressArray = null;
|
||||||
|
|
||||||
|
if (request.hasAddressList()) {
|
||||||
|
addressArray = request.getAddressList().getAddressListList()
|
||||||
|
.toArray(new String[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return impl.register(DatanodeID.getFromProtoBuf(request
|
||||||
|
.getDatanodeID()), addressArray);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ServiceException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SCMHeartbeatResponseProto
|
||||||
|
sendHeartbeat(RpcController controller,
|
||||||
|
SCMHeartbeatRequestProto request) throws ServiceException {
|
||||||
|
try {
|
||||||
|
return impl.sendHeartbeat(DatanodeID.getFromProtoBuf(request
|
||||||
|
.getDatanodeID()));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ServiceException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -26,6 +26,8 @@ public final class VersionInfo {
|
||||||
private final static VersionInfo[] VERSION_INFOS =
|
private final static VersionInfo[] VERSION_INFOS =
|
||||||
{new VersionInfo("First version of SCM", 1)};
|
{new VersionInfo("First version of SCM", 1)};
|
||||||
|
|
||||||
|
|
||||||
|
public static final String DESCRIPTION_KEY = "Description";
|
||||||
private final String description;
|
private final String description;
|
||||||
private final int version;
|
private final int version;
|
||||||
|
|
||||||
|
|
|
@ -29,8 +29,11 @@ import org.apache.hadoop.ozone.protocol.StorageContainerNodeProtocol;
|
||||||
import org.apache.hadoop.ozone.protocol.VersionResponse;
|
import org.apache.hadoop.ozone.protocol.VersionResponse;
|
||||||
import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
|
import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
|
||||||
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
|
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.RegisteredCmdResponseProto.ErrorCode;
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
|
.StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto
|
||||||
|
.ErrorCode;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
|
||||||
|
|
||||||
import org.apache.hadoop.ozone.scm.VersionInfo;
|
import org.apache.hadoop.ozone.scm.VersionInfo;
|
||||||
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
||||||
|
@ -43,7 +46,6 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
@ -580,21 +582,20 @@ public class SCMNodeManager
|
||||||
@Override
|
@Override
|
||||||
public SCMCommand register(DatanodeID datanodeID) {
|
public SCMCommand register(DatanodeID datanodeID) {
|
||||||
|
|
||||||
SCMCommand errorCode = verifyDatanodeUUID(datanodeID);
|
SCMCommand responseCommand = verifyDatanodeUUID(datanodeID);
|
||||||
if (errorCode != null) {
|
if (responseCommand != null) {
|
||||||
return errorCode;
|
return responseCommand;
|
||||||
}
|
}
|
||||||
DatanodeID newDatanodeID = new DatanodeID(UUID.randomUUID().toString(),
|
|
||||||
datanodeID);
|
nodes.put(datanodeID.getDatanodeUuid(), datanodeID);
|
||||||
nodes.put(newDatanodeID.getDatanodeUuid(), newDatanodeID);
|
|
||||||
totalNodes.incrementAndGet();
|
totalNodes.incrementAndGet();
|
||||||
healthyNodes.put(newDatanodeID.getDatanodeUuid(), monotonicNow());
|
healthyNodes.put(datanodeID.getDatanodeUuid(), monotonicNow());
|
||||||
healthyNodeCount.incrementAndGet();
|
healthyNodeCount.incrementAndGet();
|
||||||
LOG.info("Data node with ID: {} Registered.",
|
LOG.info("Data node with ID: {} Registered.",
|
||||||
newDatanodeID.getDatanodeUuid());
|
datanodeID.getDatanodeUuid());
|
||||||
return RegisteredCommand.newBuilder()
|
return RegisteredCommand.newBuilder()
|
||||||
.setErrorCode(ErrorCode.success)
|
.setErrorCode(ErrorCode.success)
|
||||||
.setDatanodeUUID(newDatanodeID.getDatanodeUuid())
|
.setDatanodeUUID(datanodeID.getDatanodeUuid())
|
||||||
.setClusterID(this.clusterID)
|
.setClusterID(this.clusterID)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
@ -607,20 +608,12 @@ public class SCMNodeManager
|
||||||
*/
|
*/
|
||||||
private SCMCommand verifyDatanodeUUID(DatanodeID datanodeID) {
|
private SCMCommand verifyDatanodeUUID(DatanodeID datanodeID) {
|
||||||
|
|
||||||
// Make sure that we return the right error code, so that
|
|
||||||
// data node can log the correct error. if it is already registered then
|
|
||||||
// datanode should move to heartbeat state. It implies that somehow we
|
|
||||||
// have an error where the data node is trying to re-register.
|
|
||||||
//
|
|
||||||
// We are going to let the datanode know that there is an error but allow it
|
|
||||||
// to recover by sending it the right info that is needed for recovery.
|
|
||||||
|
|
||||||
if (datanodeID.getDatanodeUuid() != null &&
|
if (datanodeID.getDatanodeUuid() != null &&
|
||||||
nodes.containsKey(datanodeID.getDatanodeUuid())) {
|
nodes.containsKey(datanodeID.getDatanodeUuid())) {
|
||||||
LOG.error("Datanode is already registered. Datanode: {}",
|
LOG.trace("Datanode is already registered. Datanode: {}",
|
||||||
datanodeID.toString());
|
datanodeID.toString());
|
||||||
return RegisteredCommand.newBuilder()
|
return RegisteredCommand.newBuilder()
|
||||||
.setErrorCode(ErrorCode.errorNodeAlreadyRegistered)
|
.setErrorCode(ErrorCode.success)
|
||||||
.setClusterID(this.clusterID)
|
.setClusterID(this.clusterID)
|
||||||
.setDatanodeUUID(datanodeID.getDatanodeUuid())
|
.setDatanodeUUID(datanodeID.getDatanodeUuid())
|
||||||
.build();
|
.build();
|
||||||
|
|
|
@ -31,9 +31,13 @@ option java_generic_services = true;
|
||||||
option java_generate_equals_and_hash = true;
|
option java_generate_equals_and_hash = true;
|
||||||
|
|
||||||
package hadoop.hdfs;
|
package hadoop.hdfs;
|
||||||
|
|
||||||
import "hdfs.proto";
|
import "hdfs.proto";
|
||||||
|
|
||||||
import "HdfsServer.proto";
|
import "HdfsServer.proto";
|
||||||
|
|
||||||
import "DatanodeProtocol.proto";
|
import "DatanodeProtocol.proto";
|
||||||
|
|
||||||
import "DatanodeContainerProtocol.proto";
|
import "DatanodeContainerProtocol.proto";
|
||||||
|
|
||||||
|
|
||||||
|
@ -45,6 +49,10 @@ message SCMHeartbeatRequestProto {
|
||||||
required DatanodeIDProto datanodeID = 1;
|
required DatanodeIDProto datanodeID = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message SCMRegisterRequestProto {
|
||||||
|
required DatanodeIDProto datanodeID = 1;
|
||||||
|
optional SCMNodeAddressList addressList = 2;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Request for version info of the software stack on the server.
|
* Request for version info of the software stack on the server.
|
||||||
|
@ -62,21 +70,35 @@ message SCMVersionResponseProto {
|
||||||
repeated hadoop.hdfs.ozone.KeyValue keys = 2;
|
repeated hadoop.hdfs.ozone.KeyValue keys = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message SCMNodeAddressList {
|
||||||
|
repeated string addressList = 1;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Datanode ID returned by the SCM. This is similar to name node
|
* Datanode ID returned by the SCM. This is similar to name node
|
||||||
* registeration of a datanode.
|
* registeration of a datanode.
|
||||||
*/
|
*/
|
||||||
message RegisteredCmdResponseProto {
|
message SCMRegisteredCmdResponseProto {
|
||||||
enum ErrorCode {
|
enum ErrorCode {
|
||||||
success = 1;
|
success = 1;
|
||||||
errorNodeAlreadyRegistered = 2;
|
errorNodeNotPermitted = 2;
|
||||||
errorNodeNotPermitted = 3;
|
|
||||||
}
|
}
|
||||||
required ErrorCode errorCode = 1;
|
required ErrorCode errorCode = 2;
|
||||||
optional string datanodeUUID = 2;
|
optional string datanodeUUID = 3;
|
||||||
optional string clusterID = 3;
|
optional string clusterID = 4;
|
||||||
|
optional SCMNodeAddressList addressList = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Container ID maintains the container's Identity along with cluster ID
|
||||||
|
* after the registration is done.
|
||||||
|
*/
|
||||||
|
message ContainerNodeIDProto {
|
||||||
|
required DatanodeIDProto datanodeID = 1;
|
||||||
|
optional string clusterID = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Empty Command Response
|
* Empty Command Response
|
||||||
*/
|
*/
|
||||||
|
@ -84,18 +106,21 @@ message NullCmdResponseProto {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Type of commands supported by SCM to datanode protocol.
|
||||||
|
*/
|
||||||
|
enum Type {
|
||||||
|
nullCmd = 1;
|
||||||
|
versionCommand = 2;
|
||||||
|
registeredCommand = 3;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These are commands returned by SCM for to the datanode to execute.
|
* These are commands returned by SCM for to the datanode to execute.
|
||||||
*/
|
*/
|
||||||
message SCMCommandResponseProto {
|
message SCMCommandResponseProto {
|
||||||
enum Type {
|
required Type cmdType = 2; // Type of the command
|
||||||
nullCmd = 1;
|
optional NullCmdResponseProto nullCommand = 3;
|
||||||
registeredCmd = 2; // Returns the datanode ID after registeration.
|
|
||||||
}
|
|
||||||
|
|
||||||
required Type cmdType = 1; // Type of the command
|
|
||||||
optional NullCmdResponseProto nullCommand = 2;
|
|
||||||
optional RegisteredCmdResponseProto registerNode = 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -160,12 +185,11 @@ message SCMHeartbeatResponseProto {
|
||||||
* registered with some SCM. If this file is not found, datanode assumes that
|
* registered with some SCM. If this file is not found, datanode assumes that
|
||||||
* it needs to do a registration.
|
* it needs to do a registration.
|
||||||
*
|
*
|
||||||
* If registration is need datanode moves into REGISTERING_NODE state. It will
|
* If registration is needed, the datanode moves into REGISTER state. It will
|
||||||
* send a register call with datanodeID data structure, but with datanode UUID
|
* send a register call with datanodeID data structure and persist that info.
|
||||||
* will be set to an empty string.
|
|
||||||
*
|
*
|
||||||
* The response to the command contains the datanode UUID and clusterID. This
|
* The response to the command contains clusterID. This information is
|
||||||
* information is persisted by the datanode and moves into heartbeat state.
|
* also persisted by the datanode, which then moves into heartbeat state.
|
||||||
*
|
*
|
||||||
* Once in the heartbeat state, datanode sends heartbeats and container reports
|
* Once in the heartbeat state, datanode sends heartbeats and container reports
|
||||||
* to SCM and process commands issued by SCM until it is shutdown.
|
* to SCM and process commands issued by SCM until it is shutdown.
|
||||||
|
@ -176,12 +200,12 @@ service StorageContainerDatanodeProtocolService {
|
||||||
/**
|
/**
|
||||||
* Gets the version information from the SCM.
|
* Gets the version information from the SCM.
|
||||||
*/
|
*/
|
||||||
rpc getVersion(SCMVersionRequestProto) returns (SCMVersionResponseProto);
|
rpc getVersion (SCMVersionRequestProto) returns (SCMVersionResponseProto);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers a data node with SCM.
|
* Registers a data node with SCM.
|
||||||
*/
|
*/
|
||||||
rpc register(SCMHeartbeatRequestProto) returns (SCMCommandResponseProto);
|
rpc register (SCMRegisterRequestProto) returns (SCMRegisteredCmdResponseProto);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Send heartbeat from datanode to SCM. HB's under SCM looks more
|
* Send heartbeat from datanode to SCM. HB's under SCM looks more
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common;
|
||||||
|
|
||||||
|
import com.google.protobuf.BlockingService;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||||
|
import org.apache.hadoop.ipc.ProtobufRpcEngine;
|
||||||
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol;
|
||||||
|
import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageContainerDatanodeProtocolService;
|
||||||
|
import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolClientSideTranslatorPB;
|
||||||
|
import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolPB;
|
||||||
|
import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolServerSideTranslatorPB;
|
||||||
|
import org.apache.hadoop.ozone.scm.node.SCMNodeManager;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetAddress;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.net.ServerSocket;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test Endpoint class.
|
||||||
|
*/
|
||||||
|
public final class SCMTestUtils {
|
||||||
|
/**
|
||||||
|
* Never constructed.
|
||||||
|
*/
|
||||||
|
private SCMTestUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts an RPC server, if configured.
|
||||||
|
*
|
||||||
|
* @param conf configuration
|
||||||
|
* @param addr configured address of RPC server
|
||||||
|
* @param protocol RPC protocol provided by RPC server
|
||||||
|
* @param instance RPC protocol implementation instance
|
||||||
|
* @param handlerCount RPC server handler count
|
||||||
|
* @return RPC server
|
||||||
|
* @throws IOException if there is an I/O error while creating RPC server
|
||||||
|
*/
|
||||||
|
private static RPC.Server startRpcServer(Configuration conf,
|
||||||
|
InetSocketAddress addr, Class<?>
|
||||||
|
protocol, BlockingService instance, int handlerCount)
|
||||||
|
throws IOException {
|
||||||
|
RPC.Server rpcServer = new RPC.Builder(conf)
|
||||||
|
.setProtocol(protocol)
|
||||||
|
.setInstance(instance)
|
||||||
|
.setBindAddress(addr.getHostString())
|
||||||
|
.setPort(addr.getPort())
|
||||||
|
.setNumHandlers(handlerCount)
|
||||||
|
.setVerbose(false)
|
||||||
|
.setSecretManager(null)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
DFSUtil.addPBProtocol(conf, protocol, instance, rpcServer);
|
||||||
|
return rpcServer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an Endpoint class for testing purpose.
|
||||||
|
*
|
||||||
|
* @param conf - Conf
|
||||||
|
* @param address - InetAddres
|
||||||
|
* @param rpcTimeout - rpcTimeOut
|
||||||
|
* @return EndPoint
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public static EndpointStateMachine createEndpoint(Configuration conf,
|
||||||
|
InetSocketAddress address, int rpcTimeout) throws Exception {
|
||||||
|
RPC.setProtocolEngine(conf, StorageContainerDatanodeProtocolPB.class,
|
||||||
|
ProtobufRpcEngine.class);
|
||||||
|
long version =
|
||||||
|
RPC.getProtocolVersion(StorageContainerDatanodeProtocolPB.class);
|
||||||
|
|
||||||
|
StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProtocolProxy(
|
||||||
|
StorageContainerDatanodeProtocolPB.class, version,
|
||||||
|
address, UserGroupInformation.getCurrentUser(), conf,
|
||||||
|
NetUtils.getDefaultSocketFactory(conf), rpcTimeout,
|
||||||
|
RetryPolicies.TRY_ONCE_THEN_FAIL).getProxy();
|
||||||
|
|
||||||
|
StorageContainerDatanodeProtocolClientSideTranslatorPB rpcClient =
|
||||||
|
new StorageContainerDatanodeProtocolClientSideTranslatorPB(rpcProxy);
|
||||||
|
return new EndpointStateMachine(address, rpcClient, conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start Datanode RPC server.
|
||||||
|
*/
|
||||||
|
public static RPC.Server startScmRpcServer(Configuration configuration,
|
||||||
|
StorageContainerDatanodeProtocol server,
|
||||||
|
InetSocketAddress rpcServerAddresss, int handlerCount) throws
|
||||||
|
IOException {
|
||||||
|
RPC.setProtocolEngine(configuration,
|
||||||
|
StorageContainerDatanodeProtocolPB.class,
|
||||||
|
ProtobufRpcEngine.class);
|
||||||
|
|
||||||
|
BlockingService scmDatanodeService =
|
||||||
|
StorageContainerDatanodeProtocolService.
|
||||||
|
newReflectiveBlockingService(
|
||||||
|
new StorageContainerDatanodeProtocolServerSideTranslatorPB(
|
||||||
|
server));
|
||||||
|
|
||||||
|
RPC.Server scmServer = startRpcServer(configuration, rpcServerAddresss,
|
||||||
|
StorageContainerDatanodeProtocolPB.class, scmDatanodeService,
|
||||||
|
handlerCount);
|
||||||
|
|
||||||
|
scmServer.start();
|
||||||
|
return scmServer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static InetSocketAddress getReuseableAddress() throws IOException {
|
||||||
|
try (ServerSocket socket = new ServerSocket(0)) {
|
||||||
|
socket.setReuseAddress(true);
|
||||||
|
int port = socket.getLocalPort();
|
||||||
|
String addr = InetAddress.getLoopbackAddress().getHostAddress()
|
||||||
|
.toString();
|
||||||
|
return new InetSocketAddress(addr, port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Configuration getConf() {
|
||||||
|
return new Configuration();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DatanodeID getDatanodeID(SCMNodeManager nodeManager) {
|
||||||
|
|
||||||
|
return getDatanodeID(nodeManager, UUID.randomUUID().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new DatanodeID with NodeID set to the string.
|
||||||
|
*
|
||||||
|
* @param uuid - node ID, it is generally UUID.
|
||||||
|
* @return DatanodeID.
|
||||||
|
*/
|
||||||
|
public static DatanodeID getDatanodeID(SCMNodeManager nodeManager, String
|
||||||
|
uuid) {
|
||||||
|
DatanodeID tempDataNode = getDatanodeID(uuid);
|
||||||
|
RegisteredCommand command =
|
||||||
|
(RegisteredCommand) nodeManager.register(tempDataNode);
|
||||||
|
return new DatanodeID(command.getDatanodeUUID(), tempDataNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a datanode ID.
|
||||||
|
*
|
||||||
|
* @return DatanodeID
|
||||||
|
*/
|
||||||
|
public static DatanodeID getDatanodeID() {
|
||||||
|
return getDatanodeID(UUID.randomUUID().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static DatanodeID getDatanodeID(String uuid) {
|
||||||
|
Random random = new Random();
|
||||||
|
String ipAddress = random.nextInt(256) + "."
|
||||||
|
+ random.nextInt(256) + "."
|
||||||
|
+ random.nextInt(256) + "."
|
||||||
|
+ random.nextInt(256);
|
||||||
|
|
||||||
|
String hostName = uuid;
|
||||||
|
return new DatanodeID(ipAddress,
|
||||||
|
hostName, uuid, 0, 0, 0, 0);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,149 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol;
|
||||||
|
import org.apache.hadoop.ozone.protocol.VersionResponse;
|
||||||
|
import org.apache.hadoop.ozone.protocol.commands.NullCommand;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos;
|
||||||
|
import org.apache.hadoop.ozone.scm.VersionInfo;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SCM RPC mock class.
|
||||||
|
*/
|
||||||
|
public class ScmTestMock implements StorageContainerDatanodeProtocol {
|
||||||
|
private int rpcResponseDelay;
|
||||||
|
private AtomicInteger heartbeatCount = new AtomicInteger(0);
|
||||||
|
private AtomicInteger rpcCount = new AtomicInteger(0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of heartbeats made to this class.
|
||||||
|
*
|
||||||
|
* @return int
|
||||||
|
*/
|
||||||
|
public int getHeartbeatCount() {
|
||||||
|
return heartbeatCount.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of RPC calls made to this mock class instance.
|
||||||
|
*
|
||||||
|
* @return - Number of RPC calls serviced by this class.
|
||||||
|
*/
|
||||||
|
public int getRpcCount() {
|
||||||
|
return rpcCount.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the RPC response delay.
|
||||||
|
*
|
||||||
|
* @return delay in milliseconds.
|
||||||
|
*/
|
||||||
|
public int getRpcResponseDelay() {
|
||||||
|
return rpcResponseDelay;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the RPC response delay.
|
||||||
|
*
|
||||||
|
* @param rpcResponseDelay - delay in milliseconds.
|
||||||
|
*/
|
||||||
|
public void setRpcResponseDelay(int rpcResponseDelay) {
|
||||||
|
this.rpcResponseDelay = rpcResponseDelay;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns SCM version.
|
||||||
|
*
|
||||||
|
* @return Version info.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto
|
||||||
|
getVersion() throws IOException {
|
||||||
|
rpcCount.incrementAndGet();
|
||||||
|
sleepIfNeeded();
|
||||||
|
VersionInfo versionInfo = VersionInfo.getLatestVersion();
|
||||||
|
return VersionResponse.newBuilder()
|
||||||
|
.setVersion(versionInfo.getVersion())
|
||||||
|
.addValue(VersionInfo.DESCRIPTION_KEY, versionInfo.getDescription())
|
||||||
|
.build().getProtobufMessage();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void sleepIfNeeded() {
|
||||||
|
if (getRpcResponseDelay() > 0) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(getRpcResponseDelay());
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
// Just ignore this exception.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used by data node to send a Heartbeat.
|
||||||
|
*
|
||||||
|
* @param datanodeID - Datanode ID.
|
||||||
|
* @return - SCMHeartbeatResponseProto
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto
|
||||||
|
sendHeartbeat(DatanodeID datanodeID)
|
||||||
|
throws IOException {
|
||||||
|
rpcCount.incrementAndGet();
|
||||||
|
heartbeatCount.incrementAndGet();
|
||||||
|
sleepIfNeeded();
|
||||||
|
StorageContainerDatanodeProtocolProtos.SCMCommandResponseProto
|
||||||
|
cmdResponse = StorageContainerDatanodeProtocolProtos
|
||||||
|
.SCMCommandResponseProto
|
||||||
|
.newBuilder().setCmdType(StorageContainerDatanodeProtocolProtos
|
||||||
|
.Type.nullCmd)
|
||||||
|
.setNullCommand(
|
||||||
|
NullCommand.newBuilder().build().getProtoBufMessage()).build();
|
||||||
|
return StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto
|
||||||
|
.newBuilder()
|
||||||
|
.addCommands(cmdResponse).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register Datanode.
|
||||||
|
*
|
||||||
|
* @param datanodeID - DatanodID.
|
||||||
|
* @param scmAddresses - List of SCMs this datanode is configured to
|
||||||
|
* communicate.
|
||||||
|
* @return SCM Command.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public StorageContainerDatanodeProtocolProtos
|
||||||
|
.SCMRegisteredCmdResponseProto register(DatanodeID datanodeID,
|
||||||
|
String[] scmAddresses) throws IOException {
|
||||||
|
rpcCount.incrementAndGet();
|
||||||
|
sleepIfNeeded();
|
||||||
|
return StorageContainerDatanodeProtocolProtos
|
||||||
|
.SCMRegisteredCmdResponseProto
|
||||||
|
.newBuilder().setClusterID(UUID.randomUUID().toString())
|
||||||
|
.setDatanodeUUID(datanodeID.getDatanodeUuid()).setErrorCode(
|
||||||
|
StorageContainerDatanodeProtocolProtos
|
||||||
|
.SCMRegisteredCmdResponseProto.ErrorCode.success).build();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,274 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common;
|
||||||
|
|
||||||
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.DatanodeStateMachine;
|
||||||
|
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.EndpointStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.SCMConnectionManager;
|
||||||
|
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.DatanodeState;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.datanode
|
||||||
|
.InitDatanodeState;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.datanode
|
||||||
|
.RunningDatanodeState;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the datanode state machine class and its states.
|
||||||
|
*/
|
||||||
|
public class TestDatanodeStateMachine {
|
||||||
|
private final int scmServerCount = 3;
|
||||||
|
private List<String> serverAddresses;
|
||||||
|
private List<RPC.Server> scmServers;
|
||||||
|
private List<ScmTestMock> mockServers;
|
||||||
|
private ExecutorService executorService;
|
||||||
|
private Configuration conf;
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(TestDatanodeStateMachine.class);
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
conf = SCMTestUtils.getConf();
|
||||||
|
serverAddresses = new LinkedList<>();
|
||||||
|
scmServers = new LinkedList<>();
|
||||||
|
mockServers = new LinkedList<>();
|
||||||
|
for (int x = 0; x < scmServerCount; x++) {
|
||||||
|
int port = SCMTestUtils.getReuseableAddress().getPort();
|
||||||
|
String address = "127.0.0.1";
|
||||||
|
serverAddresses.add(address + ":" + port);
|
||||||
|
ScmTestMock mock = new ScmTestMock();
|
||||||
|
|
||||||
|
scmServers.add(SCMTestUtils.startScmRpcServer(conf, mock,
|
||||||
|
new InetSocketAddress(address, port), 10));
|
||||||
|
mockServers.add(mock);
|
||||||
|
}
|
||||||
|
|
||||||
|
conf.setStrings(OzoneConfigKeys.OZONE_SCM_NAMES,
|
||||||
|
serverAddresses.toArray(new String[0]));
|
||||||
|
|
||||||
|
URL p = this.getClass().getResource("");
|
||||||
|
String path = p.getPath().concat(
|
||||||
|
TestDatanodeStateMachine.class.getSimpleName());
|
||||||
|
File f = new File(path);
|
||||||
|
if(!f.mkdirs()) {
|
||||||
|
LOG.info("Required directories already exist.");
|
||||||
|
}
|
||||||
|
|
||||||
|
path = Paths.get(path.toString(),
|
||||||
|
TestDatanodeStateMachine.class.getSimpleName() + ".id").toString();
|
||||||
|
conf.set(OzoneConfigKeys.OZONE_SCM_DATANODE_ID, path);
|
||||||
|
|
||||||
|
executorService = HadoopExecutors.newScheduledThreadPool(
|
||||||
|
conf.getInt(
|
||||||
|
OzoneConfigKeys.OZONE_SCM_CONTAINER_THREADS,
|
||||||
|
OzoneConfigKeys.OZONE_SCM_CONTAINER_THREADS_DEFAULT),
|
||||||
|
new ThreadFactoryBuilder().setDaemon(true)
|
||||||
|
.setNameFormat("Test Data Node State Machine Thread - %d").build());
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
try {
|
||||||
|
executorService.shutdownNow();
|
||||||
|
for (RPC.Server s : scmServers) {
|
||||||
|
s.stop();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
//ignore all execption from the shutdown
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that starting statemachine executes the Init State.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDatanodeStateMachineStartThread() throws IOException,
|
||||||
|
InterruptedException, TimeoutException {
|
||||||
|
final DatanodeStateMachine stateMachine = new DatanodeStateMachine(conf);
|
||||||
|
Runnable startStateMachineTask = () -> {
|
||||||
|
try {
|
||||||
|
stateMachine.start();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Thread thread1 = new Thread(startStateMachineTask);
|
||||||
|
thread1.setDaemon(true);
|
||||||
|
thread1.start();
|
||||||
|
|
||||||
|
SCMConnectionManager connectionManager =
|
||||||
|
stateMachine.getConnectionManager();
|
||||||
|
|
||||||
|
GenericTestUtils.waitFor(() -> connectionManager.getValues().size() == 3 ,
|
||||||
|
100, 1000);
|
||||||
|
|
||||||
|
stateMachine.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This test explores the state machine by invoking each call in sequence just
|
||||||
|
* like as if the state machine would call it. Because this is a test we are
|
||||||
|
* able to verify each of the assumptions.
|
||||||
|
* <p>
|
||||||
|
* Here is what happens at High level.
|
||||||
|
* <p>
|
||||||
|
* 1. We start the datanodeStateMachine in the INIT State.
|
||||||
|
* <p>
|
||||||
|
* 2. We invoke the INIT state task.
|
||||||
|
* <p>
|
||||||
|
* 3. That creates a set of RPC endpoints that are ready to connect to SCMs.
|
||||||
|
* <p>
|
||||||
|
* 4. We assert that we have moved to the running state for the
|
||||||
|
* DatanodeStateMachine.
|
||||||
|
* <p>
|
||||||
|
* 5. We get the task for the Running State - Executing that running state,
|
||||||
|
* makes the first network call in of the state machine. The Endpoint is in
|
||||||
|
* the GETVERSION State and we invoke the task.
|
||||||
|
* <p>
|
||||||
|
* 6. We assert that this call was a success by checking that each of the
|
||||||
|
* endponts now have version response that it got from the SCM server that it
|
||||||
|
* was talking to and also each of the mock server serviced one RPC call.
|
||||||
|
* <p>
|
||||||
|
* 7. Since the Register is done now, next calls to get task will return
|
||||||
|
* HeartbeatTask, which sends heartbeats to SCM. We assert that we get right
|
||||||
|
* task from sub-system below.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDatanodeStateContext() throws IOException,
|
||||||
|
InterruptedException, ExecutionException, TimeoutException {
|
||||||
|
final DatanodeStateMachine stateMachine = new DatanodeStateMachine(conf);
|
||||||
|
DatanodeStateMachine.DatanodeStates currentState =
|
||||||
|
stateMachine.getContext().getState();
|
||||||
|
Assert.assertEquals(DatanodeStateMachine.DatanodeStates.INIT,
|
||||||
|
currentState);
|
||||||
|
|
||||||
|
DatanodeState<DatanodeStateMachine.DatanodeStates> task =
|
||||||
|
stateMachine.getContext().getTask();
|
||||||
|
Assert.assertEquals(InitDatanodeState.class, task.getClass());
|
||||||
|
|
||||||
|
task.execute(executorService);
|
||||||
|
DatanodeStateMachine.DatanodeStates newState =
|
||||||
|
task.await(2, TimeUnit.SECONDS);
|
||||||
|
|
||||||
|
for (EndpointStateMachine endpoint :
|
||||||
|
stateMachine.getConnectionManager().getValues()) {
|
||||||
|
// We assert that each of the is in State GETVERSION.
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.GETVERSION,
|
||||||
|
endpoint.getState());
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Datanode has moved into Running State, since endpoints are created.
|
||||||
|
// We move to running state when we are ready to issue RPC calls to SCMs.
|
||||||
|
Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING,
|
||||||
|
newState);
|
||||||
|
|
||||||
|
// If we had called context.execute instead of calling into each state
|
||||||
|
// this would have happened automatically.
|
||||||
|
stateMachine.getContext().setState(newState);
|
||||||
|
task = stateMachine.getContext().getTask();
|
||||||
|
Assert.assertEquals(RunningDatanodeState.class, task.getClass());
|
||||||
|
|
||||||
|
// This execute will invoke getVersion calls against all SCM endpoints
|
||||||
|
// that we know of.
|
||||||
|
task.execute(executorService);
|
||||||
|
newState = task.await(2, TimeUnit.SECONDS);
|
||||||
|
|
||||||
|
// If we are in running state, we should be in running.
|
||||||
|
Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING,
|
||||||
|
newState);
|
||||||
|
|
||||||
|
for (EndpointStateMachine endpoint :
|
||||||
|
stateMachine.getConnectionManager().getValues()) {
|
||||||
|
|
||||||
|
// Since the earlier task.execute called into GetVersion, the
|
||||||
|
// endPointState Machine should move to REGISTER state.
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.REGISTER,
|
||||||
|
endpoint.getState());
|
||||||
|
|
||||||
|
// We assert that each of the end points have gotten a version from the
|
||||||
|
// SCM Server.
|
||||||
|
Assert.assertNotNull(endpoint.getVersion());
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can also assert that all mock servers have received only one RPC
|
||||||
|
// call at this point of time.
|
||||||
|
for (ScmTestMock mock : mockServers) {
|
||||||
|
Assert.assertEquals(1, mock.getRpcCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
// This task is the Running task, but running task executes tasks based
|
||||||
|
// on the state of Endpoints, hence this next call will be a Register at
|
||||||
|
// the endpoint RPC level.
|
||||||
|
task = stateMachine.getContext().getTask();
|
||||||
|
task.execute(executorService);
|
||||||
|
newState = task.await(2, TimeUnit.SECONDS);
|
||||||
|
|
||||||
|
// If we are in running state, we should be in running.
|
||||||
|
Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING,
|
||||||
|
newState);
|
||||||
|
|
||||||
|
for (ScmTestMock mock : mockServers) {
|
||||||
|
Assert.assertEquals(2, mock.getRpcCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
// This task is the Running task, but running task executes tasks based
|
||||||
|
// on the state of Endpoints, hence this next call will be a
|
||||||
|
// HeartbeatTask at the endpoint RPC level.
|
||||||
|
task = stateMachine.getContext().getTask();
|
||||||
|
task.execute(executorService);
|
||||||
|
newState = task.await(2, TimeUnit.SECONDS);
|
||||||
|
|
||||||
|
// If we are in running state, we should be in running.
|
||||||
|
Assert.assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING,
|
||||||
|
newState);
|
||||||
|
|
||||||
|
for (ScmTestMock mock : mockServers) {
|
||||||
|
Assert.assertEquals(1, mock.getHeartbeatCount());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,314 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with this
|
||||||
|
* work for additional information regarding copyright ownership. The ASF
|
||||||
|
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.ozone.container.common;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
import org.apache.hadoop.ozone.container.common.statemachine
|
||||||
|
.EndpointStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.endpoint
|
||||||
|
.HeartbeatEndpointTask;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.endpoint
|
||||||
|
.RegisterEndpointTask;
|
||||||
|
import org.apache.hadoop.ozone.container.common.states.endpoint
|
||||||
|
.VersionEndpointTask;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.ContainerNodeIDProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMRegisteredCmdResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto;
|
||||||
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.Type;
|
||||||
|
import org.apache.hadoop.ozone.scm.VersionInfo;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.mockito.internal.matchers.LessOrEqual;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the endpoints.
|
||||||
|
*/
|
||||||
|
public class TestEndPoint {
|
||||||
|
private static InetSocketAddress serverAddress;
|
||||||
|
private static RPC.Server scmServer;
|
||||||
|
private static ScmTestMock scmServerImpl;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
/**
|
||||||
|
* This test asserts that we are able to make a version call to SCM server
|
||||||
|
* and gets back the expected values.
|
||||||
|
*/
|
||||||
|
public void testGetVersion() throws Exception {
|
||||||
|
try (EndpointStateMachine rpcEndPoint =
|
||||||
|
SCMTestUtils.createEndpoint(SCMTestUtils.getConf(),
|
||||||
|
serverAddress, 1000)) {
|
||||||
|
SCMVersionResponseProto responseProto = rpcEndPoint.getEndPoint()
|
||||||
|
.getVersion();
|
||||||
|
Assert.assertNotNull(responseProto);
|
||||||
|
Assert.assertEquals(responseProto.getKeys(0).getKey(),
|
||||||
|
VersionInfo.DESCRIPTION_KEY);
|
||||||
|
Assert.assertEquals(responseProto.getKeys(0).getValue(),
|
||||||
|
VersionInfo.getLatestVersion().getDescription());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
/**
|
||||||
|
* We make getVersion RPC call, but via the VersionEndpointTask which is
|
||||||
|
* how the state machine would make the call.
|
||||||
|
*/
|
||||||
|
public void testGetVersionTask() throws Exception {
|
||||||
|
Configuration conf = SCMTestUtils.getConf();
|
||||||
|
try (EndpointStateMachine rpcEndPoint = SCMTestUtils.createEndpoint(conf,
|
||||||
|
serverAddress, 1000)) {
|
||||||
|
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION);
|
||||||
|
VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint,
|
||||||
|
conf);
|
||||||
|
EndpointStateMachine.EndPointStates newState = versionTask.call();
|
||||||
|
|
||||||
|
// if version call worked the endpoint should automatically move to the
|
||||||
|
// next state.
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.REGISTER,
|
||||||
|
newState);
|
||||||
|
|
||||||
|
// Now rpcEndpoint should remember the version it got from SCM
|
||||||
|
Assert.assertNotNull(rpcEndPoint.getVersion());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
/**
|
||||||
|
* This test makes a call to end point where there is no SCM server. We
|
||||||
|
* expect that versionTask should be able to handle it.
|
||||||
|
*/
|
||||||
|
public void testGetVersionToInvalidEndpoint() throws Exception {
|
||||||
|
Configuration conf = SCMTestUtils.getConf();
|
||||||
|
InetSocketAddress nonExistentServerAddress = SCMTestUtils
|
||||||
|
.getReuseableAddress();
|
||||||
|
try (EndpointStateMachine rpcEndPoint = SCMTestUtils.createEndpoint(conf,
|
||||||
|
nonExistentServerAddress, 1000)) {
|
||||||
|
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION);
|
||||||
|
VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint,
|
||||||
|
conf);
|
||||||
|
EndpointStateMachine.EndPointStates newState = versionTask.call();
|
||||||
|
|
||||||
|
// This version call did NOT work, so endpoint should remain in the same
|
||||||
|
// state.
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.GETVERSION,
|
||||||
|
newState);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
/**
|
||||||
|
* This test makes a getVersionRPC call, but the DummyStorageServer is
|
||||||
|
* going to respond little slowly. We will assert that we are still in the
|
||||||
|
* GETVERSION state after the timeout.
|
||||||
|
*/
|
||||||
|
public void testGetVersionAssertRpcTimeOut() throws Exception {
|
||||||
|
final long rpcTimeout = 1000;
|
||||||
|
final long tolerance = 100;
|
||||||
|
Configuration conf = SCMTestUtils.getConf();
|
||||||
|
|
||||||
|
try (EndpointStateMachine rpcEndPoint = SCMTestUtils.createEndpoint(conf,
|
||||||
|
serverAddress, (int) rpcTimeout)) {
|
||||||
|
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION);
|
||||||
|
VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint,
|
||||||
|
conf);
|
||||||
|
|
||||||
|
scmServerImpl.setRpcResponseDelay(1500);
|
||||||
|
long start = Time.monotonicNow();
|
||||||
|
EndpointStateMachine.EndPointStates newState = versionTask.call();
|
||||||
|
long end = Time.monotonicNow();
|
||||||
|
scmServerImpl.setRpcResponseDelay(0);
|
||||||
|
Assert.assertThat(end - start, new LessOrEqual<>(rpcTimeout + tolerance));
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.GETVERSION,
|
||||||
|
newState);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRegister() throws Exception {
|
||||||
|
String[] scmAddressArray = new String[1];
|
||||||
|
scmAddressArray[0] = serverAddress.toString();
|
||||||
|
DatanodeID nodeToRegister = SCMTestUtils.getDatanodeID();
|
||||||
|
try (EndpointStateMachine rpcEndPoint =
|
||||||
|
SCMTestUtils.createEndpoint(
|
||||||
|
SCMTestUtils.getConf(), serverAddress, 1000)) {
|
||||||
|
SCMRegisteredCmdResponseProto responseProto = rpcEndPoint.getEndPoint()
|
||||||
|
.register(nodeToRegister, scmAddressArray);
|
||||||
|
Assert.assertNotNull(responseProto);
|
||||||
|
Assert.assertEquals(responseProto.getDatanodeUUID(),
|
||||||
|
nodeToRegister.getDatanodeUuid());
|
||||||
|
Assert.assertNotNull(responseProto.getClusterID());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private EndpointStateMachine registerTaskHelper(InetSocketAddress scmAddress,
|
||||||
|
int rpcTimeout, boolean clearContainerID) throws Exception {
|
||||||
|
EndpointStateMachine rpcEndPoint =
|
||||||
|
SCMTestUtils.createEndpoint(SCMTestUtils.getConf(),
|
||||||
|
scmAddress, rpcTimeout);
|
||||||
|
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.REGISTER);
|
||||||
|
RegisterEndpointTask endpointTask =
|
||||||
|
new RegisterEndpointTask(rpcEndPoint, SCMTestUtils.getConf());
|
||||||
|
if (!clearContainerID) {
|
||||||
|
ContainerNodeIDProto containerNodeID = ContainerNodeIDProto.newBuilder()
|
||||||
|
.setClusterID(UUID.randomUUID().toString())
|
||||||
|
.setDatanodeID(SCMTestUtils.getDatanodeID().getProtoBufMessage())
|
||||||
|
.build();
|
||||||
|
endpointTask.setContainerNodeIDProto(containerNodeID);
|
||||||
|
}
|
||||||
|
endpointTask.call();
|
||||||
|
return rpcEndPoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRegisterTask() throws Exception {
|
||||||
|
try (EndpointStateMachine rpcEndpoint =
|
||||||
|
registerTaskHelper(serverAddress, 1000, false)) {
|
||||||
|
// Successful register should move us to Heartbeat state.
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.HEARTBEAT,
|
||||||
|
rpcEndpoint.getState());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRegisterToInvalidEndpoint() throws Exception {
|
||||||
|
InetSocketAddress address = SCMTestUtils.getReuseableAddress();
|
||||||
|
try (EndpointStateMachine rpcEndpoint =
|
||||||
|
registerTaskHelper(address, 1000, false)) {
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.REGISTER,
|
||||||
|
rpcEndpoint.getState());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRegisterNoContainerID() throws Exception {
|
||||||
|
InetSocketAddress address = SCMTestUtils.getReuseableAddress();
|
||||||
|
try (EndpointStateMachine rpcEndpoint =
|
||||||
|
registerTaskHelper(address, 1000, true)) {
|
||||||
|
// No Container ID, therefore we tell the datanode that we would like to
|
||||||
|
// shutdown.
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.SHUTDOWN,
|
||||||
|
rpcEndpoint.getState());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRegisterRpcTimeout() throws Exception {
|
||||||
|
final long rpcTimeout = 1000;
|
||||||
|
final long tolerance = 200;
|
||||||
|
scmServerImpl.setRpcResponseDelay(1500);
|
||||||
|
long start = Time.monotonicNow();
|
||||||
|
registerTaskHelper(serverAddress, 1000, false).close();
|
||||||
|
long end = Time.monotonicNow();
|
||||||
|
scmServerImpl.setRpcResponseDelay(0);
|
||||||
|
Assert.assertThat(end - start, new LessOrEqual<>(rpcTimeout + tolerance));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHeartbeat() throws Exception {
|
||||||
|
DatanodeID dataNode = SCMTestUtils.getDatanodeID();
|
||||||
|
try (EndpointStateMachine rpcEndPoint =
|
||||||
|
SCMTestUtils.createEndpoint(SCMTestUtils.getConf(),
|
||||||
|
serverAddress, 1000)) {
|
||||||
|
SCMHeartbeatResponseProto responseProto = rpcEndPoint.getEndPoint()
|
||||||
|
.sendHeartbeat(dataNode);
|
||||||
|
Assert.assertNotNull(responseProto);
|
||||||
|
Assert.assertEquals(1, responseProto.getCommandsCount());
|
||||||
|
Assert.assertNotNull(responseProto.getCommandsList().get(0));
|
||||||
|
Assert.assertEquals(responseProto.getCommandsList().get(0).getCmdType(),
|
||||||
|
Type.nullCmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private EndpointStateMachine heartbeatTaskHelper(InetSocketAddress scmAddress,
|
||||||
|
int rpcTimeout) throws Exception {
|
||||||
|
EndpointStateMachine rpcEndPoint = SCMTestUtils.createEndpoint(
|
||||||
|
SCMTestUtils.getConf(),
|
||||||
|
scmAddress, rpcTimeout);
|
||||||
|
ContainerNodeIDProto containerNodeID = ContainerNodeIDProto.newBuilder()
|
||||||
|
.setClusterID(UUID.randomUUID().toString())
|
||||||
|
.setDatanodeID(SCMTestUtils.getDatanodeID().getProtoBufMessage())
|
||||||
|
.build();
|
||||||
|
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.HEARTBEAT);
|
||||||
|
HeartbeatEndpointTask endpointTask =
|
||||||
|
new HeartbeatEndpointTask(rpcEndPoint, SCMTestUtils.getConf());
|
||||||
|
endpointTask.setContainerNodeIDProto(containerNodeID);
|
||||||
|
endpointTask.call();
|
||||||
|
Assert.assertNotNull(endpointTask.getContainerNodeIDProto());
|
||||||
|
return rpcEndPoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatTaskHelper(InetSocketAddress address)
|
||||||
|
throws Exception {
|
||||||
|
try (EndpointStateMachine rpcEndpoint =
|
||||||
|
heartbeatTaskHelper(address, 1000)) {
|
||||||
|
Assert.assertEquals(EndpointStateMachine.EndPointStates.HEARTBEAT,
|
||||||
|
rpcEndpoint.getState());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHeartbeatTask() throws Exception {
|
||||||
|
heartbeatTaskHelper(serverAddress);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHeartbeatTaskToInvalidNode() throws Exception {
|
||||||
|
InetSocketAddress invalidAddress = SCMTestUtils.getReuseableAddress();
|
||||||
|
heartbeatTaskHelper(invalidAddress);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHeartbeatTaskRpcTimeOut() throws Exception {
|
||||||
|
final long rpcTimeout = 1000;
|
||||||
|
final long tolerance = 200;
|
||||||
|
scmServerImpl.setRpcResponseDelay(1500);
|
||||||
|
long start = Time.monotonicNow();
|
||||||
|
InetSocketAddress invalidAddress = SCMTestUtils.getReuseableAddress();
|
||||||
|
heartbeatTaskHelper(invalidAddress);
|
||||||
|
long end = Time.monotonicNow();
|
||||||
|
scmServerImpl.setRpcResponseDelay(0);
|
||||||
|
Assert.assertThat(end - start, new LessOrEqual<>(rpcTimeout + tolerance));
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDown() throws Exception {
|
||||||
|
if (scmServer != null) {
|
||||||
|
scmServer.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
serverAddress = SCMTestUtils.getReuseableAddress();
|
||||||
|
scmServerImpl = new ScmTestMock();
|
||||||
|
scmServer = SCMTestUtils.startScmRpcServer(SCMTestUtils.getConf(),
|
||||||
|
scmServerImpl, serverAddress, 10);
|
||||||
|
}
|
||||||
|
}
|
|
@ -21,7 +21,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
import org.apache.hadoop.ozone.OzoneConfiguration;
|
import org.apache.hadoop.ozone.OzoneConfiguration;
|
||||||
import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
|
import org.apache.hadoop.ozone.container.common.SCMTestUtils;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.hamcrest.CoreMatchers;
|
import org.hamcrest.CoreMatchers;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
@ -33,7 +33,6 @@ import org.junit.rules.ExpectedException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
@ -72,37 +71,6 @@ public class TestNodeManager {
|
||||||
return new OzoneConfiguration();
|
return new OzoneConfiguration();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new datanode ID.
|
|
||||||
*
|
|
||||||
* @return DatanodeID
|
|
||||||
*/
|
|
||||||
DatanodeID getDatanodeID(SCMNodeManager nodeManager) {
|
|
||||||
|
|
||||||
return getDatanodeID(nodeManager, UUID.randomUUID().toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new DatanodeID with NodeID set to the string.
|
|
||||||
*
|
|
||||||
* @param uuid - node ID, it is generally UUID.
|
|
||||||
* @return DatanodeID.
|
|
||||||
*/
|
|
||||||
DatanodeID getDatanodeID(SCMNodeManager nodeManager, String uuid) {
|
|
||||||
Random random = new Random();
|
|
||||||
String ipAddress = random.nextInt(256) + "."
|
|
||||||
+ random.nextInt(256) + "."
|
|
||||||
+ random.nextInt(256) + "."
|
|
||||||
+ random.nextInt(256);
|
|
||||||
|
|
||||||
String hostName = uuid;
|
|
||||||
DatanodeID tempDataNode = new DatanodeID(ipAddress,
|
|
||||||
hostName, uuid, 0, 0, 0, 0);
|
|
||||||
RegisteredCommand command =
|
|
||||||
(RegisteredCommand) nodeManager.register(tempDataNode);
|
|
||||||
return new DatanodeID(command.getDatanodeUUID(), tempDataNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a NodeManager.
|
* Creates a NodeManager.
|
||||||
*
|
*
|
||||||
|
@ -134,7 +102,7 @@ public class TestNodeManager {
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
||||||
// Send some heartbeats from different nodes.
|
// Send some heartbeats from different nodes.
|
||||||
for (int x = 0; x < nodeManager.getMinimumChillModeNodes(); x++) {
|
for (int x = 0; x < nodeManager.getMinimumChillModeNodes(); x++) {
|
||||||
DatanodeID datanodeID = getDatanodeID(nodeManager);
|
DatanodeID datanodeID = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
nodeManager.sendHeartbeat(datanodeID);
|
nodeManager.sendHeartbeat(datanodeID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,7 +149,7 @@ public class TestNodeManager {
|
||||||
|
|
||||||
// Need 100 nodes to come out of chill mode, only one node is sending HB.
|
// Need 100 nodes to come out of chill mode, only one node is sending HB.
|
||||||
nodeManager.setMinimumChillModeNodes(100);
|
nodeManager.setMinimumChillModeNodes(100);
|
||||||
nodeManager.sendHeartbeat(getDatanodeID(nodeManager));
|
nodeManager.sendHeartbeat(SCMTestUtils.getDatanodeID(nodeManager));
|
||||||
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
|
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
|
||||||
4 * 1000);
|
4 * 1000);
|
||||||
assertFalse("Not enough heartbeat, Node manager should have been in " +
|
assertFalse("Not enough heartbeat, Node manager should have been in " +
|
||||||
|
@ -203,7 +171,7 @@ public class TestNodeManager {
|
||||||
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
||||||
nodeManager.setMinimumChillModeNodes(3);
|
nodeManager.setMinimumChillModeNodes(3);
|
||||||
DatanodeID datanodeID = getDatanodeID(nodeManager);
|
DatanodeID datanodeID = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
|
|
||||||
// Send 10 heartbeat from same node, and assert we never leave chill mode.
|
// Send 10 heartbeat from same node, and assert we never leave chill mode.
|
||||||
for (int x = 0; x < 10; x++) {
|
for (int x = 0; x < 10; x++) {
|
||||||
|
@ -232,7 +200,7 @@ public class TestNodeManager {
|
||||||
Configuration conf = getConf();
|
Configuration conf = getConf();
|
||||||
conf.setInt(OzoneConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS, 100);
|
conf.setInt(OzoneConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL_MS, 100);
|
||||||
SCMNodeManager nodeManager = createNodeManager(conf);
|
SCMNodeManager nodeManager = createNodeManager(conf);
|
||||||
DatanodeID datanodeID = getDatanodeID(nodeManager);
|
DatanodeID datanodeID = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
nodeManager.close();
|
nodeManager.close();
|
||||||
|
|
||||||
// These should never be processed.
|
// These should never be processed.
|
||||||
|
@ -262,7 +230,7 @@ public class TestNodeManager {
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
||||||
|
|
||||||
for (int x = 0; x < count; x++) {
|
for (int x = 0; x < count; x++) {
|
||||||
DatanodeID datanodeID = getDatanodeID(nodeManager);
|
DatanodeID datanodeID = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
nodeManager.sendHeartbeat(datanodeID);
|
nodeManager.sendHeartbeat(datanodeID);
|
||||||
}
|
}
|
||||||
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
|
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatThead(), 100,
|
||||||
|
@ -346,7 +314,7 @@ public class TestNodeManager {
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
||||||
List<DatanodeID> nodeList = createNodeSet(nodeManager, nodeCount,
|
List<DatanodeID> nodeList = createNodeSet(nodeManager, nodeCount,
|
||||||
"staleNode");
|
"staleNode");
|
||||||
DatanodeID staleNode = getDatanodeID(nodeManager);
|
DatanodeID staleNode = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
|
|
||||||
// Heartbeat once
|
// Heartbeat once
|
||||||
nodeManager.sendHeartbeat(staleNode);
|
nodeManager.sendHeartbeat(staleNode);
|
||||||
|
@ -396,7 +364,7 @@ public class TestNodeManager {
|
||||||
List<DatanodeID> nodeList = createNodeSet(nodeManager, nodeCount,
|
List<DatanodeID> nodeList = createNodeSet(nodeManager, nodeCount,
|
||||||
"Node");
|
"Node");
|
||||||
|
|
||||||
DatanodeID deadNode = getDatanodeID(nodeManager);
|
DatanodeID deadNode = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
|
|
||||||
// Heartbeat once
|
// Heartbeat once
|
||||||
nodeManager.sendHeartbeat(deadNode);
|
nodeManager.sendHeartbeat(deadNode);
|
||||||
|
@ -427,28 +395,6 @@ public class TestNodeManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Asserts that if we get duplicate registration calls for a datanode, we will
|
|
||||||
* ignore it and LOG the error.
|
|
||||||
*
|
|
||||||
* @throws IOException
|
|
||||||
* @throws InterruptedException
|
|
||||||
* @throws TimeoutException
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testScmDuplicateRegistrationLogsError() throws IOException,
|
|
||||||
InterruptedException, TimeoutException {
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
|
|
||||||
GenericTestUtils.LogCapturer logCapturer =
|
|
||||||
GenericTestUtils.LogCapturer.captureLogs(SCMNodeManager.LOG);
|
|
||||||
DatanodeID duplicateNodeID = getDatanodeID(nodeManager);
|
|
||||||
nodeManager.register(duplicateNodeID);
|
|
||||||
logCapturer.stopCapturing();
|
|
||||||
assertThat(logCapturer.getOutput(), containsString("Datanode is already" +
|
|
||||||
" registered."));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Asserts that we log an error for null in datanode ID.
|
* Asserts that we log an error for null in datanode ID.
|
||||||
*
|
*
|
||||||
|
@ -532,9 +478,12 @@ public class TestNodeManager {
|
||||||
* Cluster state: Healthy: All nodes are heartbeat-ing like normal.
|
* Cluster state: Healthy: All nodes are heartbeat-ing like normal.
|
||||||
*/
|
*/
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
||||||
DatanodeID healthyNode = getDatanodeID(nodeManager, "HealthyNode");
|
DatanodeID healthyNode =
|
||||||
DatanodeID staleNode = getDatanodeID(nodeManager, "StaleNode");
|
SCMTestUtils.getDatanodeID(nodeManager, "HealthyNode");
|
||||||
DatanodeID deadNode = getDatanodeID(nodeManager, "DeadNode");
|
DatanodeID staleNode =
|
||||||
|
SCMTestUtils.getDatanodeID(nodeManager, "StaleNode");
|
||||||
|
DatanodeID deadNode =
|
||||||
|
SCMTestUtils.getDatanodeID(nodeManager, "DeadNode");
|
||||||
nodeManager.sendHeartbeat(healthyNode);
|
nodeManager.sendHeartbeat(healthyNode);
|
||||||
nodeManager.sendHeartbeat(staleNode);
|
nodeManager.sendHeartbeat(staleNode);
|
||||||
nodeManager.sendHeartbeat(deadNode);
|
nodeManager.sendHeartbeat(deadNode);
|
||||||
|
@ -659,7 +608,7 @@ public class TestNodeManager {
|
||||||
prefix) {
|
prefix) {
|
||||||
List<DatanodeID> list = new LinkedList<>();
|
List<DatanodeID> list = new LinkedList<>();
|
||||||
for (int x = 0; x < count; x++) {
|
for (int x = 0; x < count; x++) {
|
||||||
list.add(getDatanodeID(nodeManager, prefix + x));
|
list.add(SCMTestUtils.getDatanodeID(nodeManager, prefix + x));
|
||||||
}
|
}
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
@ -878,7 +827,7 @@ public class TestNodeManager {
|
||||||
|
|
||||||
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
try (SCMNodeManager nodeManager = createNodeManager(conf)) {
|
||||||
nodeManager.setMinimumChillModeNodes(10);
|
nodeManager.setMinimumChillModeNodes(10);
|
||||||
DatanodeID datanodeID = getDatanodeID(nodeManager);
|
DatanodeID datanodeID = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
nodeManager.sendHeartbeat(datanodeID);
|
nodeManager.sendHeartbeat(datanodeID);
|
||||||
String status = nodeManager.getChillModeStatus();
|
String status = nodeManager.getChillModeStatus();
|
||||||
Assert.assertThat(status, CoreMatchers.containsString("Still in chill " +
|
Assert.assertThat(status, CoreMatchers.containsString("Still in chill " +
|
||||||
|
@ -908,7 +857,7 @@ public class TestNodeManager {
|
||||||
|
|
||||||
// Assert that node manager force enter cannot be overridden by nodes HBs.
|
// Assert that node manager force enter cannot be overridden by nodes HBs.
|
||||||
for(int x= 0; x < 20; x++) {
|
for(int x= 0; x < 20; x++) {
|
||||||
DatanodeID datanode = getDatanodeID(nodeManager);
|
DatanodeID datanode = SCMTestUtils.getDatanodeID(nodeManager);
|
||||||
nodeManager.sendHeartbeat(datanode);
|
nodeManager.sendHeartbeat(datanode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue