HDFS-13070. Ozone: SCM: Support for container replica reconciliation - 1. Contributed by Nanda kumar.
This commit is contained in:
parent
4c10a849e8
commit
b4a3cf1476
|
@ -223,6 +223,12 @@ public final class ScmConfigKeys {
|
||||||
public static final String
|
public static final String
|
||||||
OZONE_SCM_CONTAINER_REPORT_PROCESSING_INTERVAL_DEFAULT = "60s";
|
OZONE_SCM_CONTAINER_REPORT_PROCESSING_INTERVAL_DEFAULT = "60s";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This determines the total number of pools to be processed in parallel.
|
||||||
|
*/
|
||||||
|
public static final String OZONE_SCM_MAX_NODEPOOL_PROCESSING_THREADS =
|
||||||
|
"ozone.scm.max.nodepool.processing.threads";
|
||||||
|
public static final int OZONE_SCM_MAX_NODEPOOL_PROCESSING_THREADS_DEFAULT = 1;
|
||||||
/**
|
/**
|
||||||
* These 2 settings control the number of threads in executor pool and time
|
* These 2 settings control the number of threads in executor pool and time
|
||||||
* outs for the container reports from all nodes.
|
* outs for the container reports from all nodes.
|
||||||
|
|
|
@ -984,9 +984,7 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||||
updateContainerReportMetrics(reports);
|
updateContainerReportMetrics(reports);
|
||||||
|
|
||||||
// should we process container reports async?
|
// should we process container reports async?
|
||||||
scmContainerManager.processContainerReports(
|
scmContainerManager.processContainerReports(reports);
|
||||||
DatanodeID.getFromProtoBuf(reports.getDatanodeID()),
|
|
||||||
reports.getType(), reports.getReportsList());
|
|
||||||
return ContainerReportsResponseProto.newBuilder().build();
|
return ContainerReportsResponseProto.newBuilder().build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,6 @@ import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.ozone.OzoneConsts;
|
import org.apache.hadoop.ozone.OzoneConsts;
|
||||||
import org.apache.hadoop.ozone.lease.Lease;
|
import org.apache.hadoop.ozone.lease.Lease;
|
||||||
import org.apache.hadoop.ozone.lease.LeaseException;
|
import org.apache.hadoop.ozone.lease.LeaseException;
|
||||||
|
@ -29,7 +28,9 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneProtos;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.ReplicationFactor;
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.ReplicationFactor;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.ReplicationType;
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.ReplicationType;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
|
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
||||||
|
import org.apache.hadoop.ozone.scm.container.replication.ContainerSupervisor;
|
||||||
import org.apache.hadoop.ozone.scm.exceptions.SCMException;
|
import org.apache.hadoop.ozone.scm.exceptions.SCMException;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||||
import org.apache.hadoop.ozone.scm.pipelines.PipelineSelector;
|
import org.apache.hadoop.ozone.scm.pipelines.PipelineSelector;
|
||||||
|
@ -74,6 +75,7 @@ public class ContainerMapping implements Mapping {
|
||||||
private final PipelineSelector pipelineSelector;
|
private final PipelineSelector pipelineSelector;
|
||||||
private final ContainerStateManager containerStateManager;
|
private final ContainerStateManager containerStateManager;
|
||||||
private final LeaseManager<ContainerInfo> containerLeaseManager;
|
private final LeaseManager<ContainerInfo> containerLeaseManager;
|
||||||
|
private final ContainerSupervisor containerSupervisor;
|
||||||
private final float containerCloseThreshold;
|
private final float containerCloseThreshold;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -113,6 +115,9 @@ public class ContainerMapping implements Mapping {
|
||||||
this.pipelineSelector = new PipelineSelector(nodeManager, conf);
|
this.pipelineSelector = new PipelineSelector(nodeManager, conf);
|
||||||
this.containerStateManager =
|
this.containerStateManager =
|
||||||
new ContainerStateManager(conf, this);
|
new ContainerStateManager(conf, this);
|
||||||
|
this.containerSupervisor =
|
||||||
|
new ContainerSupervisor(conf, nodeManager,
|
||||||
|
nodeManager.getNodePoolManager());
|
||||||
this.containerCloseThreshold = conf.getFloat(
|
this.containerCloseThreshold = conf.getFloat(
|
||||||
ScmConfigKeys.OZONE_SCM_CONTAINER_CLOSE_THRESHOLD,
|
ScmConfigKeys.OZONE_SCM_CONTAINER_CLOSE_THRESHOLD,
|
||||||
ScmConfigKeys.OZONE_SCM_CONTAINER_CLOSE_THRESHOLD_DEFAULT);
|
ScmConfigKeys.OZONE_SCM_CONTAINER_CLOSE_THRESHOLD_DEFAULT);
|
||||||
|
@ -347,16 +352,14 @@ public class ContainerMapping implements Mapping {
|
||||||
/**
|
/**
|
||||||
* Process container report from Datanode.
|
* Process container report from Datanode.
|
||||||
*
|
*
|
||||||
* @param datanodeID Datanode ID
|
* @param reports Container report
|
||||||
* @param reportType Type of report
|
|
||||||
* @param containerInfos container details
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void processContainerReports(
|
public void processContainerReports(ContainerReportsRequestProto reports)
|
||||||
DatanodeID datanodeID,
|
throws IOException {
|
||||||
ContainerReportsRequestProto.reportType reportType,
|
|
||||||
List<StorageContainerDatanodeProtocolProtos.ContainerInfo>
|
List<StorageContainerDatanodeProtocolProtos.ContainerInfo>
|
||||||
containerInfos) throws IOException {
|
containerInfos = reports.getReportsList();
|
||||||
|
containerSupervisor.handleContainerReport(reports);
|
||||||
for (StorageContainerDatanodeProtocolProtos.ContainerInfo containerInfo :
|
for (StorageContainerDatanodeProtocolProtos.ContainerInfo containerInfo :
|
||||||
containerInfos) {
|
containerInfos) {
|
||||||
byte[] dbKey = containerInfo.getContainerNameBytes().toByteArray();
|
byte[] dbKey = containerInfo.getContainerNameBytes().toByteArray();
|
||||||
|
@ -395,7 +398,7 @@ public class ContainerMapping implements Mapping {
|
||||||
// TODO: Handling of containers which are already in close queue.
|
// TODO: Handling of containers which are already in close queue.
|
||||||
if (containerUsedPercentage >= containerCloseThreshold) {
|
if (containerUsedPercentage >= containerCloseThreshold) {
|
||||||
// TODO: The container has to be moved to close container queue.
|
// TODO: The container has to be moved to close container queue.
|
||||||
// For now, we are just updating the container state to CLOSED.
|
// For now, we are just updating the container state to CLOSING.
|
||||||
// Close container implementation can decide on how to maintain
|
// Close container implementation can decide on how to maintain
|
||||||
// list of containers to be closed, this is the place where we
|
// list of containers to be closed, this is the place where we
|
||||||
// have to add the containers to that list.
|
// have to add the containers to that list.
|
||||||
|
@ -412,7 +415,7 @@ public class ContainerMapping implements Mapping {
|
||||||
// Container not found in our container db.
|
// Container not found in our container db.
|
||||||
LOG.error("Error while processing container report from datanode :" +
|
LOG.error("Error while processing container report from datanode :" +
|
||||||
" {}, for container: {}, reason: container doesn't exist in" +
|
" {}, for container: {}, reason: container doesn't exist in" +
|
||||||
"container database.", datanodeID,
|
"container database.", reports.getDatanodeID(),
|
||||||
containerInfo.getContainerName());
|
containerInfo.getContainerName());
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -17,10 +17,7 @@
|
||||||
package org.apache.hadoop.ozone.scm.container;
|
package org.apache.hadoop.ozone.scm.container;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos;
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos;
|
||||||
import org.apache.hadoop.ozone.protocol.proto
|
|
||||||
.StorageContainerDatanodeProtocolProtos;
|
|
||||||
import org.apache.hadoop.ozone.protocol.proto
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
||||||
import org.apache.hadoop.scm.container.common.helpers.ContainerInfo;
|
import org.apache.hadoop.scm.container.common.helpers.ContainerInfo;
|
||||||
|
@ -102,14 +99,9 @@ public interface Mapping extends Closeable {
|
||||||
/**
|
/**
|
||||||
* Process container report from Datanode.
|
* Process container report from Datanode.
|
||||||
*
|
*
|
||||||
* @param datanodeID Datanode ID
|
* @param reports Container report
|
||||||
* @param reportType Type of report
|
|
||||||
* @param containerInfos container details
|
|
||||||
*/
|
*/
|
||||||
void processContainerReports(
|
void processContainerReports(ContainerReportsRequestProto reports)
|
||||||
DatanodeID datanodeID,
|
throws IOException;
|
||||||
ContainerReportsRequestProto.reportType reportType,
|
|
||||||
List<StorageContainerDatanodeProtocolProtos.ContainerInfo>
|
|
||||||
containerInfos) throws IOException;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,12 +19,11 @@ package org.apache.hadoop.ozone.scm.container.replication;
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.conf.OzoneConfiguration;
|
|
||||||
import org.apache.hadoop.ozone.protocol.proto
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
||||||
import org.apache.hadoop.ozone.scm.exceptions.SCMException;
|
import org.apache.hadoop.ozone.scm.exceptions.SCMException;
|
||||||
import org.apache.hadoop.ozone.scm.node.CommandQueue;
|
|
||||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodePoolManager;
|
import org.apache.hadoop.ozone.scm.node.NodePoolManager;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
@ -43,6 +42,8 @@ import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.locks.ReadWriteLock;
|
||||||
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
|
||||||
import static com.google.common.util.concurrent.Uninterruptibles
|
import static com.google.common.util.concurrent.Uninterruptibles
|
||||||
.sleepUninterruptibly;
|
.sleepUninterruptibly;
|
||||||
|
@ -58,17 +59,20 @@ import static org.apache.hadoop.scm.ScmConfigKeys
|
||||||
.OZONE_SCM_MAX_CONTAINER_REPORT_THREADS;
|
.OZONE_SCM_MAX_CONTAINER_REPORT_THREADS;
|
||||||
import static org.apache.hadoop.scm.ScmConfigKeys
|
import static org.apache.hadoop.scm.ScmConfigKeys
|
||||||
.OZONE_SCM_MAX_CONTAINER_REPORT_THREADS_DEFAULT;
|
.OZONE_SCM_MAX_CONTAINER_REPORT_THREADS_DEFAULT;
|
||||||
|
import static org.apache.hadoop.scm.ScmConfigKeys
|
||||||
|
.OZONE_SCM_MAX_NODEPOOL_PROCESSING_THREADS;
|
||||||
|
import static org.apache.hadoop.scm.ScmConfigKeys
|
||||||
|
.OZONE_SCM_MAX_NODEPOOL_PROCESSING_THREADS_DEFAULT;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class takes a set of container reports that belong to a pool and then
|
* This class takes a set of container reports that belong to a pool and then
|
||||||
* computes the replication levels for each container.
|
* computes the replication levels for each container.
|
||||||
*/
|
*/
|
||||||
public class ContainerReplicationManager implements Closeable {
|
public class ContainerSupervisor implements Closeable {
|
||||||
public static final Logger LOG =
|
public static final Logger LOG =
|
||||||
LoggerFactory.getLogger(ContainerReplicationManager.class);
|
LoggerFactory.getLogger(ContainerSupervisor.class);
|
||||||
|
|
||||||
private final NodePoolManager poolManager;
|
private final NodePoolManager poolManager;
|
||||||
private final CommandQueue commandQueue;
|
|
||||||
private final HashSet<String> poolNames;
|
private final HashSet<String> poolNames;
|
||||||
private final PriorityQueue<PeriodicPool> poolQueue;
|
private final PriorityQueue<PeriodicPool> poolQueue;
|
||||||
private final NodeManager nodeManager;
|
private final NodeManager nodeManager;
|
||||||
|
@ -79,6 +83,9 @@ public class ContainerReplicationManager implements Closeable {
|
||||||
private long poolProcessCount;
|
private long poolProcessCount;
|
||||||
private final List<InProgressPool> inProgressPoolList;
|
private final List<InProgressPool> inProgressPoolList;
|
||||||
private final AtomicInteger threadFaultCount;
|
private final AtomicInteger threadFaultCount;
|
||||||
|
private final int inProgressPoolMaxCount;
|
||||||
|
|
||||||
|
private final ReadWriteLock inProgressPoolListLock;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of times we have processed pools.
|
* Returns the number of times we have processed pools.
|
||||||
|
@ -95,13 +102,10 @@ public class ContainerReplicationManager implements Closeable {
|
||||||
* @param conf - OzoneConfiguration
|
* @param conf - OzoneConfiguration
|
||||||
* @param nodeManager - Node Manager
|
* @param nodeManager - Node Manager
|
||||||
* @param poolManager - Pool Manager
|
* @param poolManager - Pool Manager
|
||||||
* @param commandQueue - Datanodes Command Queue.
|
|
||||||
*/
|
*/
|
||||||
public ContainerReplicationManager(OzoneConfiguration conf,
|
public ContainerSupervisor(Configuration conf, NodeManager nodeManager,
|
||||||
NodeManager nodeManager, NodePoolManager poolManager,
|
NodePoolManager poolManager) {
|
||||||
CommandQueue commandQueue) {
|
|
||||||
Preconditions.checkNotNull(poolManager);
|
Preconditions.checkNotNull(poolManager);
|
||||||
Preconditions.checkNotNull(commandQueue);
|
|
||||||
Preconditions.checkNotNull(nodeManager);
|
Preconditions.checkNotNull(nodeManager);
|
||||||
this.containerProcessingLag =
|
this.containerProcessingLag =
|
||||||
conf.getTimeDuration(OZONE_SCM_CONTAINER_REPORT_PROCESSING_INTERVAL,
|
conf.getTimeDuration(OZONE_SCM_CONTAINER_REPORT_PROCESSING_INTERVAL,
|
||||||
|
@ -116,18 +120,21 @@ public class ContainerReplicationManager implements Closeable {
|
||||||
conf.getTimeDuration(OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT,
|
conf.getTimeDuration(OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT,
|
||||||
OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT_DEFAULT,
|
OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT_DEFAULT,
|
||||||
TimeUnit.MILLISECONDS);
|
TimeUnit.MILLISECONDS);
|
||||||
|
this.inProgressPoolMaxCount = conf.getInt(
|
||||||
|
OZONE_SCM_MAX_NODEPOOL_PROCESSING_THREADS,
|
||||||
|
OZONE_SCM_MAX_NODEPOOL_PROCESSING_THREADS_DEFAULT);
|
||||||
this.poolManager = poolManager;
|
this.poolManager = poolManager;
|
||||||
this.commandQueue = commandQueue;
|
|
||||||
this.nodeManager = nodeManager;
|
this.nodeManager = nodeManager;
|
||||||
this.poolNames = new HashSet<>();
|
this.poolNames = new HashSet<>();
|
||||||
this.poolQueue = new PriorityQueue<>();
|
this.poolQueue = new PriorityQueue<>();
|
||||||
runnable = new AtomicBoolean(true);
|
this.runnable = new AtomicBoolean(true);
|
||||||
this.threadFaultCount = new AtomicInteger(0);
|
this.threadFaultCount = new AtomicInteger(0);
|
||||||
executorService = HadoopExecutors.newCachedThreadPool(
|
this.executorService = HadoopExecutors.newCachedThreadPool(
|
||||||
new ThreadFactoryBuilder().setDaemon(true)
|
new ThreadFactoryBuilder().setDaemon(true)
|
||||||
.setNameFormat("Container Reports Processing Thread - %d")
|
.setNameFormat("Container Reports Processing Thread - %d")
|
||||||
.build(), maxContainerReportThreads);
|
.build(), maxContainerReportThreads);
|
||||||
inProgressPoolList = new LinkedList<>();
|
this.inProgressPoolList = new LinkedList<>();
|
||||||
|
this.inProgressPoolListLock = new ReentrantReadWriteLock();
|
||||||
|
|
||||||
initPoolProcessThread();
|
initPoolProcessThread();
|
||||||
}
|
}
|
||||||
|
@ -211,31 +218,49 @@ public class ContainerReplicationManager implements Closeable {
|
||||||
while (runnable.get()) {
|
while (runnable.get()) {
|
||||||
// Make sure that we don't have any new pools.
|
// Make sure that we don't have any new pools.
|
||||||
refreshPools();
|
refreshPools();
|
||||||
|
while (inProgressPoolList.size() < inProgressPoolMaxCount) {
|
||||||
PeriodicPool pool = poolQueue.poll();
|
PeriodicPool pool = poolQueue.poll();
|
||||||
if (pool != null) {
|
if (pool != null) {
|
||||||
if (pool.getLastProcessedTime() + this.containerProcessingLag <
|
if (pool.getLastProcessedTime() + this.containerProcessingLag >
|
||||||
Time.monotonicNow()) {
|
Time.monotonicNow()) {
|
||||||
LOG.debug("Adding pool {} to container processing queue", pool
|
|
||||||
.getPoolName());
|
|
||||||
InProgressPool inProgressPool = new InProgressPool(maxPoolWait,
|
|
||||||
pool, this.nodeManager, this.poolManager, this.commandQueue,
|
|
||||||
this.executorService);
|
|
||||||
inProgressPool.startReconciliation();
|
|
||||||
inProgressPoolList.add(inProgressPool);
|
|
||||||
poolProcessCount++;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
LOG.debug("Not within the time window for processing: {}",
|
LOG.debug("Not within the time window for processing: {}",
|
||||||
pool.getPoolName());
|
pool.getPoolName());
|
||||||
// Put back this pool since we are not planning to process it.
|
|
||||||
poolQueue.add(pool);
|
|
||||||
// we might over sleep here, not a big deal.
|
// we might over sleep here, not a big deal.
|
||||||
sleepUninterruptibly(this.containerProcessingLag,
|
sleepUninterruptibly(this.containerProcessingLag,
|
||||||
TimeUnit.MILLISECONDS);
|
TimeUnit.MILLISECONDS);
|
||||||
}
|
}
|
||||||
|
LOG.debug("Adding pool {} to container processing queue",
|
||||||
|
pool.getPoolName());
|
||||||
|
InProgressPool inProgressPool = new InProgressPool(maxPoolWait,
|
||||||
|
pool, this.nodeManager, this.poolManager, this.executorService);
|
||||||
|
inProgressPool.startReconciliation();
|
||||||
|
inProgressPoolListLock.writeLock().lock();
|
||||||
|
try {
|
||||||
|
inProgressPoolList.add(inProgressPool);
|
||||||
|
} finally {
|
||||||
|
inProgressPoolListLock.writeLock().unlock();
|
||||||
|
}
|
||||||
|
poolProcessCount++;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
sleepUninterruptibly(this.maxPoolWait, TimeUnit.MILLISECONDS);
|
sleepUninterruptibly(this.maxPoolWait, TimeUnit.MILLISECONDS);
|
||||||
|
inProgressPoolListLock.readLock().lock();
|
||||||
|
try {
|
||||||
|
for (InProgressPool inProgressPool : inProgressPoolList) {
|
||||||
|
inProgressPool.finalizeReconciliation();
|
||||||
|
poolQueue.add(inProgressPool.getPool());
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
inProgressPoolListLock.readLock().unlock();
|
||||||
|
}
|
||||||
|
inProgressPoolListLock.writeLock().lock();
|
||||||
|
try {
|
||||||
|
inProgressPoolList.clear();
|
||||||
|
} finally {
|
||||||
|
inProgressPoolListLock.writeLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -263,18 +288,11 @@ public class ContainerReplicationManager implements Closeable {
|
||||||
*/
|
*/
|
||||||
public void handleContainerReport(
|
public void handleContainerReport(
|
||||||
ContainerReportsRequestProto containerReport) {
|
ContainerReportsRequestProto containerReport) {
|
||||||
String poolName = null;
|
DatanodeID datanodeID = DatanodeID.getFromProtoBuf(
|
||||||
DatanodeID datanodeID = DatanodeID
|
containerReport.getDatanodeID());
|
||||||
.getFromProtoBuf(containerReport.getDatanodeID());
|
inProgressPoolListLock.readLock().lock();
|
||||||
try {
|
try {
|
||||||
poolName = poolManager.getNodePool(datanodeID);
|
String poolName = poolManager.getNodePool(datanodeID);
|
||||||
} catch (SCMException e) {
|
|
||||||
LOG.warn("Skipping processing container report from datanode {}, "
|
|
||||||
+ "cause: failed to get the corresponding node pool",
|
|
||||||
datanodeID.toString(), e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (InProgressPool ppool : inProgressPoolList) {
|
for (InProgressPool ppool : inProgressPoolList) {
|
||||||
if (ppool.getPoolName().equalsIgnoreCase(poolName)) {
|
if (ppool.getPoolName().equalsIgnoreCase(poolName)) {
|
||||||
ppool.handleContainerReport(containerReport);
|
ppool.handleContainerReport(containerReport);
|
||||||
|
@ -282,9 +300,16 @@ public class ContainerReplicationManager implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO: Decide if we can do anything else with this report.
|
// TODO: Decide if we can do anything else with this report.
|
||||||
LOG.debug("Discarding the container report for pool {}. That pool is not " +
|
LOG.debug("Discarding the container report for pool {}. " +
|
||||||
"currently in the pool reconciliation process. Container Name: {}",
|
"That pool is not currently in the pool reconciliation process." +
|
||||||
poolName, containerReport.getDatanodeID());
|
" Container Name: {}", poolName, containerReport.getDatanodeID());
|
||||||
|
} catch (SCMException e) {
|
||||||
|
LOG.warn("Skipping processing container report from datanode {}, "
|
||||||
|
+ "cause: failed to get the corresponding node pool",
|
||||||
|
datanodeID.toString(), e);
|
||||||
|
} finally {
|
||||||
|
inProgressPoolListLock.readLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -21,9 +21,10 @@ import com.google.common.base.Preconditions;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.ozone.protocol.commands.SendContainerCommand;
|
import org.apache.hadoop.ozone.protocol.commands.SendContainerCommand;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState;
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerInfo;
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
.StorageContainerDatanodeProtocolProtos.ContainerInfo;
|
||||||
import org.apache.hadoop.ozone.scm.node.CommandQueue;
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodePoolManager;
|
import org.apache.hadoop.ozone.scm.node.NodePoolManager;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
@ -39,10 +40,14 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
|
import static com.google.common.util.concurrent.Uninterruptibles
|
||||||
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState.HEALTHY;
|
.sleepUninterruptibly;
|
||||||
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState.STALE;
|
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos
|
||||||
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState.UNKNOWN;
|
.NodeState.HEALTHY;
|
||||||
|
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos
|
||||||
|
.NodeState.STALE;
|
||||||
|
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos
|
||||||
|
.NodeState.UNKNOWN;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* These are pools that are actively checking for replication status of the
|
* These are pools that are actively checking for replication status of the
|
||||||
|
@ -51,8 +56,8 @@ import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState.UNKNO
|
||||||
public final class InProgressPool {
|
public final class InProgressPool {
|
||||||
public static final Logger LOG =
|
public static final Logger LOG =
|
||||||
LoggerFactory.getLogger(InProgressPool.class);
|
LoggerFactory.getLogger(InProgressPool.class);
|
||||||
|
|
||||||
private final PeriodicPool pool;
|
private final PeriodicPool pool;
|
||||||
private final CommandQueue commandQueue;
|
|
||||||
private final NodeManager nodeManager;
|
private final NodeManager nodeManager;
|
||||||
private final NodePoolManager poolManager;
|
private final NodePoolManager poolManager;
|
||||||
private final ExecutorService executorService;
|
private final ExecutorService executorService;
|
||||||
|
@ -70,22 +75,19 @@ public final class InProgressPool {
|
||||||
* @param pool - Pool that we are working against
|
* @param pool - Pool that we are working against
|
||||||
* @param nodeManager - Node Manager
|
* @param nodeManager - Node Manager
|
||||||
* @param poolManager - pool manager
|
* @param poolManager - pool manager
|
||||||
* @param commandQueue - Command queue
|
|
||||||
* @param executorService - Shared Executor service.
|
* @param executorService - Shared Executor service.
|
||||||
*/
|
*/
|
||||||
InProgressPool(long maxWaitTime, PeriodicPool pool,
|
InProgressPool(long maxWaitTime, PeriodicPool pool,
|
||||||
NodeManager nodeManager, NodePoolManager poolManager,
|
NodeManager nodeManager, NodePoolManager poolManager,
|
||||||
CommandQueue commandQueue, ExecutorService executorService) {
|
ExecutorService executorService) {
|
||||||
Preconditions.checkNotNull(pool);
|
Preconditions.checkNotNull(pool);
|
||||||
Preconditions.checkNotNull(nodeManager);
|
Preconditions.checkNotNull(nodeManager);
|
||||||
Preconditions.checkNotNull(poolManager);
|
Preconditions.checkNotNull(poolManager);
|
||||||
Preconditions.checkNotNull(commandQueue);
|
|
||||||
Preconditions.checkNotNull(executorService);
|
Preconditions.checkNotNull(executorService);
|
||||||
Preconditions.checkArgument(maxWaitTime > 0);
|
Preconditions.checkArgument(maxWaitTime > 0);
|
||||||
this.pool = pool;
|
this.pool = pool;
|
||||||
this.nodeManager = nodeManager;
|
this.nodeManager = nodeManager;
|
||||||
this.poolManager = poolManager;
|
this.poolManager = poolManager;
|
||||||
this.commandQueue = commandQueue;
|
|
||||||
this.executorService = executorService;
|
this.executorService = executorService;
|
||||||
this.containerCountMap = new ConcurrentHashMap<>();
|
this.containerCountMap = new ConcurrentHashMap<>();
|
||||||
this.processedNodeSet = new ConcurrentHashMap<>();
|
this.processedNodeSet = new ConcurrentHashMap<>();
|
||||||
|
@ -186,7 +188,7 @@ public final class InProgressPool {
|
||||||
// Queue commands to all datanodes in this pool to send us container
|
// Queue commands to all datanodes in this pool to send us container
|
||||||
// report. Since we ignore dead nodes, it is possible that we would have
|
// report. Since we ignore dead nodes, it is possible that we would have
|
||||||
// over replicated the container if the node comes back.
|
// over replicated the container if the node comes back.
|
||||||
commandQueue.addCommand(id, cmd);
|
nodeManager.addDatanodeCommand(id, cmd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.status = ProgressStatus.InProgress;
|
this.status = ProgressStatus.InProgress;
|
||||||
|
@ -235,7 +237,12 @@ public final class InProgressPool {
|
||||||
*/
|
*/
|
||||||
public void handleContainerReport(
|
public void handleContainerReport(
|
||||||
ContainerReportsRequestProto containerReport) {
|
ContainerReportsRequestProto containerReport) {
|
||||||
|
if (status == ProgressStatus.InProgress) {
|
||||||
executorService.submit(processContainerReport(containerReport));
|
executorService.submit(processContainerReport(containerReport));
|
||||||
|
} else {
|
||||||
|
LOG.debug("Cannot handle container report when the pool is in {} status.",
|
||||||
|
status);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Runnable processContainerReport(
|
private Runnable processContainerReport(
|
||||||
|
@ -292,6 +299,11 @@ public final class InProgressPool {
|
||||||
return pool.getPoolName();
|
return pool.getPoolName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void finalizeReconciliation() {
|
||||||
|
status = ProgressStatus.Done;
|
||||||
|
//TODO: Add finalizing logic. This is where actual reconciliation happens.
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Current status of the computing replication status.
|
* Current status of the computing replication status.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -122,6 +122,12 @@ public interface NodeManager extends StorageContainerNodeProtocol,
|
||||||
*/
|
*/
|
||||||
SCMNodeMetric getNodeStat(DatanodeID datanodeID);
|
SCMNodeMetric getNodeStat(DatanodeID datanodeID);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the NodePoolManager associated with the NodeManager.
|
||||||
|
* @return NodePoolManager
|
||||||
|
*/
|
||||||
|
NodePoolManager getNodePoolManager();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wait for the heartbeat to be processed by NodeManager.
|
* Wait for the heartbeat to be processed by NodeManager.
|
||||||
* @return true if heartbeat has been processed.
|
* @return true if heartbeat has been processed.
|
||||||
|
|
|
@ -857,6 +857,11 @@ public class SCMNodeManager
|
||||||
return new SCMNodeMetric(nodeStats.get(datanodeID.getDatanodeUuid()));
|
return new SCMNodeMetric(nodeStats.get(datanodeID.getDatanodeUuid()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodePoolManager getNodePoolManager() {
|
||||||
|
return nodePoolManager;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Integer> getNodeCount() {
|
public Map<String, Integer> getNodeCount() {
|
||||||
Map<String, Integer> nodeCountMap = new HashMap<String, Integer>();
|
Map<String, Integer> nodeCountMap = new HashMap<String, Integer>();
|
||||||
|
|
|
@ -29,26 +29,32 @@ import org.apache.hadoop.ozone.protocol.proto
|
||||||
.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
|
.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
|
||||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeMetric;
|
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeMetric;
|
||||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeStat;
|
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeStat;
|
||||||
|
import org.apache.hadoop.ozone.scm.node.CommandQueue;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState;
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState;
|
||||||
|
import org.apache.hadoop.ozone.scm.node.NodePoolManager;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A Node Manager to test replication.
|
* A Node Manager to test replication.
|
||||||
*/
|
*/
|
||||||
public class ReplicationNodeManagerMock implements NodeManager {
|
public class ReplicationNodeManagerMock implements NodeManager {
|
||||||
private final Map<DatanodeID, NodeState> nodeStateMap;
|
private final Map<DatanodeID, NodeState> nodeStateMap;
|
||||||
|
private final CommandQueue commandQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A list of Datanodes and current states.
|
* A list of Datanodes and current states.
|
||||||
* @param nodeState A node state map.
|
* @param nodeState A node state map.
|
||||||
*/
|
*/
|
||||||
public ReplicationNodeManagerMock(Map<DatanodeID, NodeState> nodeState) {
|
public ReplicationNodeManagerMock(Map<DatanodeID, NodeState> nodeState,
|
||||||
|
CommandQueue commandQueue) {
|
||||||
Preconditions.checkNotNull(nodeState);
|
Preconditions.checkNotNull(nodeState);
|
||||||
nodeStateMap = nodeState;
|
this.nodeStateMap = nodeState;
|
||||||
|
this.commandQueue = commandQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -194,6 +200,11 @@ public class ReplicationNodeManagerMock implements NodeManager {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodePoolManager getNodePoolManager() {
|
||||||
|
return Mockito.mock(NodePoolManager.class);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wait for the heartbeat to be processed by NodeManager.
|
* Wait for the heartbeat to be processed by NodeManager.
|
||||||
*
|
*
|
||||||
|
@ -304,4 +315,9 @@ public class ReplicationNodeManagerMock implements NodeManager {
|
||||||
nodeStateMap.put(id, state);
|
nodeStateMap.put(id, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addDatanodeCommand(DatanodeID id, SCMCommand command) {
|
||||||
|
this.commandQueue.addCommand(id, command);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,8 +28,7 @@ import org.apache.hadoop.ozone.container.common.SCMTestUtils;
|
||||||
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState;
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState;
|
||||||
import org.apache.hadoop.ozone.protocol.proto
|
import org.apache.hadoop.ozone.protocol.proto
|
||||||
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
||||||
import org.apache.hadoop.ozone.scm.container.replication
|
import org.apache.hadoop.ozone.scm.container.replication.ContainerSupervisor;
|
||||||
.ContainerReplicationManager;
|
|
||||||
import org.apache.hadoop.ozone.scm.container.replication.InProgressPool;
|
import org.apache.hadoop.ozone.scm.container.replication.InProgressPool;
|
||||||
import org.apache.hadoop.ozone.scm.node.CommandQueue;
|
import org.apache.hadoop.ozone.scm.node.CommandQueue;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||||
|
@ -53,35 +52,37 @@ import java.util.concurrent.TimeoutException;
|
||||||
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState.HEALTHY;
|
import static org.apache.hadoop.ozone.protocol.proto.OzoneProtos.NodeState.HEALTHY;
|
||||||
import static org.apache.hadoop.scm.ScmConfigKeys
|
import static org.apache.hadoop.scm.ScmConfigKeys
|
||||||
.OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT;
|
.OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT;
|
||||||
|
import static org.apache.hadoop.scm.ScmConfigKeys
|
||||||
|
.OZONE_SCM_CONTAINER_REPORT_PROCESSING_INTERVAL;
|
||||||
import static org.apache.ratis.shaded.com.google.common.util.concurrent
|
import static org.apache.ratis.shaded.com.google.common.util.concurrent
|
||||||
.Uninterruptibles.sleepUninterruptibly;
|
.Uninterruptibles.sleepUninterruptibly;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for the container manager.
|
* Tests for the container manager.
|
||||||
*/
|
*/
|
||||||
public class TestContainerReplicationManager {
|
public class TestContainerSupervisor {
|
||||||
final static String POOL_NAME_TEMPLATE = "Pool%d";
|
final static String POOL_NAME_TEMPLATE = "Pool%d";
|
||||||
static final int MAX_DATANODES = 72;
|
static final int MAX_DATANODES = 72;
|
||||||
static final int POOL_SIZE = 24;
|
static final int POOL_SIZE = 24;
|
||||||
static final int POOL_COUNT = 3;
|
static final int POOL_COUNT = 3;
|
||||||
private LogCapturer logCapturer = LogCapturer.captureLogs(
|
private LogCapturer logCapturer = LogCapturer.captureLogs(
|
||||||
LogFactory.getLog(ContainerReplicationManager.class));
|
LogFactory.getLog(ContainerSupervisor.class));
|
||||||
private List<DatanodeID> datanodes = new LinkedList<>();
|
private List<DatanodeID> datanodes = new LinkedList<>();
|
||||||
private NodeManager nodeManager;
|
private NodeManager nodeManager;
|
||||||
private NodePoolManager poolManager;
|
private NodePoolManager poolManager;
|
||||||
private CommandQueue commandQueue;
|
private CommandQueue commandQueue;
|
||||||
private ContainerReplicationManager replicationManager;
|
private ContainerSupervisor containerSupervisor;
|
||||||
private ReplicationDatanodeStateManager datanodeStateManager;
|
private ReplicationDatanodeStateManager datanodeStateManager;
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void tearDown() throws Exception {
|
public void tearDown() throws Exception {
|
||||||
logCapturer.stopCapturing();
|
logCapturer.stopCapturing();
|
||||||
GenericTestUtils.setLogLevel(ContainerReplicationManager.LOG, Level.INFO);
|
GenericTestUtils.setLogLevel(ContainerSupervisor.LOG, Level.INFO);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
GenericTestUtils.setLogLevel(ContainerReplicationManager.LOG, Level.DEBUG);
|
GenericTestUtils.setLogLevel(ContainerSupervisor.LOG, Level.DEBUG);
|
||||||
Map<DatanodeID, NodeState> nodeStateMap = new HashMap<>();
|
Map<DatanodeID, NodeState> nodeStateMap = new HashMap<>();
|
||||||
// We are setting up 3 pools with 24 nodes each in this cluster.
|
// We are setting up 3 pools with 24 nodes each in this cluster.
|
||||||
// First we create 72 Datanodes.
|
// First we create 72 Datanodes.
|
||||||
|
@ -91,11 +92,13 @@ public class TestContainerReplicationManager {
|
||||||
nodeStateMap.put(datanode, HEALTHY);
|
nodeStateMap.put(datanode, HEALTHY);
|
||||||
}
|
}
|
||||||
|
|
||||||
// All nodes in this cluster are healthy for time being.
|
|
||||||
nodeManager = new ReplicationNodeManagerMock(nodeStateMap);
|
|
||||||
poolManager = new ReplicationNodePoolManagerMock();
|
|
||||||
commandQueue = new CommandQueue();
|
commandQueue = new CommandQueue();
|
||||||
|
|
||||||
|
// All nodes in this cluster are healthy for time being.
|
||||||
|
nodeManager = new ReplicationNodeManagerMock(nodeStateMap, commandQueue);
|
||||||
|
poolManager = new ReplicationNodePoolManagerMock();
|
||||||
|
|
||||||
|
|
||||||
Assert.assertEquals("Max datanodes should be equal to POOL_SIZE * " +
|
Assert.assertEquals("Max datanodes should be equal to POOL_SIZE * " +
|
||||||
"POOL_COUNT", POOL_COUNT * POOL_SIZE, MAX_DATANODES);
|
"POOL_COUNT", POOL_COUNT * POOL_SIZE, MAX_DATANODES);
|
||||||
|
|
||||||
|
@ -108,10 +111,12 @@ public class TestContainerReplicationManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
OzoneConfiguration config = SCMTestUtils.getOzoneConf();
|
OzoneConfiguration config = SCMTestUtils.getOzoneConf();
|
||||||
config.setTimeDuration(OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT, 1,
|
config.setTimeDuration(OZONE_SCM_CONTAINER_REPORTS_WAIT_TIMEOUT, 2,
|
||||||
TimeUnit.SECONDS);
|
TimeUnit.SECONDS);
|
||||||
replicationManager = new ContainerReplicationManager(config,
|
config.setTimeDuration(OZONE_SCM_CONTAINER_REPORT_PROCESSING_INTERVAL, 1,
|
||||||
nodeManager, poolManager, commandQueue);
|
TimeUnit.SECONDS);
|
||||||
|
containerSupervisor = new ContainerSupervisor(config,
|
||||||
|
nodeManager, poolManager);
|
||||||
datanodeStateManager = new ReplicationDatanodeStateManager(nodeManager,
|
datanodeStateManager = new ReplicationDatanodeStateManager(nodeManager,
|
||||||
poolManager);
|
poolManager);
|
||||||
// Sleep for one second to make sure all threads get time to run.
|
// Sleep for one second to make sure all threads get time to run.
|
||||||
|
@ -125,13 +130,13 @@ public class TestContainerReplicationManager {
|
||||||
public void testAssertPoolsAreProcessed() {
|
public void testAssertPoolsAreProcessed() {
|
||||||
// This asserts that replication manager has started processing at least
|
// This asserts that replication manager has started processing at least
|
||||||
// one pool.
|
// one pool.
|
||||||
Assert.assertTrue(replicationManager.getInProgressPoolCount() > 0);
|
Assert.assertTrue(containerSupervisor.getInProgressPoolCount() > 0);
|
||||||
|
|
||||||
// Since all datanodes are flagged as healthy in this test, for each
|
// Since all datanodes are flagged as healthy in this test, for each
|
||||||
// datanode we must have queued a command.
|
// datanode we must have queued a command.
|
||||||
Assert.assertEquals("Commands are in queue :", commandQueue
|
Assert.assertEquals("Commands are in queue :",
|
||||||
.getCommandsInQueue(), POOL_SIZE * replicationManager
|
POOL_SIZE * containerSupervisor.getInProgressPoolCount(),
|
||||||
.getInProgressPoolCount());
|
commandQueue.getCommandsInQueue());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -144,7 +149,7 @@ public class TestContainerReplicationManager {
|
||||||
InterruptedException {
|
InterruptedException {
|
||||||
String singleNodeContainer = "SingleNodeContainer";
|
String singleNodeContainer = "SingleNodeContainer";
|
||||||
String threeNodeContainer = "ThreeNodeContainer";
|
String threeNodeContainer = "ThreeNodeContainer";
|
||||||
InProgressPool ppool = replicationManager.getInProcessPoolList().get(0);
|
InProgressPool ppool = containerSupervisor.getInProcessPoolList().get(0);
|
||||||
// Only single datanode reporting that "SingleNodeContainer" exists.
|
// Only single datanode reporting that "SingleNodeContainer" exists.
|
||||||
List<ContainerReportsRequestProto> clist =
|
List<ContainerReportsRequestProto> clist =
|
||||||
datanodeStateManager.getContainerReport(singleNodeContainer,
|
datanodeStateManager.getContainerReport(singleNodeContainer,
|
||||||
|
@ -180,7 +185,7 @@ public class TestContainerReplicationManager {
|
||||||
String normalContainer = "NormalContainer";
|
String normalContainer = "NormalContainer";
|
||||||
String overReplicated = "OverReplicatedContainer";
|
String overReplicated = "OverReplicatedContainer";
|
||||||
String wayOverReplicated = "WayOverReplicated";
|
String wayOverReplicated = "WayOverReplicated";
|
||||||
InProgressPool ppool = replicationManager.getInProcessPoolList().get(0);
|
InProgressPool ppool = containerSupervisor.getInProcessPoolList().get(0);
|
||||||
|
|
||||||
List<ContainerReportsRequestProto> clist =
|
List<ContainerReportsRequestProto> clist =
|
||||||
datanodeStateManager.getContainerReport(normalContainer,
|
datanodeStateManager.getContainerReport(normalContainer,
|
||||||
|
@ -221,7 +226,7 @@ public class TestContainerReplicationManager {
|
||||||
public void testAllPoolsAreProcessed() throws TimeoutException,
|
public void testAllPoolsAreProcessed() throws TimeoutException,
|
||||||
InterruptedException {
|
InterruptedException {
|
||||||
// Verify that we saw all three pools being picked up for processing.
|
// Verify that we saw all three pools being picked up for processing.
|
||||||
GenericTestUtils.waitFor(() -> replicationManager.getPoolProcessCount()
|
GenericTestUtils.waitFor(() -> containerSupervisor.getPoolProcessCount()
|
||||||
>= 3, 200, 15 * 1000);
|
>= 3, 200, 15 * 1000);
|
||||||
Assert.assertTrue(logCapturer.getOutput().contains("Pool1") &&
|
Assert.assertTrue(logCapturer.getOutput().contains("Pool1") &&
|
||||||
logCapturer.getOutput().contains("Pool2") &&
|
logCapturer.getOutput().contains("Pool2") &&
|
||||||
|
@ -253,7 +258,7 @@ public class TestContainerReplicationManager {
|
||||||
List<ContainerReportsRequestProto> clist =
|
List<ContainerReportsRequestProto> clist =
|
||||||
datanodeStateManager.getContainerReport("NewContainer1",
|
datanodeStateManager.getContainerReport("NewContainer1",
|
||||||
"PoolNew", 1);
|
"PoolNew", 1);
|
||||||
replicationManager.handleContainerReport(clist.get(0));
|
containerSupervisor.handleContainerReport(clist.get(0));
|
||||||
GenericTestUtils.waitFor(() ->
|
GenericTestUtils.waitFor(() ->
|
||||||
inProgressLog.getOutput().contains("NewContainer1") && inProgressLog
|
inProgressLog.getOutput().contains("NewContainer1") && inProgressLog
|
||||||
.getOutput().contains(id.getDatanodeUuid()), 200, 10 * 1000);
|
.getOutput().contains(id.getDatanodeUuid()), 200, 10 * 1000);
|
|
@ -35,6 +35,8 @@ import org.apache.hadoop.ozone.protocol.proto
|
||||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeMetric;
|
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeMetric;
|
||||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeStat;
|
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeStat;
|
||||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||||
|
import org.apache.hadoop.ozone.scm.node.NodePoolManager;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -269,6 +271,11 @@ public class MockNodeManager implements NodeManager {
|
||||||
return new SCMNodeMetric(nodeMetricMap.get(datanodeID.toString()));
|
return new SCMNodeMetric(nodeMetricMap.get(datanodeID.toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodePoolManager getNodePoolManager() {
|
||||||
|
return Mockito.mock(NodePoolManager.class);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used for testing.
|
* Used for testing.
|
||||||
*
|
*
|
||||||
|
|
|
@ -203,8 +203,7 @@ public class TestContainerMapping {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFullContainerReport() throws IOException,
|
public void testFullContainerReport() throws IOException {
|
||||||
InterruptedException {
|
|
||||||
String containerName = UUID.randomUUID().toString();
|
String containerName = UUID.randomUUID().toString();
|
||||||
ContainerInfo info = createContainer(containerName);
|
ContainerInfo info = createContainer(containerName);
|
||||||
DatanodeID datanodeID = SCMTestUtils.getDatanodeID();
|
DatanodeID datanodeID = SCMTestUtils.getDatanodeID();
|
||||||
|
@ -227,7 +226,13 @@ public class TestContainerMapping {
|
||||||
.setContainerID(info.getContainerID());
|
.setContainerID(info.getContainerID());
|
||||||
|
|
||||||
reports.add(ciBuilder.build());
|
reports.add(ciBuilder.build());
|
||||||
mapping.processContainerReports(datanodeID, reportType, reports);
|
|
||||||
|
ContainerReportsRequestProto.Builder crBuilder =
|
||||||
|
ContainerReportsRequestProto.newBuilder();
|
||||||
|
crBuilder.setDatanodeID(datanodeID.getProtoBufMessage())
|
||||||
|
.setType(reportType).addAllReports(reports);
|
||||||
|
|
||||||
|
mapping.processContainerReports(crBuilder.build());
|
||||||
|
|
||||||
ContainerInfo updatedContainer = mapping.getContainer(containerName);
|
ContainerInfo updatedContainer = mapping.getContainer(containerName);
|
||||||
Assert.assertEquals(100000000L, updatedContainer.getNumberOfKeys());
|
Assert.assertEquals(100000000L, updatedContainer.getNumberOfKeys());
|
||||||
|
@ -260,7 +265,12 @@ public class TestContainerMapping {
|
||||||
|
|
||||||
reports.add(ciBuilder.build());
|
reports.add(ciBuilder.build());
|
||||||
|
|
||||||
mapping.processContainerReports(datanodeID, reportType, reports);
|
ContainerReportsRequestProto.Builder crBuilder =
|
||||||
|
ContainerReportsRequestProto.newBuilder();
|
||||||
|
crBuilder.setDatanodeID(datanodeID.getProtoBufMessage())
|
||||||
|
.setType(reportType).addAllReports(reports);
|
||||||
|
|
||||||
|
mapping.processContainerReports(crBuilder.build());
|
||||||
|
|
||||||
ContainerInfo updatedContainer = mapping.getContainer(containerName);
|
ContainerInfo updatedContainer = mapping.getContainer(containerName);
|
||||||
Assert.assertEquals(500000000L, updatedContainer.getNumberOfKeys());
|
Assert.assertEquals(500000000L, updatedContainer.getNumberOfKeys());
|
||||||
|
|
Loading…
Reference in New Issue