HDFS-12787. Ozone: SCM: Aggregate the metrics from all the container reports. Contributed by Yiqun Lin.

This commit is contained in:
Xiaoyu Yao 2017-11-20 11:03:17 -08:00 committed by Owen O'Malley
parent cbe4f314b3
commit c8d8270f72
11 changed files with 438 additions and 44 deletions

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.ozone.scm;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.jmx.ServiceRuntimeInfo;
@ -39,4 +41,10 @@ public interface SCMMXBean extends ServiceRuntimeInfo {
* @return SCM client RPC server port
*/
String getClientRpcPort();
/**
* Get container report info that includes container IO stats of nodes.
* @return mapping from datanode UUID to container report JSON string
*/
Map<String, String> getContainerReport();
}

View File

@ -19,6 +19,10 @@ package org.apache.hadoop.ozone.scm;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.google.protobuf.BlockingService;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.classification.InterfaceAudience;
@ -104,6 +108,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.Collections;
import java.util.stream.Collectors;
@ -204,6 +210,9 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
/** SCM metrics. */
private static SCMMetrics metrics;
/** Key = DatanodeUuid, value = ContainerStat. */
private Cache<String, ContainerStat> containerReportCache;
private static final String USAGE =
"Usage: \n hdfs scm [ " + StartupOption.INIT.getName() + " [ "
@ -225,13 +234,15 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
OZONE_SCM_DB_CACHE_SIZE_DEFAULT);
StorageContainerManager.initMetrics();
initContainerReportCache(conf);
scmStorage = new SCMStorage(conf);
String clusterId = scmStorage.getClusterID();
if (clusterId == null) {
throw new SCMException("clusterId not found",
ResultCodes.SCM_NOT_INITIALIZED);
}
scmNodeManager = new SCMNodeManager(conf, scmStorage.getClusterID());
scmNodeManager = new SCMNodeManager(conf, scmStorage.getClusterID(), this);
scmContainerManager = new ContainerMapping(conf, scmNodeManager, cacheSize);
scmBlockManager = new BlockManagerImpl(conf, scmNodeManager,
scmContainerManager, cacheSize);
@ -297,6 +308,31 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
registerMXBean();
}
/**
 * Builds the in-memory cache that holds the most recent container stat
 * reported by each datanode (key = datanode UUID).
 *
 * The removal listener keeps the aggregated SCM metrics consistent: any
 * time an entry leaves the cache (explicit invalidation or replacement by
 * a newer report), its stat is subtracted from the cluster-wide counters.
 *
 * @param conf Ozone configuration (currently unused; kept for symmetry
 *             with other init methods)
 */
private void initContainerReportCache(OzoneConfiguration conf) {
  containerReportCache = CacheBuilder.newBuilder()
      // Effectively never expire and never cap size; entries are removed
      // explicitly (stale nodes) or replaced by newer reports.
      .expireAfterAccess(Long.MAX_VALUE, TimeUnit.MILLISECONDS)
      .maximumSize(Integer.MAX_VALUE)
      .removalListener(
          (RemovalNotification<String, ContainerStat> notification) -> {
            synchronized (containerReportCache) {
              // remove invalid container report
              metrics.decrContainerStat(notification.getValue());
              LOG.debug(
                  "Remove expired container stat entry for datanode: {}.",
                  notification.getKey());
            }
          })
      .build();
}
/**
* Builds a message for logging startup information about an RPC server.
*
@ -836,7 +872,15 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
LOG.error("SCM block manager service stop failed.", ex);
}
if (containerReportCache != null) {
containerReportCache.invalidateAll();
containerReportCache.cleanUp();
}
if (metrics != null) {
metrics.unRegister();
}
unregisterMXBean();
IOUtils.cleanupWithLogger(LOG, scmContainerManager);
IOUtils.cleanupWithLogger(LOG, scmBlockManager);
@ -917,27 +961,7 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
@Override
public ContainerReportsResponseProto sendContainerReport(
ContainerReportsRequestProto reports) throws IOException {
// TODO: We should update the logic once incremental container report
// type is supported.
if (reports.getType() ==
ContainerReportsRequestProto.reportType.fullReport) {
ContainerStat stat = new ContainerStat();
for (StorageContainerDatanodeProtocolProtos.ContainerInfo info : reports
.getReportsList()) {
stat.add(new ContainerStat(info.getSize(), info.getUsed(),
info.getKeyCount(), info.getReadBytes(), info.getWriteBytes(),
info.getReadCount(), info.getWriteCount()));
}
// update container metrics
metrics.setLastContainerReportSize(stat.getSize().get());
metrics.setLastContainerReportUsed(stat.getUsed().get());
metrics.setLastContainerReportKeyCount(stat.getKeyCount().get());
metrics.setLastContainerReportReadBytes(stat.getReadBytes().get());
metrics.setLastContainerReportWriteBytes(stat.getWriteBytes().get());
metrics.setLastContainerReportReadCount(stat.getReadCount().get());
metrics.setLastContainerReportWriteCount(stat.getWriteCount().get());
}
updateContainerReportMetrics(reports);
// should we process container reports async?
scmContainerManager.processContainerReports(
@ -946,6 +970,37 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
return ContainerReportsResponseProto.newBuilder().build();
}
/**
 * Updates SCM container metrics from a single datanode container report:
 * the "last report" gauges are set to this report's totals, and the
 * cluster-wide aggregate counters are refreshed for this datanode.
 *
 * @param reports container report request received from a datanode
 */
private void updateContainerReportMetrics(
    ContainerReportsRequestProto reports) {
  ContainerStat newStat = null;
  // TODO: We should update the logic once incremental container report
  // type is supported.
  if (reports
      .getType() == ContainerReportsRequestProto.reportType.fullReport) {
    newStat = new ContainerStat();
    // Sum the per-container stats into a single per-datanode total.
    for (StorageContainerDatanodeProtocolProtos.ContainerInfo info : reports
        .getReportsList()) {
      newStat.add(new ContainerStat(info.getSize(), info.getUsed(),
          info.getKeyCount(), info.getReadBytes(), info.getWriteBytes(),
          info.getReadCount(), info.getWriteCount()));
    }
    // update container metrics
    metrics.setLastContainerStat(newStat);
  }

  // Update container stat entry, this will trigger a removal operation if it
  // exists in cache.
  synchronized (containerReportCache) {
    String datanodeUuid = reports.getDatanodeID().getDatanodeUuid();
    if (datanodeUuid != null && newStat != null) {
      // put() replaces any previous entry for this datanode, which fires
      // the cache removal listener and subtracts the old stat from the
      // aggregate counters before the new stat is added below.
      containerReportCache.put(datanodeUuid, newStat);
      // update global view container metrics
      metrics.incrContainerStat(newStat);
    }
  }
}
/**
* Handles the block deletion ACKs sent by datanodes. Once ACKs are received,
* SCM considers the blocks deleted and updates the metadata in the SCM DB.
@ -1124,4 +1179,53 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
public static SCMMetrics getMetrics() {
return metrics == null ? SCMMetrics.create() : metrics;
}
/**
 * Invalidates the cached container stat entry for the given datanode
 * (e.g. when the node turns stale). The cache removal listener then
 * subtracts that stat from the aggregated cluster-wide metrics.
 *
 * @param datanodeUuid UUID of the datanode whose entry is removed
 */
public void removeContainerReport(String datanodeUuid) {
  // Synchronized so the listener's metric decrement cannot interleave
  // with a concurrent report update for the same cache.
  synchronized (containerReportCache) {
    containerReportCache.invalidate(datanodeUuid);
  }
}
/**
 * Looks up the cached container stat of the specified datanode.
 *
 * @param datanodeUuid UUID of the datanode to look up
 * @return the cached {@link ContainerStat}, or {@code null} when no
 *         report from that datanode is currently cached
 */
public ContainerStat getContainerReport(String datanodeUuid) {
  synchronized (containerReportCache) {
    return containerReportCache.getIfPresent(datanodeUuid);
  }
}
/**
 * Returns a view of the container stat entries. Modifications made to the
 * map will directly affect the cache.
 *
 * Note: writes through this view bypass the synchronized blocks used
 * elsewhere, but cache removals still fire the metric-adjusting
 * removal listener.
 *
 * @return live map view of datanode UUID to container stat
 */
public ConcurrentMap<String, ContainerStat> getContainerReportCache() {
  return containerReportCache.asMap();
}
/**
 * Serializes every cached container stat to JSON, keyed by datanode
 * UUID, for exposure through the SCM MXBean.
 *
 * @return datanode UUID to container-stat JSON string mapping
 */
@Override
public Map<String, String> getContainerReport() {
  Map<String, String> statsByDatanode = new HashMap<>();
  synchronized (containerReportCache) {
    for (Map.Entry<String, ContainerStat> entry
        : containerReportCache.asMap().entrySet()) {
      statsByDatanode.put(entry.getKey(), entry.getValue().toJsonString());
    }
  }
  return statsByDatanode;
}
}

View File

@ -17,6 +17,11 @@
*/
package org.apache.hadoop.ozone.scm.container.placement.metrics;
import java.io.IOException;
import org.apache.hadoop.ozone.web.utils.JsonUtils;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
/**
@ -26,36 +31,43 @@ public class ContainerStat {
/**
* The maximum container size.
*/
@JsonProperty("Size")
private LongMetric size;
/**
* The number of bytes used by the container.
*/
@JsonProperty("Used")
private LongMetric used;
/**
* The number of keys in the container.
*/
@JsonProperty("KeyCount")
private LongMetric keyCount;
/**
* The number of bytes read from the container.
*/
@JsonProperty("ReadBytes")
private LongMetric readBytes;
/**
* The number of bytes write into the container.
*/
@JsonProperty("WriteBytes")
private LongMetric writeBytes;
/**
* The number of times the container is read.
*/
@JsonProperty("ReadCount")
private LongMetric readCount;
/**
* The number of times the container is written into.
*/
@JsonProperty("WriteCount")
private LongMetric writeCount;
public ContainerStat() {
@ -117,6 +129,10 @@ public class ContainerStat {
}
public void add(ContainerStat stat) {
if (stat == null) {
return;
}
this.size.add(stat.getSize().get());
this.used.add(stat.getUsed().get());
this.keyCount.add(stat.getKeyCount().get());
@ -125,4 +141,26 @@ public class ContainerStat {
this.readCount.add(stat.getReadCount().get());
this.writeCount.add(stat.getWriteCount().get());
}
/**
 * Decreases every metric of this stat by the corresponding value in
 * {@code stat}. A {@code null} argument is treated as an empty delta
 * and leaves this object unchanged (mirrors {@link #add}).
 *
 * @param stat the stat whose values are subtracted from this one
 */
public void subtract(ContainerStat stat) {
  if (stat == null) {
    return;
  }
  size.subtract(stat.getSize().get());
  used.subtract(stat.getUsed().get());
  keyCount.subtract(stat.getKeyCount().get());
  readBytes.subtract(stat.getReadBytes().get());
  writeBytes.subtract(stat.getWriteBytes().get());
  readCount.subtract(stat.getReadCount().get());
  writeCount.subtract(stat.getWriteCount().get());
}
/**
 * Renders this stat as a JSON string (field names come from the
 * {@code @JsonProperty} annotations on this class).
 *
 * @return JSON representation, or {@code null} when serialization fails
 */
public String toJsonString() {
  try {
    return JsonUtils.toJsonString(this);
  } catch (IOException ignored) {
    // Best effort for JMX/report consumers: a null entry is preferred
    // over failing the whole report. NOTE(review): consider logging
    // the exception at debug level.
    return null;
  }
}
}

View File

@ -16,9 +16,13 @@
*/
package org.apache.hadoop.ozone.scm.container.placement.metrics;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
/**
* An helper class for all metrics based on Longs.
*/
@JsonAutoDetect(fieldVisibility = Visibility.ANY)
public class LongMetric implements DatanodeMetric<Long, Long> {
private Long value;

View File

@ -21,6 +21,7 @@ import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
/**
@ -43,6 +44,14 @@ public class SCMMetrics {
@Metric private MutableGaugeLong lastContainerReportReadCount;
@Metric private MutableGaugeLong lastContainerReportWriteCount;
@Metric private MutableCounterLong containerReportSize;
@Metric private MutableCounterLong containerReportUsed;
@Metric private MutableCounterLong containerReportKeyCount;
@Metric private MutableCounterLong containerReportReadBytes;
@Metric private MutableCounterLong containerReportWriteBytes;
@Metric private MutableCounterLong containerReportReadCount;
@Metric private MutableCounterLong containerReportWriteCount;
public SCMMetrics() {
}
@ -80,6 +89,64 @@ public class SCMMetrics {
this.lastContainerReportWriteCount.set(writeCount);
}
/** Increments the cluster-wide container size counter by {@code size} bytes. */
public void incrContainerReportSize(long size) {
  this.containerReportSize.incr(size);
}
/** Increments the cluster-wide container used-bytes counter by {@code used}. */
public void incrContainerReportUsed(long used) {
  this.containerReportUsed.incr(used);
}
/** Increments the cluster-wide container key-count counter by {@code keyCount}. */
public void incrContainerReportKeyCount(long keyCount) {
  this.containerReportKeyCount.incr(keyCount);
}
/** Increments the cluster-wide container read-bytes counter by {@code readBytes}. */
public void incrContainerReportReadBytes(long readBytes) {
  this.containerReportReadBytes.incr(readBytes);
}
/** Increments the cluster-wide container write-bytes counter by {@code writeBytes}. */
public void incrContainerReportWriteBytes(long writeBytes) {
  this.containerReportWriteBytes.incr(writeBytes);
}
/** Increments the cluster-wide container read-operation counter by {@code readCount}. */
public void incrContainerReportReadCount(long readCount) {
  this.containerReportReadCount.incr(readCount);
}
/** Increments the cluster-wide container write-operation counter by {@code writeCount}. */
public void incrContainerReportWriteCount(long writeCount) {
  this.containerReportWriteCount.incr(writeCount);
}
/**
 * Sets the "last container report" gauges to the totals carried by the
 * most recently received report.
 *
 * @param stat aggregated stat of the latest container report
 */
public void setLastContainerStat(ContainerStat stat) {
  lastContainerReportSize.set(stat.getSize().get());
  lastContainerReportUsed.set(stat.getUsed().get());
  lastContainerReportKeyCount.set(stat.getKeyCount().get());
  lastContainerReportReadBytes.set(stat.getReadBytes().get());
  lastContainerReportWriteBytes.set(stat.getWriteBytes().get());
  lastContainerReportReadCount.set(stat.getReadCount().get());
  lastContainerReportWriteCount.set(stat.getWriteCount().get());
}
/**
 * Adds the given delta to every cluster-wide aggregated container
 * counter (called when a datanode's report enters the report cache).
 *
 * @param stat per-datanode stat to add to the aggregate counters
 */
public void incrContainerStat(ContainerStat stat) {
  containerReportSize.incr(stat.getSize().get());
  containerReportUsed.incr(stat.getUsed().get());
  containerReportKeyCount.incr(stat.getKeyCount().get());
  containerReportReadBytes.incr(stat.getReadBytes().get());
  containerReportWriteBytes.incr(stat.getWriteBytes().get());
  containerReportReadCount.incr(stat.getReadCount().get());
  containerReportWriteCount.incr(stat.getWriteCount().get());
}
/**
 * Subtracts the given delta from every cluster-wide aggregated container
 * counter (called when a datanode's report leaves the report cache).
 *
 * @param stat per-datanode stat to remove from the aggregate counters
 */
public void decrContainerStat(ContainerStat stat) {
  containerReportSize.incr(-stat.getSize().get());
  containerReportUsed.incr(-stat.getUsed().get());
  containerReportKeyCount.incr(-stat.getKeyCount().get());
  containerReportReadBytes.incr(-stat.getReadBytes().get());
  containerReportWriteBytes.incr(-stat.getWriteBytes().get());
  containerReportReadCount.incr(-stat.getReadCount().get());
  containerReportWriteCount.incr(-stat.getWriteCount().get());
}
public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
ms.unregisterSource(SOURCE_NAME);

View File

@ -43,7 +43,7 @@ import org.apache.hadoop.ozone.protocol
.proto.StorageContainerDatanodeProtocolProtos.SCMNodeReport;
import org.apache.hadoop.ozone.protocol
.proto.StorageContainerDatanodeProtocolProtos.SCMStorageReport;
import org.apache.hadoop.ozone.scm.StorageContainerManager;
import org.apache.hadoop.ozone.scm.VersionInfo;
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeMetric;
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMNodeStat;
@ -121,7 +121,7 @@ public class SCMNodeManager
private final AtomicInteger staleNodeCount;
private final AtomicInteger deadNodeCount;
private final AtomicInteger totalNodes;
private final long staleNodeIntervalMs;
private long staleNodeIntervalMs;
private final long deadNodeIntervalMs;
private final long heartbeatCheckerIntervalMs;
private final long datanodeHBIntervalSeconds;
@ -150,12 +150,13 @@ public class SCMNodeManager
// Node pool manager.
private final SCMNodePoolManager nodePoolManager;
private final StorageContainerManager scmManager;
/**
* Constructs SCM machine Manager.
*/
public SCMNodeManager(OzoneConfiguration conf, String clusterID)
throws IOException {
public SCMNodeManager(OzoneConfiguration conf, String clusterID,
StorageContainerManager scmManager) throws IOException {
heartbeatQueue = new ConcurrentLinkedQueue<>();
healthyNodes = new ConcurrentHashMap<>();
deadNodes = new ConcurrentHashMap<>();
@ -197,6 +198,7 @@ public class SCMNodeManager
registerMXBean();
this.nodePoolManager = new SCMNodePoolManager(conf);
this.scmManager = scmManager;
}
private void registerMXBean() {
@ -551,6 +553,11 @@ public class SCMNodeManager
healthyNodeCount.decrementAndGet();
staleNodes.put(entry.getKey(), entry.getValue());
staleNodeCount.incrementAndGet();
if (scmManager != null) {
// remove stale node's container report
scmManager.removeContainerReport(entry.getKey());
}
}
/**
@ -863,4 +870,9 @@ public class SCMNodeManager
/**
 * Queues an SCM command for delivery to the given datanode on its next
 * heartbeat.
 *
 * @param id      target datanode
 * @param command command to enqueue
 */
public void addDatanodeCommand(DatanodeID id, SCMCommand command) {
  this.commandQueue.addCommand(id, command);
}
/**
 * Test-only hook: overrides the stale-node detection interval so tests
 * can force a healthy node to be marked stale quickly.
 *
 * @param interval new stale-node interval in milliseconds
 */
@VisibleForTesting
public void setStaleNodeIntervalMs(long interval) {
  this.staleNodeIntervalMs = interval;
}
}

View File

@ -110,13 +110,20 @@ Following are the counters for containers:
| Name | Description |
|:---- |:---- |
| `LastContainerReportSize` | Total size in bytes of all containers |
| `LastContainerReportUsed` | Total number of bytes used by all containers |
| `LastContainerReportKeyCount` | Total number of keys in all containers |
| `LastContainerReportReadBytes` | Total number of bytes have been read from all containers |
| `LastContainerReportWriteBytes` | Total number of bytes have been written into all containers |
| `LastContainerReportReadCount` | Total number of times containers have been read from |
| `LastContainerReportWriteCount` | Total number of times containers have been written to |
| `LastContainerReportSize` | Total size in bytes of all containers in the latest container report that SCM received from a datanode |
| `LastContainerReportUsed` | Total number of bytes used by all containers in the latest container report that SCM received from a datanode |
| `LastContainerReportKeyCount` | Total number of keys in all containers in the latest container report that SCM received from a datanode |
| `LastContainerReportReadBytes` | Total number of bytes that have been read from all containers in the latest container report that SCM received from a datanode |
| `LastContainerReportWriteBytes` | Total number of bytes that have been written into all containers in the latest container report that SCM received from a datanode |
| `LastContainerReportReadCount` | Total number of times containers have been read from in the latest container report that SCM received from a datanode |
| `LastContainerReportWriteCount` | Total number of times containers have been written to in the latest container report that SCM received from a datanode |
| `ContainerReportSize` | Total size in bytes of all containers across the whole cluster |
| `ContainerReportUsed` | Total number of bytes used by all containers across the whole cluster |
| `ContainerReportKeyCount` | Total number of keys in all containers across the whole cluster |
| `ContainerReportReadBytes` | Total number of bytes that have been read from all containers across the whole cluster |
| `ContainerReportWriteBytes` | Total number of bytes that have been written into all containers across the whole cluster |
| `ContainerReportReadCount` | Total number of times containers have been read from across the whole cluster |
| `ContainerReportWriteCount` | Total number of times containers have been written to across the whole cluster |
### Key Space Metrics

View File

@ -25,6 +25,7 @@ import org.apache.hadoop.ozone.MiniOzoneClassicCluster;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
import org.apache.hadoop.ozone.scm.node.NodeManager;
import org.junit.BeforeClass;
import org.junit.AfterClass;
@ -40,6 +41,7 @@ import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.util.Map;
import java.util.Iterator;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeoutException;
import javax.management.openmbean.CompositeData;
@ -91,6 +93,24 @@ public class TestSCMMXBean {
String clientRpcPort = (String)mbs.getAttribute(bean,
"ClientRpcPort");
assertEquals(scm.getClientRpcPort(), clientRpcPort);
ConcurrentMap<String, ContainerStat> map = scm.getContainerReportCache();
ContainerStat stat = new ContainerStat(1, 2, 3, 4, 5, 6, 7);
map.put("nodeID", stat);
TabularData data = (TabularData) mbs.getAttribute(
bean, "ContainerReport");
// verify report info
assertEquals(1, data.values().size());
for (Object obj : data.values()) {
assertTrue(obj instanceof CompositeData);
CompositeData d = (CompositeData) obj;
Iterator<?> it = d.values().iterator();
String key = it.next().toString();
String value = it.next().toString();
assertEquals("nodeID", key);
assertEquals(stat.toJsonString(), value);
}
}
@Test

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.ozone.scm;
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getLongGauge;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;
@ -25,9 +26,10 @@ import java.util.UUID;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.OzoneConfiguration;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.ozone.MiniOzoneClassicCluster;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.common.SCMTestUtils;
import org.apache.hadoop.ozone.container.common.helpers.ContainerReport;
@ -35,13 +37,23 @@ import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolPr
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
import org.apache.hadoop.ozone.scm.node.SCMNodeManager;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
/**
* This class tests the metrics of Storage Container Manager.
*/
public class TestSCMMetrics {
private static MiniOzoneCluster cluster = null;
/**
* Set the timeout for each test.
*/
@Rule
public Timeout testTimeout = new Timeout(90000);
private static MiniOzoneClassicCluster cluster = null;
@Test
public void testContainerMetrics() throws Exception {
@ -64,7 +76,11 @@ public class TestSCMMetrics {
ContainerStat stat = new ContainerStat(size, used, keyCount, readBytes,
writeBytes, readCount, writeCount);
StorageContainerManager scmManager = cluster.getStorageContainerManager();
scmManager.sendContainerReport(createContainerReport(numReport, stat));
ContainerReportsRequestProto request = createContainerReport(numReport,
stat, null);
String fstDatanodeID = request.getDatanodeID().getDatanodeUuid();
scmManager.sendContainerReport(request);
// verify container stat metrics
MetricsRecordBuilder scmMetrics = getMetrics(SCMMetrics.SOURCE_NAME);
@ -83,6 +99,117 @@ public class TestSCMMetrics {
getLongGauge("LastContainerReportReadCount", scmMetrics));
assertEquals(writeCount * numReport,
getLongGauge("LastContainerReportWriteCount", scmMetrics));
// add one new report
request = createContainerReport(1, stat, null);
String sndDatanodeID = request.getDatanodeID().getDatanodeUuid();
scmManager.sendContainerReport(request);
scmMetrics = getMetrics(SCMMetrics.SOURCE_NAME);
assertEquals(size * (numReport + 1),
getLongCounter("ContainerReportSize", scmMetrics));
assertEquals(used * (numReport + 1),
getLongCounter("ContainerReportUsed", scmMetrics));
assertEquals(readBytes * (numReport + 1),
getLongCounter("ContainerReportReadBytes", scmMetrics));
assertEquals(writeBytes * (numReport + 1),
getLongCounter("ContainerReportWriteBytes", scmMetrics));
assertEquals(keyCount * (numReport + 1),
getLongCounter("ContainerReportKeyCount", scmMetrics));
assertEquals(readCount * (numReport + 1),
getLongCounter("ContainerReportReadCount", scmMetrics));
assertEquals(writeCount * (numReport + 1),
getLongCounter("ContainerReportWriteCount", scmMetrics));
// Re-send reports but with different value for validating
// the aggregation.
stat = new ContainerStat(100, 50, 3, 50, 60, 5, 6);
scmManager.sendContainerReport(createContainerReport(1, stat,
fstDatanodeID));
stat = new ContainerStat(1, 1, 1, 1, 1, 1, 1);
scmManager.sendContainerReport(createContainerReport(1, stat,
sndDatanodeID));
// the global container metrics value should be updated
scmMetrics = getMetrics(SCMMetrics.SOURCE_NAME);
assertEquals(101, getLongCounter("ContainerReportSize", scmMetrics));
assertEquals(51, getLongCounter("ContainerReportUsed", scmMetrics));
assertEquals(51, getLongCounter("ContainerReportReadBytes", scmMetrics));
assertEquals(61, getLongCounter("ContainerReportWriteBytes", scmMetrics));
assertEquals(4, getLongCounter("ContainerReportKeyCount", scmMetrics));
assertEquals(6, getLongCounter("ContainerReportReadCount", scmMetrics));
assertEquals(7, getLongCounter("ContainerReportWriteCount", scmMetrics));
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
@Test
public void testStaleNodeContainerReport() throws Exception {
int nodeCount = 2;
int numReport = 2;
long size = OzoneConsts.GB * 5;
long used = OzoneConsts.GB * 2;
long readBytes = OzoneConsts.GB * 1;
long writeBytes = OzoneConsts.GB * 2;
int keyCount = 1000;
int readCount = 100;
int writeCount = 50;
OzoneConfiguration conf = new OzoneConfiguration();
try {
cluster = new MiniOzoneClassicCluster.Builder(conf)
.setHandlerType(OzoneConsts.OZONE_HANDLER_DISTRIBUTED)
.numDataNodes(nodeCount).build();
ContainerStat stat = new ContainerStat(size, used, keyCount, readBytes,
writeBytes, readCount, writeCount);
StorageContainerManager scmManager = cluster.getStorageContainerManager();
DataNode dataNode = cluster.getDataNodes().get(0);
String datanodeUuid = dataNode.getDatanodeId().getDatanodeUuid();
ContainerReportsRequestProto request = createContainerReport(numReport,
stat, datanodeUuid);
scmManager.sendContainerReport(request);
MetricsRecordBuilder scmMetrics = getMetrics(SCMMetrics.SOURCE_NAME);
assertEquals(size * numReport,
getLongCounter("ContainerReportSize", scmMetrics));
assertEquals(used * numReport,
getLongCounter("ContainerReportUsed", scmMetrics));
assertEquals(readBytes * numReport,
getLongCounter("ContainerReportReadBytes", scmMetrics));
assertEquals(writeBytes * numReport,
getLongCounter("ContainerReportWriteBytes", scmMetrics));
assertEquals(keyCount * numReport,
getLongCounter("ContainerReportKeyCount", scmMetrics));
assertEquals(readCount * numReport,
getLongCounter("ContainerReportReadCount", scmMetrics));
assertEquals(writeCount * numReport,
getLongCounter("ContainerReportWriteCount", scmMetrics));
// reset stale interval time to move node from healthy to stale
SCMNodeManager nodeManager = (SCMNodeManager) cluster
.getStorageContainerManager().getScmNodeManager();
nodeManager.setStaleNodeIntervalMs(100);
// verify the metrics when node becomes stale
GenericTestUtils.waitFor(() -> {
MetricsRecordBuilder metrics = getMetrics(SCMMetrics.SOURCE_NAME);
return 0 == getLongCounter("ContainerReportSize", metrics)
&& 0 == getLongCounter("ContainerReportUsed", metrics)
&& 0 == getLongCounter("ContainerReportReadBytes", metrics)
&& 0 == getLongCounter("ContainerReportWriteBytes", metrics)
&& 0 == getLongCounter("ContainerReportKeyCount", metrics)
&& 0 == getLongCounter("ContainerReportReadCount", metrics)
&& 0 == getLongCounter("ContainerReportWriteCount", metrics);
}, 1000, 60000);
} finally {
if (cluster != null) {
cluster.shutdown();
@ -91,7 +218,7 @@ public class TestSCMMetrics {
}
private ContainerReportsRequestProto createContainerReport(int numReport,
ContainerStat stat) {
ContainerStat stat, String datanodeUuid) {
StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto.Builder
reportsBuilder = StorageContainerDatanodeProtocolProtos
.ContainerReportsRequestProto.newBuilder();
@ -108,8 +235,15 @@ public class TestSCMMetrics {
report.setWriteBytes(stat.getWriteBytes().get());
reportsBuilder.addReports(report.getProtoBufMessage());
}
reportsBuilder.setDatanodeID(SCMTestUtils.getDatanodeID()
.getProtoBufMessage());
DatanodeID datanodeID;
if (datanodeUuid == null) {
datanodeID = SCMTestUtils.getDatanodeID();
} else {
datanodeID = new DatanodeID("null", "null", datanodeUuid, 0, 0, 0, 0);
}
reportsBuilder.setDatanodeID(datanodeID.getProtoBufMessage());
reportsBuilder.setType(StorageContainerDatanodeProtocolProtos
.ContainerReportsRequestProto.reportType.fullReport);
return reportsBuilder.build();

View File

@ -93,7 +93,7 @@ public class TestContainerPlacement {
SCMNodeManager createNodeManager(OzoneConfiguration config)
throws IOException {
SCMNodeManager nodeManager = new SCMNodeManager(config,
UUID.randomUUID().toString());
UUID.randomUUID().toString(), null);
assertFalse("Node manager should be in chill mode",
nodeManager.isOutOfChillMode());
return nodeManager;

View File

@ -125,7 +125,7 @@ public class TestNodeManager {
SCMNodeManager createNodeManager(OzoneConfiguration config)
throws IOException {
SCMNodeManager nodeManager = new SCMNodeManager(config,
UUID.randomUUID().toString());
UUID.randomUUID().toString(), null);
assertFalse("Node manager should be in chill mode",
nodeManager.isOutOfChillMode());
return nodeManager;