From 6783dad76618a4905f19f2a550b906e1a401f666 Mon Sep 17 00:00:00 2001 From: Yiqun Lin Date: Wed, 25 Oct 2017 10:34:47 +0800 Subject: [PATCH] HDFS-11468. Ozone: SCM: Add Node Metrics for SCM. Contributed by Yiqun Lin. --- .../ozone/scm/StorageContainerManager.java | 41 ++++++ .../placement/metrics/ContainerStat.java | 128 ++++++++++++++++++ .../placement/metrics/SCMMetrics.java | 87 ++++++++++++ .../src/site/markdown/OzoneMetrics.md | 20 +++ .../hadoop/ozone/scm/TestSCMMetrics.java | 116 ++++++++++++++++ 5 files changed, 392 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/ContainerStat.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/SCMMetrics.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/TestSCMMetrics.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/StorageContainerManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/StorageContainerManager.java index b6c1f497168..dcd764bcac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/StorageContainerManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/StorageContainerManager.java @@ -69,6 +69,8 @@ import org.apache.hadoop.ozone.scm.block.BlockManager; import org.apache.hadoop.ozone.scm.block.BlockManagerImpl; import org.apache.hadoop.ozone.scm.container.ContainerMapping; import org.apache.hadoop.ozone.scm.container.Mapping; +import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat; +import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics; import org.apache.hadoop.ozone.scm.exceptions.SCMException; import org.apache.hadoop.ozone.scm.node.NodeManager; import org.apache.hadoop.ozone.scm.node.SCMNodeManager; @@ -163,6 
+165,9 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl private final String scmUsername; private final Collection scmAdminUsernames; + /** SCM metrics. */ + private static SCMMetrics metrics; + /** * Creates a new StorageContainerManager. Configuration will be updated with * information on the actual listening addresses used for RPC servers. @@ -177,6 +182,7 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl final int cacheSize = conf.getInt(OZONE_SCM_DB_CACHE_SIZE_MB, OZONE_SCM_DB_CACHE_SIZE_DEFAULT); + StorageContainerManager.initMetrics(); // TODO : Fix the ClusterID generation code. scmNodeManager = new SCMNodeManager(conf, UUID.randomUUID().toString()); scmContainerManager = new ContainerMapping(conf, scmNodeManager, cacheSize); @@ -673,6 +679,7 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl LOG.error("SCM block manager service stop failed.", ex); } + metrics.unRegister(); unregisterMXBean(); IOUtils.cleanupWithLogger(LOG, scmContainerManager); IOUtils.cleanupWithLogger(LOG, scmBlockManager); @@ -752,6 +759,27 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl @Override public ContainerReportsResponseProto sendContainerReport( ContainerReportsRequestProto reports) throws IOException { + // TODO: We should update the logic once incremental container report + // type is supported. 
+    if (reports.getType() ==
+        ContainerReportsRequestProto.reportType.fullReport) {
+      ContainerStat stat = new ContainerStat();
+      for (StorageContainerDatanodeProtocolProtos.ContainerInfo info : reports
+          .getReportsList()) {
+        stat.add(new ContainerStat(info.getSize(), info.getUsed(),
+            info.getKeyCount(), info.getReadBytes(), info.getWriteBytes(),
+            info.getReadCount(), info.getWriteCount()));
+      }
+
+      // update container metrics
+      metrics.setLastContainerReportSize(stat.getSize().get());
+      metrics.setLastContainerReportUsed(stat.getUsed().get());
+      metrics.setLastContainerReportKeyCount(stat.getKeyCount().get());
+      metrics.setLastContainerReportReadBytes(stat.getReadBytes().get());
+      metrics.setLastContainerReportWriteBytes(stat.getWriteBytes().get());
+      metrics.setLastContainerReportReadCount(stat.getReadCount().get());
+      metrics.setLastContainerReportWriteCount(stat.getWriteCount().get());
+    }
 
     // TODO: handle the container reports either here or add container report
     // handler.
@@ -914,4 +942,27 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
     }
   }
 
+  /**
+   * Initialize SCM metrics by creating the metrics source through
+   * {@link SCMMetrics#create()} and keeping it in the static field.
+   */
+  public static void initMetrics() {
+    metrics = SCMMetrics.create();
+  }
+
+  /**
+   * Return SCM metrics instance.
+   *
+   * <p>When no instance exists yet, one is created with
+   * {@link #initMetrics()} and kept, so that repeated calls return the same
+   * object rather than a new {@link SCMMetrics} on every call.
+   *
+   * @return the SCM metrics instance
+   */
+  public static SCMMetrics getMetrics() {
+    if (metrics == null) {
+      initMetrics();
+    }
+    return metrics;
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/ContainerStat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/ContainerStat.java
new file mode 100644
index 00000000000..65e96c33f19
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/ContainerStat.java
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm.container.placement.metrics;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class represents the SCM container stat.
+ *
+ * <p>Every value is kept in a {@link LongMetric}. The values of another stat
+ * can be summed into this one with {@link #add(ContainerStat)}.
+ */
+public class ContainerStat {
+  /**
+   * The maximum container size.
+   */
+  private final LongMetric size;
+
+  /**
+   * The number of bytes used by the container.
+   */
+  private final LongMetric used;
+
+  /**
+   * The number of keys in the container.
+   */
+  private final LongMetric keyCount;
+
+  /**
+   * The number of bytes read from the container.
+   */
+  private final LongMetric readBytes;
+
+  /**
+   * The number of bytes written into the container.
+   */
+  private final LongMetric writeBytes;
+
+  /**
+   * The number of times the container is read.
+   */
+  private final LongMetric readCount;
+
+  /**
+   * The number of times the container is written into.
+   */
+  private final LongMetric writeCount;
+
+  /**
+   * Creates a stat whose values are all zero.
+   */
+  public ContainerStat() {
+    this(0L, 0L, 0L, 0L, 0L, 0L, 0L);
+  }
+
+  /**
+   * Creates a stat from the given values.
+   *
+   * @param size the maximum container size
+   * @param used the number of bytes used by the container
+   * @param keyCount the number of keys in the container
+   * @param readBytes the number of bytes read from the container
+   * @param writeBytes the number of bytes written into the container
+   * @param readCount the number of times the container is read
+   * @param writeCount the number of times the container is written into
+   * @throws IllegalArgumentException if any argument is negative
+   */
+  public ContainerStat(long size, long used, long keyCount, long readBytes,
+      long writeBytes, long readCount, long writeCount) {
+    Preconditions.checkArgument(size >= 0,
+        "Container size cannot be negative.");
+    Preconditions.checkArgument(used >= 0,
+        "Used space cannot be negative.");
+    Preconditions.checkArgument(keyCount >= 0,
+        "Key count cannot be negative");
+    Preconditions.checkArgument(readBytes >= 0,
+        "Read bytes read cannot be negative.");
+    // Guard writeBytes (not readBytes) so a negative write total is rejected.
+    Preconditions.checkArgument(writeBytes >= 0,
+        "Write bytes cannot be negative.");
+    Preconditions.checkArgument(readCount >= 0,
+        "Read count cannot be negative.");
+    Preconditions.checkArgument(writeCount >= 0,
+        "Write count cannot be negative");
+
+    this.size = new LongMetric(size);
+    this.used = new LongMetric(used);
+    this.keyCount = new LongMetric(keyCount);
+    this.readBytes = new LongMetric(readBytes);
+    this.writeBytes = new LongMetric(writeBytes);
+    this.readCount = new LongMetric(readCount);
+    this.writeCount = new LongMetric(writeCount);
+  }
+
+  /** Returns the metric holding the maximum container size. */
+  public LongMetric getSize() {
+    return size;
+  }
+
+  /** Returns the metric holding the number of bytes used. */
+  public LongMetric getUsed() {
+    return used;
+  }
+
+  /** Returns the metric holding the number of keys. */
+  public LongMetric getKeyCount() {
+    return keyCount;
+  }
+
+  /** Returns the metric holding the number of bytes read. */
+  public LongMetric getReadBytes() {
+    return readBytes;
+  }
+
+  /** Returns the metric holding the number of bytes written. */
+  public LongMetric getWriteBytes() {
+    return writeBytes;
+  }
+
+  /** Returns the metric holding the number of reads. */
+  public LongMetric getReadCount() {
+    return readCount;
+  }
+
+  /** Returns the metric holding the number of writes. */
+  public LongMetric getWriteCount() {
+    return writeCount;
+  }
+
+  /**
+   * Adds every value of the given stat to the matching value of this stat.
+   *
+   * @param stat the stat whose values are added to this one
+   */
+  public void add(ContainerStat stat) {
+    this.size.add(stat.getSize().get());
+    this.used.add(stat.getUsed().get());
+    this.keyCount.add(stat.getKeyCount().get());
+    this.readBytes.add(stat.getReadBytes().get());
+    this.writeBytes.add(stat.getWriteBytes().get());
+    this.readCount.add(stat.getReadCount().get());
+    this.writeCount.add(stat.getWriteCount().get());
+  }
+}
diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/SCMMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/SCMMetrics.java
new file mode 100644
index 00000000000..17892dd5bc6
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/SCMMetrics.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm.container.placement.metrics;
+
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+
+/**
+ * Metrics source of the StorageContainerManager.
+ *
+ * <p>All metrics are {@link MutableGaugeLong} gauges; each setter stores the
+ * given value in the gauge of the same name.
+ */
+@Metrics(about="Storage Container Manager Metrics", context="dfs")
+public class SCMMetrics {
+  public static final String SOURCE_NAME =
+      SCMMetrics.class.getSimpleName();
+
+  /**
+   * Container stat metrics, the meaning of following metrics
+   * can be found in {@link ContainerStat}.
+   */
+  @Metric private MutableGaugeLong lastContainerReportSize;
+  @Metric private MutableGaugeLong lastContainerReportUsed;
+  @Metric private MutableGaugeLong lastContainerReportKeyCount;
+  @Metric private MutableGaugeLong lastContainerReportReadBytes;
+  @Metric private MutableGaugeLong lastContainerReportWriteBytes;
+  @Metric private MutableGaugeLong lastContainerReportReadCount;
+  @Metric private MutableGaugeLong lastContainerReportWriteCount;
+
+  /**
+   * Creates an instance without registering it; see {@link #create()}.
+   */
+  public SCMMetrics() {
+  }
+
+  /**
+   * Registers a new instance with the default metrics system under
+   * {@link #SOURCE_NAME}.
+   *
+   * @return the instance returned by the metrics system
+   */
+  public static SCMMetrics create() {
+    MetricsSystem metricsSystem = DefaultMetricsSystem.instance();
+    SCMMetrics source = new SCMMetrics();
+    return metricsSystem.register(SOURCE_NAME,
+        "Storage Container Manager Metrics", source);
+  }
+
+  /** Sets the {@code lastContainerReportSize} gauge. */
+  public void setLastContainerReportSize(long size) {
+    lastContainerReportSize.set(size);
+  }
+
+  /** Sets the {@code lastContainerReportUsed} gauge. */
+  public void setLastContainerReportUsed(long used) {
+    lastContainerReportUsed.set(used);
+  }
+
+  /** Sets the {@code lastContainerReportKeyCount} gauge. */
+  public void setLastContainerReportKeyCount(long keyCount) {
+    lastContainerReportKeyCount.set(keyCount);
+  }
+
+  /** Sets the {@code lastContainerReportReadBytes} gauge. */
+  public void setLastContainerReportReadBytes(long readBytes) {
+    lastContainerReportReadBytes.set(readBytes);
+  }
+
+  /** Sets the {@code lastContainerReportWriteBytes} gauge. */
+  public void setLastContainerReportWriteBytes(long writeBytes) {
+    lastContainerReportWriteBytes.set(writeBytes);
+  }
+
+  /** Sets the {@code lastContainerReportReadCount} gauge. */
+  public void setLastContainerReportReadCount(long readCount) {
+    lastContainerReportReadCount.set(readCount);
+  }
+
+  /** Sets the {@code lastContainerReportWriteCount} gauge. */
+  public void setLastContainerReportWriteCount(long writeCount) {
+    lastContainerReportWriteCount.set(writeCount);
+  }
+
+  /**
+   * Removes the {@link #SOURCE_NAME} source from the default metrics system.
+   */
+  public void unRegister() {
+    DefaultMetricsSystem.instance().unregisterSource(SOURCE_NAME);
+  }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/OzoneMetrics.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/OzoneMetrics.md
index 2548959bfcb..cd153eef6dd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/OzoneMetrics.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/OzoneMetrics.md
@@ -98,6 +98,26 @@ RPC operations.
| `GetSmallFile` | Get small file operations |
 | `CloseContainer` | Close container operations |
 
+### Storage Container Manager Metrics
+
+The metrics for containers that are managed by Storage Container Manager.
+
+Storage Container Manager (SCM) is a master service which keeps track of
+replicas of storage containers. It also manages all data nodes and their
+states, dealing with container reports and dispatching commands for execution.
+
+The following gauges describe the containers in the last container report:
+
+| Name | Description |
+|:---- |:---- |
+| `LastContainerReportSize` | Total size in bytes of all containers |
+| `LastContainerReportUsed` | Total number of bytes used by all containers |
+| `LastContainerReportKeyCount` | Total number of keys in all containers |
+| `LastContainerReportReadBytes` | Total number of bytes that have been read from all containers |
+| `LastContainerReportWriteBytes` | Total number of bytes that have been written into all containers |
+| `LastContainerReportReadCount` | Total number of times containers have been read from |
+| `LastContainerReportWriteCount` | Total number of times containers have been written to |
+
 ### Key Space Metrics
 
 The metrics for various key space manager operations in HDFS Ozone.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/TestSCMMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/TestSCMMetrics.java
new file mode 100644
index 00000000000..ab64b16a064
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/TestSCMMetrics.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm;
+
+import static org.apache.hadoop.test.MetricsAsserts.getLongGauge;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.assertEquals;
+
+import java.util.UUID;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.hadoop.conf.OzoneConfiguration;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.container.common.SCMTestUtils;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerReport;
+import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
+import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
+import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
+import org.junit.Test;
+
+/**
+ * Checks the container gauges that SCM publishes through SCMMetrics.
+ */
+public class TestSCMMetrics {
+  private static MiniOzoneCluster cluster = null;
+
+  /**
+   * Sends one full container report and expects summed values in the gauges.
+   */
+  @Test
+  public void testContainerMetrics() throws Exception {
+    int nodeCount = 2;
+    int numReport = 2;
+    long size = OzoneConsts.GB * 5;
+    long used = OzoneConsts.GB * 2;
+    long readBytes = OzoneConsts.GB * 1;
+    long writeBytes = OzoneConsts.GB * 2;
+    int keyCount = 1000;
+    int readCount = 100;
+    int writeCount = 50;
+    OzoneConfiguration conf = new OzoneConfiguration();
+    try {
+      cluster = new MiniOzoneCluster.Builder(conf)
+          .setHandlerType(OzoneConsts.OZONE_HANDLER_DISTRIBUTED)
+          .numDataNodes(nodeCount).build();
+      ContainerStat perContainer = new ContainerStat(size, used, keyCount,
+          readBytes, writeBytes, readCount, writeCount);
+      StorageContainerManager scm = cluster.getStorageContainerManager();
+      scm.sendContainerReport(createContainerReport(numReport, perContainer));
+
+      // each gauge must equal the per-container value times numReport
+      MetricsRecordBuilder gauges = getMetrics(SCMMetrics.SOURCE_NAME);
+      assertEquals(size * numReport,
+          getLongGauge("LastContainerReportSize", gauges));
+      assertEquals(used * numReport,
+          getLongGauge("LastContainerReportUsed", gauges));
+      assertEquals(readBytes * numReport,
+          getLongGauge("LastContainerReportReadBytes", gauges));
+      assertEquals(writeBytes * numReport,
+          getLongGauge("LastContainerReportWriteBytes", gauges));
+      assertEquals(keyCount * numReport,
+          getLongGauge("LastContainerReportKeyCount", gauges));
+      assertEquals(readCount * numReport,
+          getLongGauge("LastContainerReportReadCount", gauges));
+      assertEquals(writeCount * numReport,
+          getLongGauge("LastContainerReportWriteCount", gauges));
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Builds a full report holding numReport containers with the given stat.
+   */
+  private ContainerReportsRequestProto createContainerReport(int numReport,
+      ContainerStat stat) {
+    ContainerReportsRequestProto.Builder builder =
+        ContainerReportsRequestProto.newBuilder();
+    for (int idx = 0; idx < numReport; idx++) {
+      ContainerReport containerReport = new ContainerReport(
+          UUID.randomUUID().toString(), DigestUtils.sha256Hex("Simulated"));
+      containerReport.setSize(stat.getSize().get());
+      containerReport.setBytesUsed(stat.getUsed().get());
+      containerReport.setKeyCount(stat.getKeyCount().get());
+      containerReport.setReadBytes(stat.getReadBytes().get());
+      containerReport.setWriteBytes(stat.getWriteBytes().get());
+      containerReport.setReadCount(stat.getReadCount().get());
+      containerReport.setWriteCount(stat.getWriteCount().get());
+      builder.addReports(containerReport.getProtoBufMessage());
+    }
+    builder.setDatanodeID(SCMTestUtils.getDatanodeID().getProtoBufMessage());
+    builder.setType(StorageContainerDatanodeProtocolProtos
+        .ContainerReportsRequestProto.reportType.fullReport);
+    return builder.build();
+  }
+}