HDDS-2193. Adding container related metrics in SCM.

Signed-off-by: Anu Engineer <aengineer@apache.org>
This commit is contained in:
Bharat Viswanadham 2019-09-26 15:22:32 -07:00 committed by Anu Engineer
parent c55ac6a1c7
commit b6ef8cc120
4 changed files with 252 additions and 3 deletions

View File

@ -23,6 +23,7 @@
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.metrics.SCMContainerManagerMetrics;
import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
@ -72,6 +73,8 @@ public class SCMContainerManager implements ContainerManager {
private final ContainerStateManager containerStateManager; private final ContainerStateManager containerStateManager;
private final int numContainerPerOwnerInPipeline; private final int numContainerPerOwnerInPipeline;
private final SCMContainerManagerMetrics scmContainerManagerMetrics;
/** /**
* Constructs a mapping class that creates mapping between container names * Constructs a mapping class that creates mapping between container names
* and pipelines. * and pipelines.
@ -109,6 +112,8 @@ public SCMContainerManager(final Configuration conf,
ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT); ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
loadExistingContainers(); loadExistingContainers();
scmContainerManagerMetrics = SCMContainerManagerMetrics.create();
} }
private void loadExistingContainers() throws IOException { private void loadExistingContainers() throws IOException {
@ -204,6 +209,7 @@ public List<ContainerInfo> listContainer(ContainerID startContainerID,
int count) { int count) {
lock.lock(); lock.lock();
try { try {
scmContainerManagerMetrics.incNumListContainersOps();
final long startId = startContainerID == null ? final long startId = startContainerID == null ?
0 : startContainerID.getId(); 0 : startContainerID.getId();
final List<ContainerID> containersIds = final List<ContainerID> containersIds =
@ -241,11 +247,17 @@ public List<ContainerInfo> listContainer(ContainerID startContainerID,
public ContainerInfo allocateContainer(final ReplicationType type, public ContainerInfo allocateContainer(final ReplicationType type,
final ReplicationFactor replicationFactor, final String owner) final ReplicationFactor replicationFactor, final String owner)
throws IOException { throws IOException {
lock.lock();
try { try {
final ContainerInfo containerInfo = lock.lock();
containerStateManager.allocateContainer(pipelineManager, type, ContainerInfo containerInfo = null;
try {
containerInfo =
containerStateManager.allocateContainer(pipelineManager, type,
replicationFactor, owner); replicationFactor, owner);
} catch (IOException ex) {
scmContainerManagerMetrics.incNumFailureCreateContainers();
throw ex;
}
// Add container to DB. // Add container to DB.
try { try {
addContainerToDB(containerInfo); addContainerToDB(containerInfo);
@ -286,7 +298,9 @@ public void deleteContainer(ContainerID containerID) throws IOException {
LOG.warn("Unable to remove the container {} from container store," + LOG.warn("Unable to remove the container {} from container store," +
" it's missing!", containerID); " it's missing!", containerID);
} }
scmContainerManagerMetrics.incNumSuccessfulDeleteContainers();
} catch (ContainerNotFoundException cnfe) { } catch (ContainerNotFoundException cnfe) {
scmContainerManagerMetrics.incNumFailureDeleteContainers();
throw new SCMException( throw new SCMException(
"Failed to delete container " + containerID + ", reason : " + "Failed to delete container " + containerID + ", reason : " +
"container doesn't exist.", "container doesn't exist.",
@ -447,9 +461,16 @@ private void addContainerToDB(ContainerInfo containerInfo)
containerInfo.getContainerID()); containerInfo.getContainerID());
containerStore.put(containerIDBytes, containerStore.put(containerIDBytes,
containerInfo.getProtobuf().toByteArray()); containerInfo.getProtobuf().toByteArray());
// Incrementing here, as allocateBlock to create a container calls
// getMatchingContainer() and finally calls this API to add newly
// created container to DB.
// Even allocateContainer calls this API to add newly allocated
// container to DB. So we need to increment metrics here.
scmContainerManagerMetrics.incNumSuccessfulCreateContainers();
} catch (IOException ex) { } catch (IOException ex) {
// If adding to containerStore fails, we should remove the container // If adding to containerStore fails, we should remove the container
// from in-memory map. // from in-memory map.
scmContainerManagerMetrics.incNumFailureCreateContainers();
LOG.error("Add Container to DB failed for ContainerID #{}", LOG.error("Add Container to DB failed for ContainerID #{}",
containerInfo.getContainerID()); containerInfo.getContainerID());
try { try {
@ -546,5 +567,9 @@ public void close() throws IOException {
if (containerStore != null) { if (containerStore != null) {
containerStore.close(); containerStore.close();
} }
if (scmContainerManagerMetrics != null) {
this.scmContainerManagerMetrics.unRegister();
}
} }
} }

View File

@ -0,0 +1,90 @@
package org.apache.hadoop.hdds.scm.container.metrics;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
/**
 * Metrics source holding the operation counters reported for the SCM
 * ContainerManager.
 */
@Metrics(about = "SCM ContainerManager metrics", context = "ozone")
public final class SCMContainerManagerMetrics {

  private static final String SOURCE_NAME =
      SCMContainerManagerMetrics.class.getSimpleName();

  // Operation counters for container create/delete/list calls in SCM.
  // They track successes and failures separately and start again from
  // zero after a restart.
  @Metric private MutableCounterLong numSuccessfulCreateContainers;
  @Metric private MutableCounterLong numFailureCreateContainers;
  @Metric private MutableCounterLong numSuccessfulDeleteContainers;
  @Metric private MutableCounterLong numFailureDeleteContainers;
  @Metric private MutableCounterLong numListContainerOps;

  /** Instances are only handed out through {@link #create()}. */
  private SCMContainerManagerMetrics() {
  }

  /**
   * Builds a new metrics object and registers it with the default metrics
   * system under this class's simple name.
   *
   * @return the value returned by the metrics system for the registration
   */
  public static SCMContainerManagerMetrics create() {
    final MetricsSystem metricsSystem = DefaultMetricsSystem.instance();
    final SCMContainerManagerMetrics source =
        new SCMContainerManagerMetrics();
    return metricsSystem.register(SOURCE_NAME,
        "SCM ContainerManager Metrics", source);
  }

  /**
   * Removes the source registered under this class's simple name from the
   * default metrics system.
   */
  public void unRegister() {
    DefaultMetricsSystem.instance().unregisterSource(SOURCE_NAME);
  }

  // ---- successful container creations ----

  /** Adds one to the successful create-container counter. */
  public void incNumSuccessfulCreateContainers() {
    numSuccessfulCreateContainers.incr();
  }

  /** @return current value of the successful create-container counter */
  public long getNumSuccessfulCreateContainers() {
    return this.numSuccessfulCreateContainers.value();
  }

  // ---- failed container creations ----

  /** Adds one to the failed create-container counter. */
  public void incNumFailureCreateContainers() {
    numFailureCreateContainers.incr();
  }

  /** @return current value of the failed create-container counter */
  public long getNumFailureCreateContainers() {
    return this.numFailureCreateContainers.value();
  }

  // ---- successful container deletions ----

  /** Adds one to the successful delete-container counter. */
  public void incNumSuccessfulDeleteContainers() {
    numSuccessfulDeleteContainers.incr();
  }

  /** @return current value of the successful delete-container counter */
  public long getNumSuccessfulDeleteContainers() {
    return this.numSuccessfulDeleteContainers.value();
  }

  // ---- failed container deletions ----

  /** Adds one to the failed delete-container counter. */
  public void incNumFailureDeleteContainers() {
    numFailureDeleteContainers.incr();
  }

  /** @return current value of the failed delete-container counter */
  public long getNumFailureDeleteContainers() {
    return this.numFailureDeleteContainers.value();
  }

  // ---- list-container operations ----

  /** Adds one to the list-containers operation counter. */
  public void incNumListContainersOps() {
    numListContainerOps.incr();
  }

  /** @return current value of the list-containers operation counter */
  public long getNumListContainersOps() {
    return this.numListContainerOps.value();
  }
}

View File

@ -0,0 +1,22 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.hdds.scm.container.metrics;
/*
* This package contains StorageContainerManager metric classes.
*/

View File

@ -0,0 +1,112 @@
package org.apache.hadoop.hdds.scm.container.metrics;
import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.fail;
/**
 * Class used to test {@link SCMContainerManagerMetrics}.
 *
 * Starts a single-datanode {@link MiniOzoneCluster} and checks that the
 * create/delete/list container counters move as expected when the
 * corresponding {@link ContainerManager} calls succeed or fail.
 */
public class TestSCMContainerManagerMetrics {

  /** Name under which the metrics source under test is looked up. */
  private static final String METRICS_SOURCE =
      SCMContainerManagerMetrics.class.getSimpleName();

  private MiniOzoneCluster cluster;
  private StorageContainerManager scm;
  private XceiverClientManager xceiverClientManager;
  private String containerOwner = "OZONE";

  /**
   * Brings up a one-datanode cluster and waits until it is ready.
   *
   * @throws Exception if the cluster cannot be built or does not get ready
   */
  @Before
  public void setup() throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
    cluster.waitForClusterToBeReady();
    scm = cluster.getStorageContainerManager();
    xceiverClientManager = new XceiverClientManager(conf);
  }

  /**
   * Releases the client manager and shuts the cluster down. Both fields are
   * null-checked so that a failure in {@link #setup()} is not masked by a
   * NullPointerException raised from here.
   *
   * @throws Exception if closing the client manager fails
   */
  @After
  public void teardown() throws Exception {
    try {
      if (xceiverClientManager != null) {
        xceiverClientManager.close();
      }
    } finally {
      // Always attempt the shutdown, even if closing the client failed.
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /**
   * Exercises allocate, delete and list on the container manager and
   * verifies every counter against the value it had just before the call,
   * so the test does not depend on counters starting at zero.
   *
   * @throws IOException if an operation that is expected to succeed fails
   */
  @Test
  public void testContainerOpsMetrics() throws IOException {
    ContainerManager containerManager = scm.getContainerManager();

    // ---- create: success ----
    long numSuccessfulCreateContainers =
        getCounter("NumSuccessfulCreateContainers");
    long numFailureCreateContainers =
        getCounter("NumFailureCreateContainers");

    ContainerInfo containerInfo = containerManager.allocateContainer(
        HddsProtos.ReplicationType.RATIS,
        HddsProtos.ReplicationFactor.ONE, containerOwner);

    numSuccessfulCreateContainers++;
    Assert.assertEquals(numSuccessfulCreateContainers,
        getCounter("NumSuccessfulCreateContainers"));

    // ---- create: failure ----
    // The cluster has a single datanode, so a factor THREE allocation is
    // expected to be rejected.
    try {
      containerManager.allocateContainer(
          HddsProtos.ReplicationType.RATIS,
          HddsProtos.ReplicationFactor.THREE, containerOwner);
      fail("testContainerOpsMetrics failed");
    } catch (IOException ex) {
      // Expected: the success counter must be untouched and the failure
      // counter must have advanced by exactly one.
      Assert.assertEquals(numSuccessfulCreateContainers,
          getCounter("NumSuccessfulCreateContainers"));
      Assert.assertEquals(numFailureCreateContainers + 1,
          getCounter("NumFailureCreateContainers"));
    }

    // ---- delete: success ----
    long numSuccessfulDeleteContainers =
        getCounter("NumSuccessfulDeleteContainers");
    long numFailureDeleteContainers =
        getCounter("NumFailureDeleteContainers");

    containerManager.deleteContainer(
        new ContainerID(containerInfo.getContainerID()));

    numSuccessfulDeleteContainers++;
    Assert.assertEquals(numSuccessfulDeleteContainers,
        getCounter("NumSuccessfulDeleteContainers"));

    // ---- delete: failure ----
    try {
      // Give random container to delete.
      containerManager.deleteContainer(
          new ContainerID(RandomUtils.nextLong(10000, 20000)));
      fail("testContainerOpsMetrics failed");
    } catch (IOException ex) {
      // Expected: compare against the DELETE baseline (the previous version
      // of this check compared against the create counter by mistake).
      Assert.assertEquals(numSuccessfulDeleteContainers,
          getCounter("NumSuccessfulDeleteContainers"));
      Assert.assertEquals(numFailureDeleteContainers + 1,
          getCounter("NumFailureDeleteContainers"));
    }

    // ---- list ----
    long numListContainerOps = getCounter("NumListContainerOps");

    containerManager.listContainer(
        new ContainerID(containerInfo.getContainerID()), 1);

    Assert.assertEquals(numListContainerOps + 1,
        getCounter("NumListContainerOps"));
  }

  /**
   * Takes a fresh snapshot of the metrics source and reads one counter.
   *
   * @param counterName name of the long counter to read
   * @return the counter's value in the fresh snapshot
   */
  private static long getCounter(String counterName) {
    MetricsRecordBuilder metrics = getMetrics(METRICS_SOURCE);
    return getLongCounter(counterName, metrics);
  }
}