HDDS-2193. Adding container related metrics in SCM.
Signed-off-by: Anu Engineer <aengineer@apache.org>
This commit is contained in:
parent
c55ac6a1c7
commit
b6ef8cc120
|
@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
|
||||
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
||||
import org.apache.hadoop.hdds.scm.container.metrics.SCMContainerManagerMetrics;
|
||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||
|
@ -72,6 +73,8 @@ public class SCMContainerManager implements ContainerManager {
|
|||
private final ContainerStateManager containerStateManager;
|
||||
private final int numContainerPerOwnerInPipeline;
|
||||
|
||||
private final SCMContainerManagerMetrics scmContainerManagerMetrics;
|
||||
|
||||
/**
|
||||
* Constructs a mapping class that creates mapping between container names
|
||||
* and pipelines.
|
||||
|
@ -109,6 +112,8 @@ public class SCMContainerManager implements ContainerManager {
|
|||
ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
|
||||
|
||||
loadExistingContainers();
|
||||
|
||||
scmContainerManagerMetrics = SCMContainerManagerMetrics.create();
|
||||
}
|
||||
|
||||
private void loadExistingContainers() throws IOException {
|
||||
|
@ -204,6 +209,7 @@ public class SCMContainerManager implements ContainerManager {
|
|||
int count) {
|
||||
lock.lock();
|
||||
try {
|
||||
scmContainerManagerMetrics.incNumListContainersOps();
|
||||
final long startId = startContainerID == null ?
|
||||
0 : startContainerID.getId();
|
||||
final List<ContainerID> containersIds =
|
||||
|
@ -241,11 +247,17 @@ public class SCMContainerManager implements ContainerManager {
|
|||
public ContainerInfo allocateContainer(final ReplicationType type,
|
||||
final ReplicationFactor replicationFactor, final String owner)
|
||||
throws IOException {
|
||||
lock.lock();
|
||||
try {
|
||||
final ContainerInfo containerInfo =
|
||||
lock.lock();
|
||||
ContainerInfo containerInfo = null;
|
||||
try {
|
||||
containerInfo =
|
||||
containerStateManager.allocateContainer(pipelineManager, type,
|
||||
replicationFactor, owner);
|
||||
} catch (IOException ex) {
|
||||
scmContainerManagerMetrics.incNumFailureCreateContainers();
|
||||
throw ex;
|
||||
}
|
||||
// Add container to DB.
|
||||
try {
|
||||
addContainerToDB(containerInfo);
|
||||
|
@ -286,7 +298,9 @@ public class SCMContainerManager implements ContainerManager {
|
|||
LOG.warn("Unable to remove the container {} from container store," +
|
||||
" it's missing!", containerID);
|
||||
}
|
||||
scmContainerManagerMetrics.incNumSuccessfulDeleteContainers();
|
||||
} catch (ContainerNotFoundException cnfe) {
|
||||
scmContainerManagerMetrics.incNumFailureDeleteContainers();
|
||||
throw new SCMException(
|
||||
"Failed to delete container " + containerID + ", reason : " +
|
||||
"container doesn't exist.",
|
||||
|
@ -447,9 +461,16 @@ public class SCMContainerManager implements ContainerManager {
|
|||
containerInfo.getContainerID());
|
||||
containerStore.put(containerIDBytes,
|
||||
containerInfo.getProtobuf().toByteArray());
|
||||
// Incrementing here, as allocateBlock to create a container calls
|
||||
// getMatchingContainer() and finally calls this API to add newly
|
||||
// created container to DB.
|
||||
// Even allocateContainer calls this API to add newly allocated
|
||||
// container to DB. So we need to increment metrics here.
|
||||
scmContainerManagerMetrics.incNumSuccessfulCreateContainers();
|
||||
} catch (IOException ex) {
|
||||
// If adding to containerStore fails, we should remove the container
|
||||
// from in-memory map.
|
||||
scmContainerManagerMetrics.incNumFailureCreateContainers();
|
||||
LOG.error("Add Container to DB failed for ContainerID #{}",
|
||||
containerInfo.getContainerID());
|
||||
try {
|
||||
|
@ -546,5 +567,9 @@ public class SCMContainerManager implements ContainerManager {
|
|||
if (containerStore != null) {
|
||||
containerStore.close();
|
||||
}
|
||||
|
||||
if (scmContainerManagerMetrics != null) {
|
||||
this.scmContainerManagerMetrics.unRegister();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
package org.apache.hadoop.hdds.scm.container.metrics;
|
||||
|
||||
|
||||
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||
|
||||
/**
|
||||
* Class contains metrics related to ContainerManager.
|
||||
*/
|
||||
@Metrics(about = "SCM ContainerManager metrics", context = "ozone")
|
||||
public final class SCMContainerManagerMetrics {
|
||||
|
||||
private static final String SOURCE_NAME =
|
||||
SCMContainerManagerMetrics.class.getSimpleName();
|
||||
|
||||
// These are the metrics which will be reset to zero after restart.
|
||||
// These metrics capture count of number of successful/failure operations
|
||||
// of create/delete containers in SCM.
|
||||
|
||||
private @Metric MutableCounterLong numSuccessfulCreateContainers;
|
||||
private @Metric MutableCounterLong numFailureCreateContainers;
|
||||
private @Metric MutableCounterLong numSuccessfulDeleteContainers;
|
||||
private @Metric MutableCounterLong numFailureDeleteContainers;
|
||||
private @Metric MutableCounterLong numListContainerOps;
|
||||
|
||||
private SCMContainerManagerMetrics() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and return metrics instance.
|
||||
* @return SCMContainerManagerMetrics
|
||||
*/
|
||||
public static SCMContainerManagerMetrics create() {
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
return ms.register(SOURCE_NAME, "SCM ContainerManager Metrics",
|
||||
new SCMContainerManagerMetrics());
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregister metrics.
|
||||
*/
|
||||
public void unRegister() {
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
ms.unregisterSource(SOURCE_NAME);
|
||||
}
|
||||
|
||||
public void incNumSuccessfulCreateContainers() {
|
||||
this.numSuccessfulCreateContainers.incr();
|
||||
}
|
||||
|
||||
public void incNumFailureCreateContainers() {
|
||||
this.numFailureCreateContainers.incr();
|
||||
}
|
||||
|
||||
public void incNumSuccessfulDeleteContainers() {
|
||||
this.numSuccessfulDeleteContainers.incr();
|
||||
}
|
||||
|
||||
public void incNumFailureDeleteContainers() {
|
||||
this.numFailureDeleteContainers.incr();
|
||||
}
|
||||
|
||||
public void incNumListContainersOps() {
|
||||
this.numListContainerOps.incr();
|
||||
}
|
||||
|
||||
public long getNumSuccessfulCreateContainers() {
|
||||
return numSuccessfulCreateContainers.value();
|
||||
}
|
||||
|
||||
public long getNumFailureCreateContainers() {
|
||||
return numFailureCreateContainers.value();
|
||||
}
|
||||
|
||||
public long getNumSuccessfulDeleteContainers() {
|
||||
return numSuccessfulDeleteContainers.value();
|
||||
}
|
||||
|
||||
public long getNumFailureDeleteContainers() {
|
||||
return numFailureDeleteContainers.value();
|
||||
}
|
||||
|
||||
public long getNumListContainersOps() {
|
||||
return numListContainerOps.value();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdds.scm.container.metrics;
|
||||
|
||||
/*
|
||||
* This package contains StorageContainerManager metric classes.
|
||||
*/
|
|
@ -0,0 +1,112 @@
|
|||
package org.apache.hadoop.hdds.scm.container.metrics;
|
||||
|
||||
import org.apache.commons.lang3.RandomUtils;
|
||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||
import org.apache.hadoop.hdds.scm.XceiverClientManager;
|
||||
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
||||
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||
import org.apache.hadoop.hdds.scm.container.ContainerManager;
|
||||
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
/**
|
||||
* Class used to test {@link SCMContainerManagerMetrics}.
|
||||
*/
|
||||
public class TestSCMContainerManagerMetrics {
|
||||
|
||||
private MiniOzoneCluster cluster;
|
||||
private StorageContainerManager scm;
|
||||
private XceiverClientManager xceiverClientManager;
|
||||
private String containerOwner = "OZONE";
|
||||
|
||||
@Before
|
||||
public void setup() throws Exception {
|
||||
OzoneConfiguration conf = new OzoneConfiguration();
|
||||
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
|
||||
cluster.waitForClusterToBeReady();
|
||||
scm = cluster.getStorageContainerManager();
|
||||
xceiverClientManager = new XceiverClientManager(conf);
|
||||
}
|
||||
|
||||
|
||||
@After
|
||||
public void teardown() {
|
||||
cluster.shutdown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContainerOpsMetrics() throws IOException {
|
||||
MetricsRecordBuilder metrics;
|
||||
ContainerManager containerManager = scm.getContainerManager();
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
|
||||
long numSuccessfulCreateContainers = getLongCounter(
|
||||
"NumSuccessfulCreateContainers", metrics);
|
||||
|
||||
ContainerInfo containerInfo = containerManager.allocateContainer(
|
||||
HddsProtos.ReplicationType.RATIS,
|
||||
HddsProtos.ReplicationFactor.ONE, containerOwner);
|
||||
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
Assert.assertEquals(getLongCounter("NumSuccessfulCreateContainers",
|
||||
metrics), ++numSuccessfulCreateContainers);
|
||||
|
||||
try {
|
||||
containerManager.allocateContainer(
|
||||
HddsProtos.ReplicationType.RATIS,
|
||||
HddsProtos.ReplicationFactor.THREE, containerOwner);
|
||||
fail("testContainerOpsMetrics failed");
|
||||
} catch (IOException ex) {
|
||||
// Here it should fail, so it should have the old metric value.
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
Assert.assertEquals(getLongCounter("NumSuccessfulCreateContainers",
|
||||
metrics), numSuccessfulCreateContainers);
|
||||
Assert.assertEquals(getLongCounter("NumFailureCreateContainers",
|
||||
metrics), 1);
|
||||
}
|
||||
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
long numSuccessfulDeleteContainers = getLongCounter(
|
||||
"NumSuccessfulDeleteContainers", metrics);
|
||||
|
||||
containerManager.deleteContainer(
|
||||
new ContainerID(containerInfo.getContainerID()));
|
||||
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
Assert.assertEquals(getLongCounter("NumSuccessfulDeleteContainers",
|
||||
metrics), numSuccessfulDeleteContainers + 1);
|
||||
|
||||
|
||||
try {
|
||||
// Give random container to delete.
|
||||
containerManager.deleteContainer(
|
||||
new ContainerID(RandomUtils.nextLong(10000, 20000)));
|
||||
fail("testContainerOpsMetrics failed");
|
||||
} catch (IOException ex) {
|
||||
// Here it should fail, so it should have the old metric value.
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
Assert.assertEquals(getLongCounter("NumSuccessfulDeleteContainers",
|
||||
metrics), numSuccessfulCreateContainers);
|
||||
Assert.assertEquals(getLongCounter("NumFailureDeleteContainers",
|
||||
metrics), 1);
|
||||
}
|
||||
|
||||
containerManager.listContainer(
|
||||
new ContainerID(containerInfo.getContainerID()), 1);
|
||||
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
|
||||
Assert.assertEquals(getLongCounter("NumListContainerOps",
|
||||
metrics), 1);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue