HDFS-3650. Use MutableQuantiles to provide latency histograms for various operations. Contributed by Andrew Wang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1366245 13f79535-47bb-0310-9956-ffa450edef68
parent 050df1454f
commit 8a8461ef31
@@ -893,6 +893,25 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     }
     return Integer.parseInt(valueString);
   }
 
+  /**
+   * Get the value of the <code>name</code> property as a set of comma-delimited
+   * <code>int</code> values.
+   *
+   * If no such property exists, an empty array is returned.
+   *
+   * @param name property name
+   * @return property value interpreted as an array of comma-delimited
+   *         <code>int</code> values
+   */
+  public int[] getInts(String name) {
+    String[] strings = getTrimmedStrings(name);
+    int[] ints = new int[strings.length];
+    for (int i = 0; i < strings.length; i++) {
+      ints[i] = Integer.parseInt(strings[i]);
+    }
+    return ints;
+  }
+
   /**
    * Set the value of the <code>name</code> property to an <code>int</code>.
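Note: the new Configuration#getInts builds on getTrimmedStrings, so surrounding whitespace in the value is tolerated and a missing property yields an empty array rather than null. A minimal usage sketch (the key name here is hypothetical):

    Configuration conf = new Configuration();
    conf.set("example.intervals", "60, 300, 3600");  // hypothetical key; values are trimmed
    int[] intervals = conf.getInts("example.intervals");
    // intervals == {60, 300, 3600}; an unset key would give a zero-length array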
@@ -45,7 +45,8 @@ import com.google.common.annotations.VisibleForTesting;
 @InterfaceStability.Evolving
 public class MutableQuantiles extends MutableMetric {
 
-  static final Quantile[] quantiles = { new Quantile(0.50, 0.050),
+  @VisibleForTesting
+  public static final Quantile[] quantiles = { new Quantile(0.50, 0.050),
       new Quantile(0.75, 0.025), new Quantile(0.90, 0.010),
       new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) };
 
@@ -90,8 +91,7 @@ public class MutableQuantiles extends MutableMetric {
         "Number of %s for %s with %ds interval", lsName, desc, interval));
     // Construct the MetricsInfos for the quantiles, converting to percentiles
     quantileInfos = new MetricsInfo[quantiles.length];
-    String nameTemplate = ucName + "%dthPercentile" + interval + "sInterval"
-        + uvName;
+    String nameTemplate = ucName + "%dthPercentile" + uvName;
     String descTemplate = "%d percentile " + lvName + " with " + interval
         + " second interval for " + desc;
     for (int i = 0; i < quantiles.length; i++) {
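Note: the rename drops the interval from the gauge-name template; callers now bake the interval into the metric name itself (e.g. "flushNanos60s" in DataNodeMetrics below). A runnable sketch of the gauge names one quantile set produces, assuming a metric registered as "flushNanos60s" with value name "latency" (the 60-second interval is illustrative):

    public class QuantileNamingSketch {
      // Mirrors the percentiles in MutableQuantiles.quantiles (0.50 .. 0.99).
      static final int[] PERCENTILES = { 50, 75, 90, 95, 99 };

      public static void main(String[] args) {
        String nameTemplate = "FlushNanos60s" + "%dthPercentile" + "Latency";
        for (int p : PERCENTILES) {
          // FlushNanos60s50thPercentileLatency ... FlushNanos60s99thPercentileLatency
          System.out.println(String.format(nameTemplate, p));
        }
      }
    }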
@@ -31,8 +31,8 @@ import java.nio.channels.WritableByteChannel;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.metrics2.lib.MutableRate;
-import org.apache.hadoop.util.Progressable;
 
 /**
  * This implements an output stream that can have a timeout while writing.
@@ -179,9 +179,9 @@ public class SocketOutputStream extends OutputStream
    * @param fileCh FileChannel to transfer data from.
    * @param position position within the channel where the transfer begins
    * @param count number of bytes to transfer.
-   * @param waitForWritableTime updated by the nanoseconds spent waiting for
-   *        the socket to become writable
-   * @param transferTime updated by the nanoseconds spent transferring data
+   * @param waitForWritableTime nanoseconds spent waiting for the socket
+   *        to become writable
+   * @param transferTime nanoseconds spent transferring data
    *
    * @throws EOFException
    *         If end of input file is reached before requested number of
@@ -195,8 +195,8 @@ public class SocketOutputStream extends OutputStream
    * {@link FileChannel#transferTo(long, long, WritableByteChannel)}.
    */
  public void transferToFully(FileChannel fileCh, long position, int count,
-      MutableRate waitForWritableTime,
-      MutableRate transferToTime) throws IOException {
+      LongWritable waitForWritableTime,
+      LongWritable transferToTime) throws IOException {
    long waitTime = 0;
    long transferTime = 0;
    while (count > 0) {
@@ -236,12 +236,12 @@ public class SocketOutputStream extends OutputStream
        waitTime += wait - start;
        transferTime += transfer - wait;
      }
 
    if (waitForWritableTime != null) {
-      waitForWritableTime.add(waitTime);
+      waitForWritableTime.set(waitTime);
    }
    if (transferToTime != null) {
-      transferToTime.add(transferTime);
+      transferToTime.set(transferTime);
    }
  }
 
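Note: transferToFully now reports timings through LongWritable out-parameters instead of updating MutableRate metrics itself, which keeps the Common-side stream decoupled from whichever metrics (rates and now quantiles) the caller maintains. A minimal self-contained sketch of the holder pattern; measureTransfer is a hypothetical stand-in for transferToFully, which fills its LongWritable arguments the same way:

    import org.apache.hadoop.io.LongWritable;

    public class OutParamSketch {
      // Hypothetical stand-in for SocketOutputStream#transferToFully.
      static void measureTransfer(LongWritable waitNanos, LongWritable transferNanos) {
        long start = System.nanoTime();
        long wait = System.nanoTime();   // after the socket became writable
        long done = System.nanoTime();   // after the transfer completed
        if (waitNanos != null) {
          waitNanos.set(wait - start);
        }
        if (transferNanos != null) {
          transferNanos.set(done - wait);
        }
      }

      public static void main(String[] args) {
        LongWritable waitTime = new LongWritable();
        LongWritable transferTime = new LongWritable();
        measureTransfer(waitTime, transferTime);
        // The caller, not the stream, decides which metrics to feed.
        System.out.println(waitTime.get() + " / " + transferTime.get());
      }
    }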
@@ -150,7 +150,7 @@ public class TestMutableMetrics {
         info("FooNumOps", "Number of ops for stat with 5s interval"),
         (long) 2000);
     Quantile[] quants = MutableQuantiles.quantiles;
-    String name = "Foo%dthPercentile5sIntervalLatency";
+    String name = "Foo%dthPercentileLatency";
     String desc = "%d percentile latency with 5 second interval for stat";
     for (Quantile q : quants) {
       int percentile = (int) (100 * q.quantile);
@@ -176,7 +176,7 @@ public class TestMutableMetrics {
         "Latency", 5);
 
     Quantile[] quants = MutableQuantiles.quantiles;
-    String name = "Foo%dthPercentile5sIntervalLatency";
+    String name = "Foo%dthPercentileLatency";
     String desc = "%d percentile latency with 5 second interval for stat";
 
     // Push values for three intervals
@@ -23,7 +23,9 @@ import static com.google.common.base.Preconditions.*;
 import org.hamcrest.Description;
 import org.junit.Assert;
+
+import static org.mockito.AdditionalMatchers.geq;
 import static org.mockito.Mockito.*;
 
 import org.mockito.stubbing.Answer;
 import org.mockito.internal.matchers.GreaterThan;
 import org.mockito.invocation.InvocationOnMock;
@@ -39,7 +41,11 @@ import org.apache.hadoop.metrics2.MetricsSource;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.metrics2.MetricsSystem;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.util.Quantile;
 
 import static org.apache.hadoop.metrics2.lib.Interns.*;
+import static org.apache.hadoop.test.MetricsAsserts.eqName;
+
 /**
  * Helpers for metrics source tests
@@ -328,4 +334,23 @@ public class MetricsAsserts {
                                    MetricsSource source) {
     assertGaugeGt(name, greater, getMetrics(source));
   }
+
+  /**
+   * Asserts that the NumOps and quantiles for a metric have been changed at
+   * some point to a non-zero value.
+   *
+   * @param prefix of the metric
+   * @param rb MetricsRecordBuilder with the metric
+   */
+  public static void assertQuantileGauges(String prefix,
+      MetricsRecordBuilder rb) {
+    verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0l));
+    for (Quantile q : MutableQuantiles.quantiles) {
+      String nameTemplate = prefix + "%dthPercentileLatency";
+      int percentile = (int) (100 * q.quantile);
+      verify(rb).addGauge(
+          eqName(info(String.format(nameTemplate, percentile), "")),
+          geq(0l));
+    }
+  }
 }
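Note: the helper verifies the NumOps gauge plus one gauge per percentile via Mockito's geq matcher. A condensed usage sketch, mirroring the tests later in this commit (the 1-second interval and the sleep past one rollover are taken from them):

    final int interval = 1;  // dfs.metrics.percentiles.intervals = "1"
    // ... exercise the code path that feeds the quantiles ...
    Thread.sleep((interval + 1) * 1000);  // let at least one rollover happen
    MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
    assertQuantileGauges("FlushNanos" + interval + "s", rb);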
@@ -174,6 +174,9 @@ Release 2.0.1-alpha - UNRELEASED
 
     HDFS-3711. Manually convert remaining tests to JUnit4. (Andrew Wang via atm)
 
+    HDFS-3650. Use MutableQuantiles to provide latency histograms for various
+    operations. (Andrew Wang via atm)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log
@@ -203,6 +203,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_CLIENT_READ_PREFETCH_SIZE_KEY = "dfs.client.read.prefetch.size";
   public static final String DFS_CLIENT_RETRY_WINDOW_BASE= "dfs.client.retry.window.base";
   public static final String DFS_METRICS_SESSION_ID_KEY = "dfs.metrics.session-id";
+  public static final String DFS_METRICS_PERCENTILES_INTERVALS_KEY = "dfs.metrics.percentiles.intervals";
   public static final String DFS_DATANODE_HOST_NAME_KEY = "dfs.datanode.hostname";
   public static final String DFS_NAMENODE_HOSTS_KEY = "dfs.namenode.hosts";
   public static final String DFS_NAMENODE_HOSTS_EXCLUDE_KEY = "dfs.namenode.hosts.exclude";
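Note: the new key holds a comma-delimited list of rollover intervals in seconds; leaving it empty (the shipped default, see the hdfs-default.xml hunk below) disables percentile measurement. A sketch of wiring it up, with illustrative interval values:

    Configuration conf = new HdfsConfiguration();
    conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "60,3600");
    // NameNodeMetrics/DataNodeMetrics read this via the new conf.getInts(),
    // creating one MutableQuantiles per listed interval.
    int[] intervals = conf.getInts(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY);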
@@ -76,6 +76,8 @@ import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Time;
 
+import com.google.common.annotations.VisibleForTesting;
+
 
 /****************************************************************
  * DFSOutputStream creates files from a stream of bytes.
@@ -1210,7 +1212,8 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
   //
   // returns the list of targets, if any, that is being currently used.
   //
-  synchronized DatanodeInfo[] getPipeline() {
+  @VisibleForTesting
+  public synchronized DatanodeInfo[] getPipeline() {
     if (streamer == null) {
       return null;
     }
@@ -1758,11 +1761,13 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
     }
   }
 
-  void setArtificialSlowdown(long period) {
+  @VisibleForTesting
+  public void setArtificialSlowdown(long period) {
     artificialSlowdown = period;
   }
 
-  synchronized void setChunksPerPacket(int value) {
+  @VisibleForTesting
+  public synchronized void setChunksPerPacket(int value) {
     chunksPerPacket = Math.min(chunksPerPacket, value);
     packetSize = PacketHeader.PKT_HEADER_LEN +
                  (checksum.getBytesPerChecksum() +
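Note: these three members are widened to public purely for tests, which the @VisibleForTesting annotation flags. How the hooks get used, condensed from TestDataNodeMetrics#testRoundTripAckMetric later in this commit:

    FSDataOutputStream fsout = fs.create(testFile, (short) 2);
    DFSOutputStream dout = (DFSOutputStream) fsout.getWrappedStream();
    dout.setChunksPerPacket(5);        // small packets, so more acks to measure
    dout.setArtificialSlowdown(3000);  // keep the write pipeline open
    fsout.write(new byte[10000]);
    DatanodeInfo[] pipeline = dout.getPipeline();  // head datanode receives the acks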
@@ -41,6 +41,7 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
 import org.apache.hadoop.hdfs.server.common.Util;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.ReadaheadPool;
 import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
 import org.apache.hadoop.io.nativeio.NativeIO;
@@ -486,11 +487,14 @@ class BlockSender implements java.io.Closeable {
 
         // no need to flush since we know out is not a buffered stream
         FileChannel fileCh = ((FileInputStream)blockIn).getChannel();
+        LongWritable waitTime = new LongWritable();
+        LongWritable transferTime = new LongWritable();
         sockOut.transferToFully(fileCh, blockInPosition, dataLen,
-            datanode.metrics.getSendDataPacketBlockedOnNetworkNanos(),
-            datanode.metrics.getSendDataPacketTransferNanos());
+            waitTime, transferTime);
+        datanode.metrics.addSendDataPacketBlockedOnNetworkNanos(waitTime.get());
+        datanode.metrics.addSendDataPacketTransferNanos(transferTime.get());
         blockInPosition += dataLen;
       } else {
         // normal transfer
         out.write(buf, 0, dataOff + dataLen);
       }
@@ -29,6 +29,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.metrics2.source.JvmMetrics;
 
@@ -74,19 +75,54 @@ public class DataNodeMetrics {
   @Metric MutableRate heartbeats;
   @Metric MutableRate blockReports;
   @Metric MutableRate packetAckRoundTripTimeNanos;
+  MutableQuantiles[] packetAckRoundTripTimeNanosQuantiles;
+
   @Metric MutableRate flushNanos;
+  MutableQuantiles[] flushNanosQuantiles;
+
   @Metric MutableRate fsyncNanos;
+  MutableQuantiles[] fsyncNanosQuantiles;
 
   @Metric MutableRate sendDataPacketBlockedOnNetworkNanos;
+  MutableQuantiles[] sendDataPacketBlockedOnNetworkNanosQuantiles;
   @Metric MutableRate sendDataPacketTransferNanos;
+  MutableQuantiles[] sendDataPacketTransferNanosQuantiles;
 
 
   final MetricsRegistry registry = new MetricsRegistry("datanode");
   final String name;
 
-  public DataNodeMetrics(String name, String sessionId) {
+  public DataNodeMetrics(String name, String sessionId, int[] intervals) {
     this.name = name;
     registry.tag(SessionId, sessionId);
+
+    final int len = intervals.length;
+    packetAckRoundTripTimeNanosQuantiles = new MutableQuantiles[len];
+    flushNanosQuantiles = new MutableQuantiles[len];
+    fsyncNanosQuantiles = new MutableQuantiles[len];
+    sendDataPacketBlockedOnNetworkNanosQuantiles = new MutableQuantiles[len];
+    sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
+
+    for (int i = 0; i < len; i++) {
+      int interval = intervals[i];
+      packetAckRoundTripTimeNanosQuantiles[i] = registry.newQuantiles(
+          "packetAckRoundTripTimeNanos" + interval + "s",
+          "Packet Ack RTT in ns", "ops", "latency", interval);
+      flushNanosQuantiles[i] = registry.newQuantiles(
+          "flushNanos" + interval + "s",
+          "Disk flush latency in ns", "ops", "latency", interval);
+      fsyncNanosQuantiles[i] = registry.newQuantiles(
+          "fsyncNanos" + interval + "s", "Disk fsync latency in ns",
+          "ops", "latency", interval);
+      sendDataPacketBlockedOnNetworkNanosQuantiles[i] = registry.newQuantiles(
+          "sendDataPacketBlockedOnNetworkNanos" + interval + "s",
+          "Time blocked on network while sending a packet in ns",
+          "ops", "latency", interval);
+      sendDataPacketTransferNanosQuantiles[i] = registry.newQuantiles(
+          "sendDataPacketTransferNanos" + interval + "s",
+          "Time reading from disk and writing to network while sending " +
+              "a packet in ns", "ops", "latency", interval);
+    }
   }
 
   public static DataNodeMetrics create(Configuration conf, String dnName) {
@@ -94,8 +130,15 @@ public class DataNodeMetrics {
     MetricsSystem ms = DefaultMetricsSystem.instance();
     JvmMetrics.create("DataNode", sessionId, ms);
     String name = "DataNodeActivity-"+ (dnName.isEmpty()
-        ? "UndefinedDataNodeName"+ DFSUtil.getRandom().nextInt() : dnName.replace(':', '-'));
-    return ms.register(name, null, new DataNodeMetrics(name, sessionId));
+        ? "UndefinedDataNodeName"+ DFSUtil.getRandom().nextInt()
+        : dnName.replace(':', '-'));
+
+    // Percentile measurement is off by default, by watching no intervals
+    int[] intervals =
+        conf.getInts(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY);
+
+    return ms.register(name, null, new DataNodeMetrics(name, sessionId,
+        intervals));
   }
 
   public String name() { return name; }
@@ -166,14 +209,23 @@ public class DataNodeMetrics {
 
   public void addPacketAckRoundTripTimeNanos(long latencyNanos) {
     packetAckRoundTripTimeNanos.add(latencyNanos);
+    for (MutableQuantiles q : packetAckRoundTripTimeNanosQuantiles) {
+      q.add(latencyNanos);
+    }
   }
 
   public void addFlushNanos(long latencyNanos) {
     flushNanos.add(latencyNanos);
+    for (MutableQuantiles q : flushNanosQuantiles) {
+      q.add(latencyNanos);
+    }
   }
 
   public void addFsyncNanos(long latencyNanos) {
     fsyncNanos.add(latencyNanos);
+    for (MutableQuantiles q : fsyncNanosQuantiles) {
+      q.add(latencyNanos);
+    }
   }
 
   public void shutdown() {
@@ -196,12 +248,18 @@ public class DataNodeMetrics {
   public void incrBlocksGetLocalPathInfo() {
     blocksGetLocalPathInfo.incr();
   }
 
-  public MutableRate getSendDataPacketBlockedOnNetworkNanos() {
-    return sendDataPacketBlockedOnNetworkNanos;
+  public void addSendDataPacketBlockedOnNetworkNanos(long latencyNanos) {
+    sendDataPacketBlockedOnNetworkNanos.add(latencyNanos);
+    for (MutableQuantiles q : sendDataPacketBlockedOnNetworkNanosQuantiles) {
+      q.add(latencyNanos);
+    }
   }
 
-  public MutableRate getSendDataPacketTransferNanos() {
-    return sendDataPacketTransferNanos;
+  public void addSendDataPacketTransferNanos(long latencyNanos) {
+    sendDataPacketTransferNanos.add(latencyNanos);
+    for (MutableQuantiles q : sendDataPacketTransferNanosQuantiles) {
+      q.add(latencyNanos);
+    }
   }
 }
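Note: each DataNode latency sample now fans out to the long-running MutableRate and to every configured per-interval MutableQuantiles. A sketch of the quantile metric names this registers when the key is set to "60,300" (illustrative values):

    String[] bases = { "packetAckRoundTripTimeNanos", "flushNanos", "fsyncNanos",
        "sendDataPacketBlockedOnNetworkNanos", "sendDataPacketTransferNanos" };
    for (String base : bases) {
      for (int interval : new int[] { 60, 300 }) {
        // e.g. flushNanos60s, flushNanos300s, ...
        System.out.println(base + interval + "s");
      }
    }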
@@ -17,17 +17,20 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.metrics;
 
+import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName;
+import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId;
+
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
 import org.apache.hadoop.metrics2.MetricsSystem;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
-import static org.apache.hadoop.metrics2.impl.MsInfo.*;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
 import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.metrics2.source.JvmMetrics;
 
@@ -57,15 +60,31 @@ public class NameNodeMetrics {
 
   @Metric("Journal transactions") MutableRate transactions;
   @Metric("Journal syncs") MutableRate syncs;
+  MutableQuantiles[] syncsQuantiles;
   @Metric("Journal transactions batched in sync")
   MutableCounterLong transactionsBatchedInSync;
   @Metric("Block report") MutableRate blockReport;
+  MutableQuantiles[] blockReportQuantiles;
 
   @Metric("Duration in SafeMode at startup") MutableGaugeInt safeModeTime;
   @Metric("Time loading FS Image at startup") MutableGaugeInt fsImageLoadTime;
 
-  NameNodeMetrics(String processName, String sessionId) {
+  NameNodeMetrics(String processName, String sessionId, int[] intervals) {
     registry.tag(ProcessName, processName).tag(SessionId, sessionId);
+
+    final int len = intervals.length;
+    syncsQuantiles = new MutableQuantiles[len];
+    blockReportQuantiles = new MutableQuantiles[len];
+
+    for (int i = 0; i < len; i++) {
+      int interval = intervals[i];
+      syncsQuantiles[i] = registry.newQuantiles(
+          "syncs" + interval + "s",
+          "Journal syncs", "ops", "latency", interval);
+      blockReportQuantiles[i] = registry.newQuantiles(
+          "blockReport" + interval + "s",
+          "Block report", "ops", "latency", interval);
+    }
   }
 
   public static NameNodeMetrics create(Configuration conf, NamenodeRole r) {
@@ -73,7 +92,11 @@ public class NameNodeMetrics {
     String processName = r.toString();
     MetricsSystem ms = DefaultMetricsSystem.instance();
     JvmMetrics.create(processName, sessionId, ms);
-    return ms.register(new NameNodeMetrics(processName, sessionId));
+
+    // Percentile measurement is off by default, by watching no intervals
+    int[] intervals =
+        conf.getInts(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY);
+    return ms.register(new NameNodeMetrics(processName, sessionId, intervals));
   }
 
   public void shutdown() {
@@ -146,6 +169,9 @@ public class NameNodeMetrics {
 
   public void addSync(long elapsed) {
     syncs.add(elapsed);
+    for (MutableQuantiles q : syncsQuantiles) {
+      q.add(elapsed);
+    }
   }
 
   public void setFsImageLoadTime(long elapsed) {
@@ -154,6 +180,9 @@ public class NameNodeMetrics {
 
   public void addBlockReport(long latency) {
     blockReport.add(latency);
+    for (MutableQuantiles q : blockReportQuantiles) {
+      q.add(latency);
+    }
   }
 
   public void setSafeModeTime(long elapsed) {
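Note: with the interval set to 1 second, the NameNode registers quantiles named "syncs1s" and "blockReport1s"; combined with the MutableQuantiles name template, the gauges the tests assert look like this sketch (percentile set taken from MutableQuantiles.quantiles):

    int[] percentiles = { 50, 75, 90, 95, 99 };
    for (String prefix : new String[] { "Syncs1s", "BlockReport1s" }) {
      System.out.println(prefix + "NumOps");
      for (int p : percentiles) {
        // e.g. Syncs1s50thPercentileLatency ... BlockReport1s99thPercentileLatency
        System.out.println(prefix + p + "thPercentileLatency");
      }
    }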
@@ -1004,4 +1004,14 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.metrics.percentiles.intervals</name>
+  <value></value>
+  <description>
+    Comma-delimited set of integers denoting the desired rollover intervals
+    (in seconds) for percentile latency metrics on the Namenode and Datanode.
+    By default, percentile latency metrics are disabled.
+  </description>
+</property>
+
 </configuration>
@@ -18,21 +18,26 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.List;
-import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSOutputStream;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.junit.Test;
 
@@ -59,8 +64,10 @@ public class TestDataNodeMetrics {
   }
 
   @Test
-  public void testSendDataPacket() throws Exception {
+  public void testSendDataPacketMetrics() throws Exception {
     Configuration conf = new HdfsConfiguration();
+    final int interval = 1;
+    conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + interval);
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
     try {
       FileSystem fs = cluster.getFileSystem();
@@ -73,64 +80,110 @@ public class TestDataNodeMetrics {
       assertEquals(datanodes.size(), 1);
       DataNode datanode = datanodes.get(0);
       MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
 
       // Expect 2 packets, 1 for the 1 byte read, 1 for the empty packet
       // signaling the end of the block
       assertCounter("SendDataPacketTransferNanosNumOps", (long)2, rb);
       assertCounter("SendDataPacketBlockedOnNetworkNanosNumOps", (long)2, rb);
+      // Wait for at least 1 rollover
+      Thread.sleep((interval + 1) * 1000);
+      // Check that the sendPacket percentiles rolled to non-zero values
+      String sec = interval + "s";
+      assertQuantileGauges("SendDataPacketBlockedOnNetworkNanos" + sec, rb);
+      assertQuantileGauges("SendDataPacketTransferNanos" + sec, rb);
     } finally {
       if (cluster != null) {cluster.shutdown();}
     }
   }
 
   @Test
-  public void testFlushMetric() throws Exception {
+  public void testReceivePacketMetrics() throws Exception {
     Configuration conf = new HdfsConfiguration();
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+    final int interval = 1;
+    conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + interval);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
     try {
       cluster.waitActive();
       DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
 
       Path testFile = new Path("/testFlushNanosMetric.txt");
-      DFSTestUtil.createFile(fs, testFile, 1, (short)1, new Random().nextLong());
+      FSDataOutputStream fout = fs.create(testFile);
+      fout.write(new byte[1]);
+      fout.hsync();
+      fout.close();
       List<DataNode> datanodes = cluster.getDataNodes();
       DataNode datanode = datanodes.get(0);
       MetricsRecordBuilder dnMetrics = getMetrics(datanode.getMetrics().name());
-      // Expect 2 flushes, 1 for the flush that occurs after writing, 1 that occurs
-      // on closing the data and metadata files.
+      // Expect two flushes, 1 for the flush that occurs after writing,
+      // 1 that occurs on closing the data and metadata files.
       assertCounter("FlushNanosNumOps", 2L, dnMetrics);
+      // Expect two syncs, one from the hsync, one on close.
+      assertCounter("FsyncNanosNumOps", 2L, dnMetrics);
+      // Wait for at least 1 rollover
+      Thread.sleep((interval + 1) * 1000);
+      // Check the receivePacket percentiles that should be non-zero
+      String sec = interval + "s";
+      assertQuantileGauges("FlushNanos" + sec, dnMetrics);
+      assertQuantileGauges("FsyncNanos" + sec, dnMetrics);
     } finally {
       if (cluster != null) {cluster.shutdown();}
     }
   }
 
+  /**
+   * Tests that round-trip acks in a datanode write pipeline are correctly
+   * measured.
+   */
   @Test
   public void testRoundTripAckMetric() throws Exception {
-    final int DATANODE_COUNT = 2;
+    final int datanodeCount = 2;
+    final int interval = 1;
     Configuration conf = new HdfsConfiguration();
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_COUNT).build();
+    conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + interval);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
+        datanodeCount).build();
     try {
       cluster.waitActive();
-      DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
+      FileSystem fs = cluster.getFileSystem();
+      // Open a file and get the head of the pipeline
       Path testFile = new Path("/testRoundTripAckMetric.txt");
-      DFSTestUtil.createFile(fs, testFile, 1, (short)DATANODE_COUNT,
-          new Random().nextLong());
-      boolean foundNonzeroPacketAckNumOps = false;
+      FSDataOutputStream fsout = fs.create(testFile, (short) datanodeCount);
+      DFSOutputStream dout = (DFSOutputStream) fsout.getWrappedStream();
+      // Slow down the writes to catch the write pipeline
+      dout.setChunksPerPacket(5);
+      dout.setArtificialSlowdown(3000);
+      fsout.write(new byte[10000]);
+      DatanodeInfo[] pipeline = null;
+      int count = 0;
+      while (pipeline == null && count < 5) {
+        pipeline = dout.getPipeline();
+        System.out.println("Waiting for pipeline to be created.");
+        Thread.sleep(1000);
+        count++;
+      }
+      // Get the head node that should be receiving downstream acks
+      DatanodeInfo headInfo = pipeline[0];
+      DataNode headNode = null;
      for (DataNode datanode : cluster.getDataNodes()) {
-        MetricsRecordBuilder dnMetrics = getMetrics(datanode.getMetrics().name());
-        if (getLongCounter("PacketAckRoundTripTimeNanosNumOps", dnMetrics) > 0) {
-          foundNonzeroPacketAckNumOps = true;
+        if (datanode.getDatanodeId().equals(headInfo)) {
+          headNode = datanode;
+          break;
        }
      }
-      assertTrue(
-          "Expected at least one datanode to have reported PacketAckRoundTripTimeNanos metric",
-          foundNonzeroPacketAckNumOps);
+      assertNotNull("Could not find the head of the datanode write pipeline",
+          headNode);
+      // Close the file and wait for the metrics to rollover
+      Thread.sleep((interval + 1) * 1000);
+      // Check the ack was received
+      MetricsRecordBuilder dnMetrics = getMetrics(headNode.getMetrics()
+          .name());
+      assertTrue("Expected non-zero number of acks",
+          getLongCounter("PacketAckRoundTripTimeNanosNumOps", dnMetrics) > 0);
+      assertQuantileGauges("PacketAckRoundTripTimeNanos" + interval
+          + "s", dnMetrics);
    } finally {
-      if (cluster != null) {cluster.shutdown();}
+      if (cluster != null) {
+        cluster.shutdown();
+      }
    }
  }
 }
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.metrics;
 
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
 import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
+import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.junit.Assert.assertTrue;
 
@@ -63,6 +64,9 @@ public class TestNameNodeMetrics {
   // Number of datanodes in the cluster
   private static final int DATANODE_COUNT = 3;
   private static final int WAIT_GAUGE_VALUE_RETRIES = 20;
+
+  // Rollover interval of percentile metrics (in seconds)
+  private static final int PERCENTILES_INTERVAL = 1;
 
   static {
     CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 100);
@@ -71,6 +75,8 @@ public class TestNameNodeMetrics {
         DFS_REPLICATION_INTERVAL);
     CONF.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY,
         DFS_REPLICATION_INTERVAL);
+    CONF.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY,
+        "" + PERCENTILES_INTERVAL);
 
     ((Log4JLogger)LogFactory.getLog(MetricsAsserts.class))
       .getLogger().setLevel(Level.DEBUG);
@@ -368,4 +374,24 @@ public class TestNameNodeMetrics {
     assertGauge("TransactionsSinceLastCheckpoint", 1L, getMetrics(NS_METRICS));
     assertGauge("TransactionsSinceLastLogRoll", 1L, getMetrics(NS_METRICS));
   }
+
+  /**
+   * Tests that the sync and block report metrics get updated on cluster
+   * startup.
+   */
+  @Test
+  public void testSyncAndBlockReportMetric() throws Exception {
+    MetricsRecordBuilder rb = getMetrics(NN_METRICS);
+    // We have one sync when the cluster starts up, just opening the journal
+    assertCounter("SyncsNumOps", 1L, rb);
+    // Each datanode reports in when the cluster comes up
+    assertCounter("BlockReportNumOps", (long)DATANODE_COUNT, rb);
+
+    // Sleep for an interval+slop to let the percentiles rollover
+    Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
+
+    // Check that the percentiles were updated
+    assertQuantileGauges("Syncs1s", rb);
+    assertQuantileGauges("BlockReport1s", rb);
+  }
 }