HDFS-3899. Add client side metrics for QJM. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-3077@1383139 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
959afc0fd3
commit
729ec86907
|
@ -60,3 +60,5 @@ HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
|
||||||
HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
|
HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
|
||||||
|
|
||||||
HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are out-of-sync (todd)
|
HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are out-of-sync (todd)
|
||||||
|
|
||||||
|
HDFS-3899. QJM: Add client-side metrics (todd)
|
||||||
|
|
|
@ -85,6 +85,8 @@ public class IPCLoggerChannel implements AsyncLogger {
|
||||||
private final NamespaceInfo nsInfo;
|
private final NamespaceInfo nsInfo;
|
||||||
private int httpPort = -1;
|
private int httpPort = -1;
|
||||||
|
|
||||||
|
private final IPCLoggerChannelMetrics metrics;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The number of bytes of edits data still in the queue.
|
* The number of bytes of edits data still in the queue.
|
||||||
*/
|
*/
|
||||||
|
@ -154,6 +156,8 @@ public class IPCLoggerChannel implements AsyncLogger {
|
||||||
|
|
||||||
executor = MoreExecutors.listeningDecorator(
|
executor = MoreExecutors.listeningDecorator(
|
||||||
createExecutor());
|
createExecutor());
|
||||||
|
|
||||||
|
metrics = IPCLoggerChannelMetrics.create(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -262,6 +266,10 @@ public class IPCLoggerChannel implements AsyncLogger {
|
||||||
return queuedEditsSizeBytes;
|
return queuedEditsSizeBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public InetSocketAddress getRemoteAddress() {
|
||||||
|
return addr;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if the server has gotten out of sync from the client,
|
* @return true if the server has gotten out of sync from the client,
|
||||||
* and thus a log roll is required for this logger to successfully start
|
* and thus a log roll is required for this logger to successfully start
|
||||||
|
@ -330,6 +338,7 @@ public class IPCLoggerChannel implements AsyncLogger {
|
||||||
public Void call() throws IOException {
|
public Void call() throws IOException {
|
||||||
throwIfOutOfSync();
|
throwIfOutOfSync();
|
||||||
|
|
||||||
|
long rpcSendTimeNanos = System.nanoTime();
|
||||||
try {
|
try {
|
||||||
getProxy().journal(createReqInfo(),
|
getProxy().journal(createReqInfo(),
|
||||||
segmentTxId, firstTxnId, numTxns, data);
|
segmentTxId, firstTxnId, numTxns, data);
|
||||||
|
@ -343,6 +352,14 @@ public class IPCLoggerChannel implements AsyncLogger {
|
||||||
outOfSync = true;
|
outOfSync = true;
|
||||||
}
|
}
|
||||||
throw e;
|
throw e;
|
||||||
|
} finally {
|
||||||
|
long now = System.nanoTime();
|
||||||
|
long rpcTime = TimeUnit.MICROSECONDS.convert(
|
||||||
|
now - rpcSendTimeNanos, TimeUnit.NANOSECONDS);
|
||||||
|
long endToEndTime = TimeUnit.MICROSECONDS.convert(
|
||||||
|
now - submitNanos, TimeUnit.NANOSECONDS);
|
||||||
|
metrics.addWriteEndToEndLatency(endToEndTime);
|
||||||
|
metrics.addWriteRpcLatency(rpcTime);
|
||||||
}
|
}
|
||||||
synchronized (IPCLoggerChannel.this) {
|
synchronized (IPCLoggerChannel.this) {
|
||||||
highestAckedTxId = firstTxnId + numTxns - 1;
|
highestAckedTxId = firstTxnId + numTxns - 1;
|
||||||
|
@ -547,11 +564,11 @@ public class IPCLoggerChannel implements AsyncLogger {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getLagTxns() {
|
public synchronized long getLagTxns() {
|
||||||
return Math.max(committedTxId - highestAckedTxId, 0);
|
return Math.max(committedTxId - highestAckedTxId, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getLagTimeMillis() {
|
public synchronized long getLagTimeMillis() {
|
||||||
return TimeUnit.MILLISECONDS.convert(
|
return TimeUnit.MILLISECONDS.convert(
|
||||||
Math.max(lastCommitNanos - lastAckNanos, 0),
|
Math.max(lastCommitNanos - lastAckNanos, 0),
|
||||||
TimeUnit.NANOSECONDS);
|
TimeUnit.NANOSECONDS);
|
||||||
|
|
|
@ -0,0 +1,154 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.qjournal.client;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
|
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||||
|
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
|
||||||
|
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The metrics for a journal from the writer's perspective.
|
||||||
|
*/
|
||||||
|
@Metrics(about="Journal client metrics", context="dfs")
|
||||||
|
class IPCLoggerChannelMetrics {
|
||||||
|
final MetricsRegistry registry = new MetricsRegistry("NameNode");
|
||||||
|
|
||||||
|
private volatile IPCLoggerChannel ch;
|
||||||
|
|
||||||
|
private final MutableQuantiles[] writeEndToEndLatencyQuantiles;
|
||||||
|
private final MutableQuantiles[] writeRpcLatencyQuantiles;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* In the case of the NN transitioning between states, edit logs are closed
|
||||||
|
* and reopened. Thus, the IPCLoggerChannel instance that writes to a
|
||||||
|
* given JournalNode may change over the lifetime of the process.
|
||||||
|
* However, metrics2 doesn't have a function to unregister a set of metrics
|
||||||
|
* and fails if a new metrics class is registered with the same name
|
||||||
|
* as the existing one. Hence, we have to maintain our own registry
|
||||||
|
* ("multiton") here, so that we have exactly one metrics instance
|
||||||
|
* per JournalNode, and switch out the pointer to the underlying
|
||||||
|
* IPCLoggerChannel instance.
|
||||||
|
*/
|
||||||
|
private static final Map<String, IPCLoggerChannelMetrics> REGISTRY =
|
||||||
|
Maps.newHashMap();
|
||||||
|
|
||||||
|
private IPCLoggerChannelMetrics(IPCLoggerChannel ch) {
|
||||||
|
this.ch = ch;
|
||||||
|
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
int[] intervals =
|
||||||
|
conf.getInts(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY);
|
||||||
|
if (intervals != null) {
|
||||||
|
writeEndToEndLatencyQuantiles = new MutableQuantiles[intervals.length];
|
||||||
|
writeRpcLatencyQuantiles = new MutableQuantiles[intervals.length];
|
||||||
|
for (int i = 0; i < writeEndToEndLatencyQuantiles.length; i++) {
|
||||||
|
int interval = intervals[i];
|
||||||
|
writeEndToEndLatencyQuantiles[i] = registry.newQuantiles(
|
||||||
|
"writesE2E" + interval + "s",
|
||||||
|
"End-to-end time for write operations", "ops", "LatencyMicros", interval);
|
||||||
|
writeRpcLatencyQuantiles[i] = registry.newQuantiles(
|
||||||
|
"writesRpc" + interval + "s",
|
||||||
|
"RPC RTT for write operations", "ops", "LatencyMicros", interval);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
writeEndToEndLatencyQuantiles = null;
|
||||||
|
writeRpcLatencyQuantiles = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setChannel(IPCLoggerChannel ch) {
|
||||||
|
assert ch.getRemoteAddress().equals(this.ch.getRemoteAddress());
|
||||||
|
this.ch = ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
static IPCLoggerChannelMetrics create(IPCLoggerChannel ch) {
|
||||||
|
String name = getName(ch);
|
||||||
|
synchronized (REGISTRY) {
|
||||||
|
IPCLoggerChannelMetrics m = REGISTRY.get(name);
|
||||||
|
if (m != null) {
|
||||||
|
m.setChannel(ch);
|
||||||
|
} else {
|
||||||
|
m = new IPCLoggerChannelMetrics(ch);
|
||||||
|
DefaultMetricsSystem.instance().register(name, null, m);
|
||||||
|
REGISTRY.put(name, m);
|
||||||
|
}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getName(IPCLoggerChannel ch) {
|
||||||
|
InetSocketAddress addr = ch.getRemoteAddress();
|
||||||
|
String addrStr = addr.getAddress().getHostAddress();
|
||||||
|
|
||||||
|
// IPv6 addresses have colons, which aren't allowed as part of
|
||||||
|
// MBean names. Replace with '.'
|
||||||
|
addrStr = addrStr.replace(':', '.');
|
||||||
|
|
||||||
|
return "IPCLoggerChannel-" + addrStr +
|
||||||
|
"-" + addr.getPort();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Metric("Is the remote logger out of sync with the quorum")
|
||||||
|
public String isOutOfSync() {
|
||||||
|
return Boolean.toString(ch.isOutOfSync());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Metric("The number of transactions the remote log is lagging behind the " +
|
||||||
|
"quorum")
|
||||||
|
public long getCurrentLagTxns() {
|
||||||
|
return ch.getLagTxns();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Metric("The number of milliseconds the remote log is lagging behind the " +
|
||||||
|
"quorum")
|
||||||
|
public long getLagTimeMillis() {
|
||||||
|
return ch.getLagTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Metric("The number of bytes of pending data to be sent to the remote node")
|
||||||
|
public int getQueuedEditsSize() {
|
||||||
|
return ch.getQueuedEditsSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addWriteEndToEndLatency(long micros) {
|
||||||
|
if (writeEndToEndLatencyQuantiles != null) {
|
||||||
|
for (MutableQuantiles q : writeEndToEndLatencyQuantiles) {
|
||||||
|
q.add(micros);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addWriteRpcLatency(long micros) {
|
||||||
|
if (writeRpcLatencyQuantiles != null) {
|
||||||
|
for (MutableQuantiles q : writeRpcLatencyQuantiles) {
|
||||||
|
q.add(micros);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -27,6 +27,10 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
|
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The server-side metrics for a journal from the JournalNode's
|
||||||
|
* perspective.
|
||||||
|
*/
|
||||||
@Metrics(about="Journal metrics", context="dfs")
|
@Metrics(about="Journal metrics", context="dfs")
|
||||||
class JournalMetrics {
|
class JournalMetrics {
|
||||||
final MetricsRegistry registry = new MetricsRegistry("JournalNode");
|
final MetricsRegistry registry = new MetricsRegistry("JournalNode");
|
||||||
|
|
Loading…
Reference in New Issue