HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1551427 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2013-12-17 00:48:04 +00:00
parent e3f9b1457e
commit e8b27cc5a2
4 changed files with 57 additions and 0 deletions

View File

@ -156,6 +156,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
(Liang Xie via junping_du)
HDFS-5350. Name Node should report fsimage transfer time as a metric.
(Jimmy Xiang via wang)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)

View File

@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.util.Time.now;
import java.security.PrivilegedExceptionAction;
import java.util.*;
import java.io.*;
@ -42,6 +44,7 @@ import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.server.common.JspHelper;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.hdfs.util.MD5FileUtils;
@ -89,6 +92,7 @@ public class GetImageServlet extends HttpServlet {
final GetImageParams parsedParams = new GetImageParams(request, response);
final Configuration conf =
(Configuration)getServletContext().getAttribute(JspHelper.CURRENT_CONF);
final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
if (UserGroupInformation.isSecurityEnabled() &&
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
@ -129,14 +133,26 @@ public class GetImageServlet extends HttpServlet {
throw new IOException(errorMessage);
}
CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
long start = now();
serveFile(imageFile);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addGetImage(elapsed);
}
} else if (parsedParams.isGetEdit()) {
long startTxId = parsedParams.getStartTxId();
long endTxId = parsedParams.getEndTxId();
File editFile = nnImage.getStorage()
.findFinalizedEditsFile(startTxId, endTxId);
long start = now();
serveFile(editFile);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addGetEdit(elapsed);
}
} else if (parsedParams.isPutImage()) {
final long txid = parsedParams.getTxId();
@ -160,12 +176,18 @@ public class GetImageServlet extends HttpServlet {
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
}
long start = now();
// issue an HTTP GET request to download the new fsimage
MD5Hash downloadImageDigest =
TransferFsImage.downloadImageToStorage(
parsedParams.getInfoServer(), txid,
nnImage.getStorage(), true);
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addPutImage(elapsed);
}
// Now that we have a new checkpoint, we might be able to
// remove some old ones.

View File

@ -85,6 +85,13 @@ public class NameNodeMetrics {
@Metric("Time loading FS Image at startup in msec")
MutableGaugeInt fsImageLoadTime;
// Rate metrics fed by GetImageServlet, one per request type it handles.
// Each is updated through the matching add* method of this class.

// Samples recorded for "getEdit" requests.
@Metric("GetImageServlet getEdit")
MutableRate getEdit;
// Samples recorded for "getImage" requests.
@Metric("GetImageServlet getImage")
MutableRate getImage;
// Samples recorded for "putImage" requests.
@Metric("GetImageServlet putImage")
MutableRate putImage;
NameNodeMetrics(String processName, String sessionId, int[] intervals) {
registry.tag(ProcessName, processName).tag(SessionId, sessionId);
@ -232,4 +239,16 @@ public class NameNodeMetrics {
/**
 * Stores the given elapsed time in the {@code safeModeTime} gauge.
 *
 * The value is narrowed to {@code int} before it is stored, so inputs
 * outside the {@code int} range are truncated by the cast.
 *
 * @param elapsed the elapsed time to record
 */
public void setSafeModeTime(long elapsed) {
  final int narrowed = (int) elapsed;
  this.safeModeTime.set(narrowed);
}
/**
 * Adds one sample to the {@code getEdit} rate metric.
 *
 * GetImageServlet passes the time it spent serving an edits file.
 *
 * @param latency the sample to add (presumably milliseconds; confirm
 *                against the caller's clock source)
 */
public void addGetEdit(long latency) {
  this.getEdit.add(latency);
}
/**
 * Adds one sample to the {@code getImage} rate metric.
 *
 * GetImageServlet passes the time it spent serving an image file.
 *
 * @param latency the sample to add (presumably milliseconds; confirm
 *                against the caller's clock source)
 */
public void addGetImage(long latency) {
  this.getImage.add(latency);
}
/**
 * Adds one sample to the {@code putImage} rate metric.
 *
 * GetImageServlet passes the time it spent downloading the new image
 * and saving its digest / renaming the checkpoint image.
 *
 * @param latency the sample to add (presumably milliseconds; confirm
 *                against the caller's clock source)
 */
public void addPutImage(long latency) {
  this.putImage.add(latency);
}
}

View File

@ -20,6 +20,9 @@ package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
@ -74,6 +77,7 @@ import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
@ -107,6 +111,7 @@ public class TestCheckpoint {
}
static final Log LOG = LogFactory.getLog(TestCheckpoint.class);
static final String NN_METRICS = "NameNodeActivity";
static final long seed = 0xDEADBEEFL;
static final int blockSize = 4096;
@ -1055,6 +1060,14 @@ public class TestCheckpoint {
//
secondary = startSecondaryNameNode(conf);
secondary.doCheckpoint();
MetricsRecordBuilder rb = getMetrics(NN_METRICS);
assertCounterGt("GetImageNumOps", 0, rb);
assertCounterGt("GetEditNumOps", 0, rb);
assertCounterGt("PutImageNumOps", 0, rb);
assertGaugeGt("GetImageAvgTime", 0.0, rb);
assertGaugeGt("GetEditAvgTime", 0.0, rb);
assertGaugeGt("PutImageAvgTime", 0.0, rb);
} finally {
fileSys.close();
cleanup(secondary);