HDFS-11477. Simplify file IO profiling configuration. Contributed by Hanisha Koneru.

This commit is contained in:
Arpit Agarwal 2017-03-09 14:34:10 -08:00
parent b6c4776911
commit 5cdcda34a2
9 changed files with 43 additions and 26 deletions

View File

@ -323,7 +323,12 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
FsVolume FsVolume
-------- --------
Per-volume metrics contain Datanode Volume IO related statistics. Per-volume metrics are off by default. They can be enbabled by setting `dfs.datanode.enable.fileio.profiling` to **true**, but enabling per-volume metrics may have a performance impact. Each metrics record contains tags such as Hostname as additional information along with metrics. Per-volume metrics contain Datanode Volume IO related statistics. Per-volume
metrics are off by default. They can be enabled by setting
`dfs.datanode.fileio.profiling.sampling.fraction` to a fraction between 0.0 and 1.0.
Setting this value to 0.0 disables profiling. Note that enabling
per-volume metrics may have a performance impact. Each metrics record
contains tags such as Hostname as additional information along with metrics.
| Name | Description | | Name | Description |
|:---- |:---- | |:---- |:---- |

View File

@ -634,7 +634,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_BLOCK_MISREPLICATION_PROCESSING_LIMIT_DEFAULT = 10000; public static final int DFS_BLOCK_MISREPLICATION_PROCESSING_LIMIT_DEFAULT = 10000;
public static final String DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY = public static final String DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY =
"dfs.datanode.slow.peers.report.interval"; "dfs.datanode.outliers.report.interval";
public static final int DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT = public static final int DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT =
1800 * 1000; 1800 * 1000;
@ -664,10 +664,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_IMAGE_TRANSFER_CHUNKSIZE_DEFAULT = 64 * 1024; public static final int DFS_IMAGE_TRANSFER_CHUNKSIZE_DEFAULT = 64 * 1024;
// Datanode File IO Stats // Datanode File IO Stats
public static final String DFS_DATANODE_ENABLE_FILEIO_PROFILING_KEY =
"dfs.datanode.enable.fileio.profiling";
public static final boolean DFS_DATANODE_ENABLE_FILEIO_PROFILING_DEFAULT =
false;
public static final String DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY = public static final String DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY =
"dfs.datanode.enable.fileio.fault.injection"; "dfs.datanode.enable.fileio.fault.injection";
public static final boolean public static final boolean
@ -676,7 +672,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY = DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY =
"dfs.datanode.fileio.profiling.sampling.fraction"; "dfs.datanode.fileio.profiling.sampling.fraction";
public static final double public static final double
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEAFULT = 1.0; DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT = 0.0;
//Keys with no defaults //Keys with no defaults
public static final String DFS_DATANODE_PLUGINS_KEY = "dfs.datanode.plugins"; public static final String DFS_DATANODE_PLUGINS_KEY = "dfs.datanode.plugins";

View File

@ -17,6 +17,11 @@
*/ */
package org.apache.hadoop.hdfs.server.common; package org.apache.hadoop.hdfs.server.common;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
@ -25,10 +30,6 @@
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@InterfaceAudience.Private @InterfaceAudience.Private
public final class Util { public final class Util {
private final static Log LOG = LogFactory.getLog(Util.class.getName()); private final static Log LOG = LogFactory.getLog(Util.class.getName());
@ -102,4 +103,21 @@ public static List<URI> stringCollectionAsURIs(
} }
return uris; return uris;
} }
/**
 * Decides whether file IO profiling should be enabled for the given
 * sampling fraction, and logs the decision at INFO level.
 *
 * Profiling is enabled only when the fraction is at least 0.000001.
 * Zero, negative values and anything smaller than that threshold disable
 * it. NaN also disables it: NaN fails every ordered comparison, so the
 * check is phrased as "fraction >= threshold" rather than
 * "fraction < threshold" to keep an unparseable-as-a-fraction value from
 * silently turning profiling on.
 *
 * @param fileIOSamplingFraction the value read for
 *     {@link DFSConfigKeys#DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY}
 * @return true if file IO profiling should be enabled, false otherwise
 */
public static boolean isDiskStatsEnabled(double fileIOSamplingFraction) {
  // ">=" is false for NaN, so NaN falls on the "disabled" side; for every
  // other value this is exactly the negation of "fraction < 0.000001".
  final boolean isEnabled = fileIOSamplingFraction >= 0.000001;
  LOG.info(DFSConfigKeys
      .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY + " set to "
      + fileIOSamplingFraction
      + (isEnabled ? ". Enabling" : ". Disabling") + " file IO profiling");
  return isEnabled;
}
} }

View File

@ -63,6 +63,7 @@
import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver; import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver;
import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil; import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.security.SaslPropertiesResolver; import org.apache.hadoop.security.SaslPropertiesResolver;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -171,9 +172,9 @@ public DNConf(final Configurable dn) {
this.peerStatsEnabled = getConf().getBoolean( this.peerStatsEnabled = getConf().getBoolean(
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY,
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT); DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT);
this.diskStatsEnabled = getConf().getBoolean( this.diskStatsEnabled = Util.isDiskStatsEnabled(getConf().getDouble(
DFSConfigKeys.DFS_DATANODE_ENABLE_FILEIO_PROFILING_KEY, DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY,
DFSConfigKeys.DFS_DATANODE_ENABLE_FILEIO_PROFILING_DEFAULT); DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT));
this.outliersReportIntervalMs = getConf().getTimeDuration( this.outliersReportIntervalMs = getConf().getTimeDuration(
DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT, DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT,

View File

@ -62,7 +62,7 @@
* *
* Behavior can be injected into these events by enabling the * Behavior can be injected into these events by enabling the
* profiling and/or fault injection event hooks through * profiling and/or fault injection event hooks through
* {@link DFSConfigKeys#DFS_DATANODE_ENABLE_FILEIO_PROFILING_KEY} and * {@link DFSConfigKeys#DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY} and
* {@link DFSConfigKeys#DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY}. * {@link DFSConfigKeys#DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY}.
* These event hooks are disabled by default. * These event hooks are disabled by default.
* *

View File

@ -23,6 +23,7 @@
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.DataNodeVolumeMetrics; import org.apache.hadoop.hdfs.server.datanode.fsdataset.DataNodeVolumeMetrics;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
@ -43,13 +44,11 @@ class ProfilingFileIoEvents {
public ProfilingFileIoEvents(@Nullable Configuration conf) { public ProfilingFileIoEvents(@Nullable Configuration conf) {
if (conf != null) { if (conf != null) {
isEnabled = conf.getBoolean(DFSConfigKeys double fileIOSamplingFraction = conf.getDouble(
.DFS_DATANODE_ENABLE_FILEIO_PROFILING_KEY, DFSConfigKeys DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY,
.DFS_DATANODE_ENABLE_FILEIO_PROFILING_DEFAULT);
double fileIOSamplingFraction = conf.getDouble(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY,
DFSConfigKeys DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEAFULT); .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT);
isEnabled = Util.isDiskStatsEnabled(fileIOSamplingFraction);
if (fileIOSamplingFraction > 1) { if (fileIOSamplingFraction > 1) {
LOG.warn(DFSConfigKeys LOG.warn(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY + .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY +

View File

@ -1931,7 +1931,7 @@
<property> <property>
<name>dfs.datanode.outliers.report.interval</name> <name>dfs.datanode.outliers.report.interval</name>
<value>30m</value> <value>1800000</value>
<description> <description>
This setting controls how frequently DataNodes will report their peer This setting controls how frequently DataNodes will report their peer
latencies to the NameNode via heartbeats. This setting supports latencies to the NameNode via heartbeats. This setting supports

View File

@ -121,8 +121,8 @@ public void testVolumeMetricsWithVolumeDepartureArrival() throws Exception {
private MiniDFSCluster setupClusterForVolumeMetrics() throws IOException { private MiniDFSCluster setupClusterForVolumeMetrics() throws IOException {
Configuration conf = new HdfsConfiguration(); Configuration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys conf.setDouble(DFSConfigKeys
.DFS_DATANODE_ENABLE_FILEIO_PROFILING_KEY, true); .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
SimulatedFSDataset.setFactory(conf); SimulatedFSDataset.setFactory(conf);
return new MiniDFSCluster.Builder(conf) return new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATANODES) .numDataNodes(NUM_DATANODES)

View File

@ -107,8 +107,6 @@ public void initializeMemberVariables() {
.add(DFSConfigKeys.DFS_DATANODE_STARTUP_KEY); .add(DFSConfigKeys.DFS_DATANODE_STARTUP_KEY);
configurationPropsToSkipCompare configurationPropsToSkipCompare
.add(DFSConfigKeys.DFS_NAMENODE_STARTUP_KEY); .add(DFSConfigKeys.DFS_NAMENODE_STARTUP_KEY);
configurationPropsToSkipCompare
.add(DFSConfigKeys.DFS_DATANODE_ENABLE_FILEIO_PROFILING_KEY);
configurationPropsToSkipCompare.add(DFSConfigKeys configurationPropsToSkipCompare.add(DFSConfigKeys
.DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY); .DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY);
configurationPropsToSkipCompare.add(DFSConfigKeys configurationPropsToSkipCompare.add(DFSConfigKeys