HDFS-11722. Change Datanode file IO profiling sampling to percentage. Contributed by Hanisha Koneru.

This commit is contained in:
Arpit Agarwal 2017-05-03 16:30:01 -07:00
parent 4b7d1ff326
commit 89baea96d5
13 changed files with 53 additions and 39 deletions

View File

@ -325,8 +325,8 @@ FsVolume
Per-volume metrics contain Datanode Volume IO related statistics. Per-volume
metrics are off by default. They can be enabled by setting `dfs.datanode
.fileio.profiling.sampling.fraction` to a fraction between 0.0 and 1.0.
Setting this value to 0.0 would mean profiling is not enabled. But enabling
.fileio.profiling.sampling.percentage` to an integer value between 1 and 100.
Setting this value to 0 would mean profiling is not enabled. But enabling
per-volume metrics may have a performance impact. Each metrics record
contains tags such as Hostname as additional information along with metrics.

View File

@ -670,10 +670,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final boolean
DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_DEFAULT = false;
public static final String
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY =
"dfs.datanode.fileio.profiling.sampling.fraction";
public static final double
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT = 0.0;
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY =
"dfs.datanode.fileio.profiling.sampling.percentage";
public static final int
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT = 0;
//Keys with no defaults
public static final String DFS_DATANODE_PLUGINS_KEY = "dfs.datanode.plugins";

View File

@ -207,9 +207,10 @@ public class DatanodeManager {
this.dataNodePeerStatsEnabled = conf.getBoolean(
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY,
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT);
this.dataNodeDiskStatsEnabled = Util.isDiskStatsEnabled(conf.getDouble(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY,
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT));
this.dataNodeDiskStatsEnabled = Util.isDiskStatsEnabled(conf.getInt(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
DFSConfigKeys.
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT));
final Timer timer = new Timer();
this.slowPeerTracker = dataNodePeerStatsEnabled ?

View File

@ -104,17 +104,18 @@ public final class Util {
return uris;
}
public static boolean isDiskStatsEnabled(double fileIOSamplingFraction) {
public static boolean isDiskStatsEnabled(int fileIOSamplingPercentage) {
final boolean isEnabled;
if (fileIOSamplingFraction < 0.000001) {
if (fileIOSamplingPercentage <= 0) {
LOG.info(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY + " set to "
+ fileIOSamplingFraction + ". Disabling file IO profiling");
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY + " set to "
+ fileIOSamplingPercentage + ". Disabling file IO profiling");
isEnabled = false;
} else {
LOG.info(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY + " set to "
+ fileIOSamplingFraction + ". Enabling file IO profiling");
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY + " set to "
+ fileIOSamplingPercentage + ". Enabling file IO profiling");
isEnabled = true;
}

View File

@ -172,9 +172,10 @@ public class DNConf {
this.peerStatsEnabled = getConf().getBoolean(
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY,
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT);
this.diskStatsEnabled = Util.isDiskStatsEnabled(getConf().getDouble(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY,
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT));
this.diskStatsEnabled = Util.isDiskStatsEnabled(getConf().getInt(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
DFSConfigKeys.
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT));
this.outliersReportIntervalMs = getConf().getTimeDuration(
DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT,

View File

@ -62,8 +62,8 @@ import static org.apache.hadoop.hdfs.server.datanode.FileIoProvider.OPERATION.*;
*
* Behavior can be injected into these events by enabling the
* profiling and/or fault injection event hooks through
* {@link DFSConfigKeys#DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY} and
* {@link DFSConfigKeys#DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY}.
* {@link DFSConfigKeys#DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY}
* and {@link DFSConfigKeys#DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY}.
* These event hooks are disabled by default.
*
* Most functions accept an optional {@link FsVolumeSpi} parameter for

View File

@ -44,18 +44,19 @@ class ProfilingFileIoEvents {
public ProfilingFileIoEvents(@Nullable Configuration conf) {
if (conf != null) {
double fileIOSamplingFraction = conf.getDouble(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY,
int fileIOSamplingPercentage = conf.getInt(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_DEFAULT);
isEnabled = Util.isDiskStatsEnabled(fileIOSamplingFraction);
if (fileIOSamplingFraction > 1) {
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT);
isEnabled = Util.isDiskStatsEnabled(fileIOSamplingPercentage);
if (fileIOSamplingPercentage > 100) {
LOG.warn(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY +
" value cannot be more than 1. Setting value to 1");
fileIOSamplingFraction = 1;
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY +
" value cannot be more than 100. Setting value to 100");
fileIOSamplingPercentage = 100;
}
sampleRangeMax = (int) (fileIOSamplingFraction * Integer.MAX_VALUE);
sampleRangeMax = (int) ((double) fileIOSamplingPercentage / 100 *
Integer.MAX_VALUE);
} else {
isEnabled = false;
sampleRangeMax = 0;

View File

@ -1942,6 +1942,17 @@
</description>
</property>
<property>
<name>dfs.datanode.fileio.profiling.sampling.percentage</name>
<value>0</value>
<description>
This setting controls the percentage of file I/O events which will be
profiled for DataNode disk statistics. The default value of 0 disables
disk statistics. Set to an integer value between 1 and 100 to enable disk
statistics.
</description>
</property>
<property>
<name>hadoop.user.group.metrics.percentiles.intervals</name>
<value></value>

View File

@ -28,7 +28,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.conf.Configuration;
import static org.apache.hadoop.hdfs.DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY;
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys
.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
@ -80,7 +80,7 @@ public class TestSlowDiskTracker {
static {
conf = new HdfsConfiguration();
conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1L);
conf.setDouble(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
conf.setInt(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, 100);
conf.setTimeDuration(DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
OUTLIERS_REPORT_INTERVAL, TimeUnit.MILLISECONDS);
}

View File

@ -212,8 +212,8 @@ public class TestDataNodeMXBean {
@Test
public void testDataNodeMXBeanSlowDisksEnabled() throws Exception {
Configuration conf = new Configuration();
conf.setDouble(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
conf.setInt(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, 100);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {

View File

@ -121,8 +121,8 @@ public class TestDataNodeVolumeMetrics {
private MiniDFSCluster setupClusterForVolumeMetrics() throws IOException {
Configuration conf = new HdfsConfiguration();
conf.setDouble(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
conf.setInt(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, 100);
SimulatedFSDataset.setFactory(conf);
return new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATANODES)

View File

@ -105,8 +105,9 @@ public class TestNameNodeStatusMXBean {
@Test (timeout = 120000L)
public void testNameNodeMXBeanSlowDisksEnabled() throws Exception {
Configuration conf = new Configuration();
conf.setDouble(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
conf.setInt(
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
100);
conf.setTimeDuration(
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
1000, TimeUnit.MILLISECONDS);

View File

@ -109,8 +109,6 @@ public class TestHdfsConfigFields extends TestConfigurationFieldsBase {
.add(DFSConfigKeys.DFS_NAMENODE_STARTUP_KEY);
configurationPropsToSkipCompare.add(DFSConfigKeys
.DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY);
configurationPropsToSkipCompare.add(DFSConfigKeys
.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY);
// Allocate
xmlPropsToSkipCompare = new HashSet<String>();