HDFS-15854. Make some parameters configurable for SlowDiskTracker and SlowPeerTracker (#2718)

Authored-by: tomscut <litao@bigo.sg>
This commit was made by litao on 2021-03-01 23:52:59 +08:00 and committed via GitHub.
parent c3b3b36dee
commit 32353eb38a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 4 deletions

View File

@@ -676,6 +676,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.datanode.slowpeer.low.threshold.ms";
public static final long DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT =
5L;
public static final String DFS_DATANODE_MAX_NODES_TO_REPORT_KEY =
"dfs.datanode.max.nodes.to.report";
public static final int DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT =
5;
public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY =
"dfs.datanode.min.outlier.detection.disks";
public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT =
@@ -684,6 +688,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.datanode.slowdisk.low.threshold.ms";
public static final long DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT =
20L;
public static final String DFS_DATANODE_MAX_DISKS_TO_REPORT_KEY =
"dfs.datanode.max.disks.to.report";
public static final int DFS_DATANODE_MAX_DISKS_TO_REPORT_DEFAULT =
5;
public static final String DFS_DATANODE_HOST_NAME_KEY =
HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY;
public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY =

View File

@@ -77,7 +77,7 @@ public class SlowDiskTracker {
* Number of disks to include in JSON report per operation. We will return
* disks with the highest latency.
*/
private static final int MAX_DISKS_TO_REPORT = 5;
private final int maxDisksToReport;
private static final String DATANODE_DISK_SEPARATOR = ":";
private final long reportGenerationIntervalMs;
@@ -107,6 +107,9 @@ public SlowDiskTracker(Configuration conf, Timer timer) {
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT,
TimeUnit.MILLISECONDS);
this.maxDisksToReport = conf.getInt(
DFSConfigKeys.DFS_DATANODE_MAX_DISKS_TO_REPORT_KEY,
DFSConfigKeys.DFS_DATANODE_MAX_DISKS_TO_REPORT_DEFAULT);
this.reportValidityMs = reportGenerationIntervalMs * 3;
}
@@ -153,7 +156,7 @@ public void updateSlowDiskReportAsync(long now) {
@Override
public void run() {
slowDisksReport = getSlowDisks(diskIDLatencyMap,
MAX_DISKS_TO_REPORT, now);
maxDisksToReport, now);
cleanUpOldReports(now);

View File

@@ -79,7 +79,7 @@ public class SlowPeerTracker {
* Number of nodes to include in JSON report. We will return nodes with
* the highest number of votes from peers.
*/
private static final int MAX_NODES_TO_REPORT = 5;
private final int maxNodesToReport;
/**
* Information about peers that have reported a node as being slow.
@@ -103,6 +103,9 @@ public SlowPeerTracker(Configuration conf, Timer timer) {
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT,
TimeUnit.MILLISECONDS) * 3;
this.maxNodesToReport = conf.getInt(
DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_KEY,
DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT);
}
/**
@@ -193,7 +196,7 @@ private SortedSet<String> filterNodeReports(
*/
public String getJson() {
Collection<ReportForJson> validReports = getJsonReports(
MAX_NODES_TO_REPORT);
maxNodesToReport);
try {
return WRITER.writeValueAsString(validReports);
} catch (JsonProcessingException e) {

View File

@@ -2346,6 +2346,15 @@
</description>
</property>
<property>
<name>dfs.datanode.max.nodes.to.report</name>
<value>5</value>
<description>
Number of nodes to include in JSON report. We will return nodes with
the highest number of votes from peers.
</description>
</property>
<property>
<name>dfs.datanode.outliers.report.interval</name>
<value>30m</value>
@@ -2386,6 +2395,15 @@
</description>
</property>
<property>
<name>dfs.datanode.max.disks.to.report</name>
<value>5</value>
<description>
Number of disks to include in JSON report per operation. We will return
disks with the highest latency.
</description>
</property>
<property>
<name>hadoop.user.group.metrics.percentiles.intervals</name>
<value></value>