HDFS-16678. RBF should support disabling getNodeUsage() in RBFMetrics (#4606)
This commit is contained in:
parent
521e65acfe
commit
e0c8c6eed4
|
@ -50,6 +50,7 @@ import javax.management.NotCompliantMBeanException;
|
|||
import javax.management.ObjectName;
|
||||
import javax.management.StandardMBean;
|
||||
|
||||
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
|
@ -113,6 +114,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
|
|||
/** Prevent holding the page from loading too long. */
|
||||
private final long timeOut;
|
||||
|
||||
/** Enable/Disable getNodeUsage. */
|
||||
private boolean enableGetDNUsage;
|
||||
|
||||
/** Router interface. */
|
||||
private final Router router;
|
||||
|
@ -175,6 +178,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
|
|||
Configuration conf = router.getConfig();
|
||||
this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT,
|
||||
RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS);
|
||||
this.enableGetDNUsage = conf.getBoolean(RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY,
|
||||
RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT);
|
||||
this.topTokenRealOwners = conf.getInt(
|
||||
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY,
|
||||
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT);
|
||||
|
@ -184,6 +189,11 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
|
|||
ms.register(RBFMetrics.class.getName(), "RBFActivity Metrics", this);
|
||||
}
|
||||
|
||||
/**
 * Enable or disable fetching the live datanode report in getNodeUsage().
 * Exposed so tests can toggle the behavior at runtime.
 *
 * @param enableGetDNUsage true to query the datanode report, false to skip it.
 */
@VisibleForTesting
|
||||
public void setEnableGetDNUsage(boolean enableGetDNUsage) {
|
||||
this.enableGetDNUsage = enableGetDNUsage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregister the JMX beans.
|
||||
*/
|
||||
|
@ -537,35 +547,34 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
|
|||
|
||||
@Override // NameNodeMXBean
|
||||
public String getNodeUsage() {
|
||||
float median = 0;
|
||||
float max = 0;
|
||||
float min = 0;
|
||||
float dev = 0;
|
||||
double median = 0;
|
||||
double max = 0;
|
||||
double min = 0;
|
||||
double dev = 0;
|
||||
|
||||
final Map<String, Map<String, Object>> info = new HashMap<>();
|
||||
try {
|
||||
RouterRpcServer rpcServer = this.router.getRpcServer();
|
||||
DatanodeInfo[] live = rpcServer.getDatanodeReport(
|
||||
DatanodeReportType.LIVE, false, timeOut);
|
||||
DatanodeInfo[] live = null;
|
||||
if (this.enableGetDNUsage) {
|
||||
RouterRpcServer rpcServer = this.router.getRpcServer();
|
||||
live = rpcServer.getDatanodeReport(DatanodeReportType.LIVE, false, timeOut);
|
||||
} else {
|
||||
LOG.debug("Getting node usage is disabled.");
|
||||
}
|
||||
|
||||
if (live.length > 0) {
|
||||
float totalDfsUsed = 0;
|
||||
float[] usages = new float[live.length];
|
||||
if (live != null && live.length > 0) {
|
||||
double[] usages = new double[live.length];
|
||||
int i = 0;
|
||||
for (DatanodeInfo dn : live) {
|
||||
usages[i++] = dn.getDfsUsedPercent();
|
||||
totalDfsUsed += dn.getDfsUsedPercent();
|
||||
}
|
||||
totalDfsUsed /= live.length;
|
||||
Arrays.sort(usages);
|
||||
median = usages[usages.length / 2];
|
||||
max = usages[usages.length - 1];
|
||||
min = usages[0];
|
||||
|
||||
for (i = 0; i < usages.length; i++) {
|
||||
dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
|
||||
}
|
||||
dev = (float) Math.sqrt(dev / usages.length);
|
||||
StandardDeviation deviation = new StandardDeviation();
|
||||
dev = deviation.evaluate(usages);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error("Cannot get the live nodes: {}", e.getMessage());
|
||||
|
|
|
@ -321,6 +321,9 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
|
|||
FEDERATION_ROUTER_PREFIX + "dn-report.cache-expire";
|
||||
public static final long DN_REPORT_CACHE_EXPIRE_MS_DEFAULT =
|
||||
TimeUnit.SECONDS.toMillis(10);
|
||||
// Whether RBFMetrics#getNodeUsage() fetches the live datanode report; enabled by default.
public static final String DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY =
|
||||
FEDERATION_ROUTER_PREFIX + "enable.get.dn.usage";
|
||||
public static final boolean DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT = true;
|
||||
|
||||
// HDFS Router-based federation quota
|
||||
public static final String DFS_ROUTER_QUOTA_ENABLE =
|
||||
|
|
|
@ -195,6 +195,16 @@
|
|||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.federation.router.enable.get.dn.usage</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
If true, the getNodeUsage method in RBFMetrics will return an up-to-date
|
||||
result collected from downstream nameservices. But it will take a long
|
||||
time and take up thread resources. If false, it will return a mock result with all values set to 0.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.federation.router.metrics.class</name>
|
||||
<value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>
|
||||
|
|
|
@ -131,6 +131,7 @@ import org.apache.hadoop.security.UserGroupInformation;
|
|||
import org.apache.hadoop.service.Service.STATE;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.test.LambdaTestUtils;
|
||||
import org.codehaus.jettison.json.JSONException;
|
||||
import org.codehaus.jettison.json.JSONObject;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
|
@ -2181,4 +2182,34 @@ public class TestRouterRpc {
|
|||
routerDFS.delete(dirPath, true);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDisableNodeUsageInRBFMetrics() throws JSONException {
|
||||
RBFMetrics rbfMetrics = router.getRouter().getMetrics();
|
||||
FederationRPCMetrics federationRPCMetrics = router.getRouter().getRpcServer().getRPCMetrics();
|
||||
|
||||
long proxyOpBefore = federationRPCMetrics.getProxyOps();
|
||||
String nodeUsageEnable = router.getRouter().getMetrics().getNodeUsage();
|
||||
assertNotNull(nodeUsageEnable);
|
||||
long proxyOpAfterWithEnable = federationRPCMetrics.getProxyOps();
|
||||
assertEquals(proxyOpBefore + 2, proxyOpAfterWithEnable);
|
||||
|
||||
rbfMetrics.setEnableGetDNUsage(false);
|
||||
String nodeUsageDisable = rbfMetrics.getNodeUsage();
|
||||
assertNotNull(nodeUsageDisable);
|
||||
long proxyOpAfterWithDisable = federationRPCMetrics.getProxyOps();
|
||||
assertEquals(proxyOpAfterWithEnable, proxyOpAfterWithDisable);
|
||||
JSONObject jsonObject = new JSONObject(nodeUsageDisable);
|
||||
JSONObject json = jsonObject.getJSONObject("nodeUsage");
|
||||
assertEquals("0.00%", json.get("min"));
|
||||
assertEquals("0.00%", json.get("median"));
|
||||
assertEquals("0.00%", json.get("max"));
|
||||
assertEquals("0.00%", json.get("stdDev"));
|
||||
|
||||
rbfMetrics.setEnableGetDNUsage(true);
|
||||
String nodeUsageWithReEnable = rbfMetrics.getNodeUsage();
|
||||
assertNotNull(nodeUsageWithReEnable);
|
||||
long proxyOpAfterWithReEnable = federationRPCMetrics.getProxyOps();
|
||||
assertEquals(proxyOpAfterWithDisable + 2, proxyOpAfterWithReEnable);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue