YARN-4330. MiniYARNCluster is showing multiple "Failed to instantiate default resource calculator" warning messages. Contributed by Varun Saxena.
(cherry picked from commit 3541ed8068)
This commit is contained in:
parent
b96ed43897
commit
2d94f938c5
|
@ -190,6 +190,9 @@ public class ResourceCalculatorPlugin extends Configured {
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return new ResourceCalculatorPlugin();
|
return new ResourceCalculatorPlugin();
|
||||||
|
} catch (UnsupportedOperationException ue) {
|
||||||
|
LOG.warn("Failed to instantiate default resource calculator. "
|
||||||
|
+ ue.getMessage());
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
LOG.warn(t + ": Failed to instantiate default resource calculator.", t);
|
LOG.warn(t + ": Failed to instantiate default resource calculator.", t);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1310,7 +1310,8 @@
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>How often to monitor the node and the containers.</description>
|
<description>How often to monitor the node and the containers.
|
||||||
|
If 0 or negative, monitoring is disabled.</description>
|
||||||
<name>yarn.nodemanager.resource-monitor.interval-ms</name>
|
<name>yarn.nodemanager.resource-monitor.interval-ms</name>
|
||||||
<value>3000</value>
|
<value>3000</value>
|
||||||
</property>
|
</property>
|
||||||
|
@ -1328,7 +1329,8 @@
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>How often to monitor containers. If not set, the value for
|
<description>How often to monitor containers. If not set, the value for
|
||||||
yarn.nodemanager.resource-monitor.interval-ms will be used.</description>
|
yarn.nodemanager.resource-monitor.interval-ms will be used.
|
||||||
|
If 0 or negative, container monitoring is disabled.</description>
|
||||||
<name>yarn.nodemanager.container-monitor.interval-ms</name>
|
<name>yarn.nodemanager.container-monitor.interval-ms</name>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,11 @@ public class NodeResourceMonitorImpl extends AbstractService implements
|
||||||
* @return <em>true</em> if we can monitor the node resource utilization.
|
* @return <em>true</em> if we can monitor the node resource utilization.
|
||||||
*/
|
*/
|
||||||
private boolean isEnabled() {
|
private boolean isEnabled() {
|
||||||
|
if (this.monitoringInterval <= 0) {
|
||||||
|
LOG.info("Node Resource monitoring interval is <=0. "
|
||||||
|
+ this.getClass().getName() + " is disabled.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (resourceCalculatorPlugin == null) {
|
if (resourceCalculatorPlugin == null) {
|
||||||
LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
|
LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
|
||||||
+ this.getClass().getName() + " is disabled.");
|
+ this.getClass().getName() + " is disabled.");
|
||||||
|
|
|
@ -157,7 +157,8 @@ public class ContainersMonitorImpl extends AbstractService implements
|
||||||
LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
|
LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
|
||||||
LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
|
LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
|
||||||
|
|
||||||
containersMonitorEnabled = isContainerMonitorEnabled();
|
containersMonitorEnabled =
|
||||||
|
isContainerMonitorEnabled() && monitoringInterval > 0;
|
||||||
LOG.info("ContainersMonitor enabled: " + containersMonitorEnabled);
|
LOG.info("ContainersMonitor enabled: " + containersMonitorEnabled);
|
||||||
|
|
||||||
nodeCpuPercentageForYARN =
|
nodeCpuPercentageForYARN =
|
||||||
|
|
|
@ -37,6 +37,12 @@ public class NodeManagerHardwareUtils {
|
||||||
private static final Log LOG = LogFactory
|
private static final Log LOG = LogFactory
|
||||||
.getLog(NodeManagerHardwareUtils.class);
|
.getLog(NodeManagerHardwareUtils.class);
|
||||||
|
|
||||||
|
private static boolean isHardwareDetectionEnabled(Configuration conf) {
|
||||||
|
return conf.getBoolean(
|
||||||
|
YarnConfiguration.NM_ENABLE_HARDWARE_CAPABILITY_DETECTION,
|
||||||
|
YarnConfiguration.DEFAULT_NM_ENABLE_HARDWARE_CAPABILITY_DETECTION);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* Returns the number of CPUs on the node. This value depends on the
|
* Returns the number of CPUs on the node. This value depends on the
|
||||||
|
@ -138,6 +144,15 @@ public class NodeManagerHardwareUtils {
|
||||||
return nodeCpuPercentage;
|
return nodeCpuPercentage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int getConfiguredVCores(Configuration conf) {
|
||||||
|
int cores = conf.getInt(YarnConfiguration.NM_VCORES,
|
||||||
|
YarnConfiguration.DEFAULT_NM_VCORES);
|
||||||
|
if (cores == -1) {
|
||||||
|
cores = YarnConfiguration.DEFAULT_NM_VCORES;
|
||||||
|
}
|
||||||
|
return cores;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function to return the number of vcores on the system that can be used for
|
* Function to return the number of vcores on the system that can be used for
|
||||||
* YARN containers. If a number is specified in the configuration file, then
|
* YARN containers. If a number is specified in the configuration file, then
|
||||||
|
@ -154,11 +169,16 @@ public class NodeManagerHardwareUtils {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public static int getVCores(Configuration conf) {
|
public static int getVCores(Configuration conf) {
|
||||||
|
if (!isHardwareDetectionEnabled(conf)) {
|
||||||
|
return getConfiguredVCores(conf);
|
||||||
|
}
|
||||||
// is this os for which we can determine cores?
|
// is this os for which we can determine cores?
|
||||||
ResourceCalculatorPlugin plugin =
|
ResourceCalculatorPlugin plugin =
|
||||||
ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, conf);
|
ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, conf);
|
||||||
|
if (plugin == null) {
|
||||||
return NodeManagerHardwareUtils.getVCores(plugin, conf);
|
return getConfiguredVCores(conf);
|
||||||
|
}
|
||||||
|
return getVCoresInternal(plugin, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -180,43 +200,35 @@ public class NodeManagerHardwareUtils {
|
||||||
*/
|
*/
|
||||||
public static int getVCores(ResourceCalculatorPlugin plugin,
|
public static int getVCores(ResourceCalculatorPlugin plugin,
|
||||||
Configuration conf) {
|
Configuration conf) {
|
||||||
|
if (!isHardwareDetectionEnabled(conf) || plugin == null) {
|
||||||
|
return getConfiguredVCores(conf);
|
||||||
|
}
|
||||||
|
return getVCoresInternal(plugin, conf);
|
||||||
|
}
|
||||||
|
|
||||||
int cores;
|
private static int getVCoresInternal(ResourceCalculatorPlugin plugin,
|
||||||
boolean hardwareDetectionEnabled =
|
Configuration conf) {
|
||||||
conf.getBoolean(
|
|
||||||
YarnConfiguration.NM_ENABLE_HARDWARE_CAPABILITY_DETECTION,
|
|
||||||
YarnConfiguration.DEFAULT_NM_ENABLE_HARDWARE_CAPABILITY_DETECTION);
|
|
||||||
|
|
||||||
String message;
|
String message;
|
||||||
if (!hardwareDetectionEnabled || plugin == null) {
|
int cores = conf.getInt(YarnConfiguration.NM_VCORES, -1);
|
||||||
cores =
|
if (cores == -1) {
|
||||||
conf.getInt(YarnConfiguration.NM_VCORES,
|
float physicalCores =
|
||||||
YarnConfiguration.DEFAULT_NM_VCORES);
|
NodeManagerHardwareUtils.getContainersCPUs(plugin, conf);
|
||||||
if (cores == -1) {
|
float multiplier =
|
||||||
cores = YarnConfiguration.DEFAULT_NM_VCORES;
|
conf.getFloat(YarnConfiguration.NM_PCORES_VCORES_MULTIPLIER,
|
||||||
}
|
YarnConfiguration.DEFAULT_NM_PCORES_VCORES_MULTIPLIER);
|
||||||
} else {
|
if (multiplier > 0) {
|
||||||
cores = conf.getInt(YarnConfiguration.NM_VCORES, -1);
|
float tmp = physicalCores * multiplier;
|
||||||
if (cores == -1) {
|
if (tmp > 0 && tmp < 1) {
|
||||||
float physicalCores =
|
// on a single core machine - tmp can be between 0 and 1
|
||||||
NodeManagerHardwareUtils.getContainersCPUs(plugin, conf);
|
cores = 1;
|
||||||
float multiplier =
|
|
||||||
conf.getFloat(YarnConfiguration.NM_PCORES_VCORES_MULTIPLIER,
|
|
||||||
YarnConfiguration.DEFAULT_NM_PCORES_VCORES_MULTIPLIER);
|
|
||||||
if (multiplier > 0) {
|
|
||||||
float tmp = physicalCores * multiplier;
|
|
||||||
if (tmp > 0 && tmp < 1) {
|
|
||||||
// on a single core machine - tmp can be between 0 and 1
|
|
||||||
cores = 1;
|
|
||||||
} else {
|
|
||||||
cores = (int) tmp;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
message = "Illegal value for "
|
cores = (int) tmp;
|
||||||
+ YarnConfiguration.NM_PCORES_VCORES_MULTIPLIER
|
|
||||||
+ ". Value must be greater than 0.";
|
|
||||||
throw new IllegalArgumentException(message);
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
message = "Illegal value for "
|
||||||
|
+ YarnConfiguration.NM_PCORES_VCORES_MULTIPLIER
|
||||||
|
+ ". Value must be greater than 0.";
|
||||||
|
throw new IllegalArgumentException(message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(cores <= 0) {
|
if(cores <= 0) {
|
||||||
|
@ -228,6 +240,15 @@ public class NodeManagerHardwareUtils {
|
||||||
return cores;
|
return cores;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int getConfiguredMemoryMB(Configuration conf) {
|
||||||
|
int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB,
|
||||||
|
YarnConfiguration.DEFAULT_NM_PMEM_MB);
|
||||||
|
if (memoryMb == -1) {
|
||||||
|
memoryMb = YarnConfiguration.DEFAULT_NM_PMEM_MB;
|
||||||
|
}
|
||||||
|
return memoryMb;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function to return how much memory we should set aside for YARN containers.
|
* Function to return how much memory we should set aside for YARN containers.
|
||||||
* If a number is specified in the configuration file, then that number is
|
* If a number is specified in the configuration file, then that number is
|
||||||
|
@ -244,8 +265,15 @@ public class NodeManagerHardwareUtils {
|
||||||
* @return the amount of memory that will be used for YARN containers in MB.
|
* @return the amount of memory that will be used for YARN containers in MB.
|
||||||
*/
|
*/
|
||||||
public static int getContainerMemoryMB(Configuration conf) {
|
public static int getContainerMemoryMB(Configuration conf) {
|
||||||
return NodeManagerHardwareUtils.getContainerMemoryMB(
|
if (!isHardwareDetectionEnabled(conf)) {
|
||||||
ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, conf), conf);
|
return getConfiguredMemoryMB(conf);
|
||||||
|
}
|
||||||
|
ResourceCalculatorPlugin plugin =
|
||||||
|
ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, conf);
|
||||||
|
if (plugin == null) {
|
||||||
|
return getConfiguredMemoryMB(conf);
|
||||||
|
}
|
||||||
|
return getContainerMemoryMBInternal(plugin, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -267,41 +295,35 @@ public class NodeManagerHardwareUtils {
|
||||||
*/
|
*/
|
||||||
public static int getContainerMemoryMB(ResourceCalculatorPlugin plugin,
|
public static int getContainerMemoryMB(ResourceCalculatorPlugin plugin,
|
||||||
Configuration conf) {
|
Configuration conf) {
|
||||||
|
if (!isHardwareDetectionEnabled(conf) || plugin == null) {
|
||||||
|
return getConfiguredMemoryMB(conf);
|
||||||
|
}
|
||||||
|
return getContainerMemoryMBInternal(plugin, conf);
|
||||||
|
}
|
||||||
|
|
||||||
int memoryMb;
|
private static int getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin,
|
||||||
boolean hardwareDetectionEnabled = conf.getBoolean(
|
Configuration conf) {
|
||||||
YarnConfiguration.NM_ENABLE_HARDWARE_CAPABILITY_DETECTION,
|
int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, -1);
|
||||||
YarnConfiguration.DEFAULT_NM_ENABLE_HARDWARE_CAPABILITY_DETECTION);
|
if (memoryMb == -1) {
|
||||||
|
int physicalMemoryMB =
|
||||||
if (!hardwareDetectionEnabled || plugin == null) {
|
(int) (plugin.getPhysicalMemorySize() / (1024 * 1024));
|
||||||
memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB,
|
int hadoopHeapSizeMB =
|
||||||
YarnConfiguration.DEFAULT_NM_PMEM_MB);
|
(int) (Runtime.getRuntime().maxMemory() / (1024 * 1024));
|
||||||
if (memoryMb == -1) {
|
int containerPhysicalMemoryMB =
|
||||||
memoryMb = YarnConfiguration.DEFAULT_NM_PMEM_MB;
|
(int) (0.8f * (physicalMemoryMB - (2 * hadoopHeapSizeMB)));
|
||||||
|
int reservedMemoryMB =
|
||||||
|
conf.getInt(YarnConfiguration.NM_SYSTEM_RESERVED_PMEM_MB, -1);
|
||||||
|
if (reservedMemoryMB != -1) {
|
||||||
|
containerPhysicalMemoryMB = physicalMemoryMB - reservedMemoryMB;
|
||||||
}
|
}
|
||||||
} else {
|
if(containerPhysicalMemoryMB <= 0) {
|
||||||
memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, -1);
|
LOG.error("Calculated memory for YARN containers is too low."
|
||||||
if (memoryMb == -1) {
|
+ " Node memory is " + physicalMemoryMB
|
||||||
int physicalMemoryMB =
|
+ " MB, system reserved memory is "
|
||||||
(int) (plugin.getPhysicalMemorySize() / (1024 * 1024));
|
+ reservedMemoryMB + " MB.");
|
||||||
int hadoopHeapSizeMB =
|
|
||||||
(int) (Runtime.getRuntime().maxMemory() / (1024 * 1024));
|
|
||||||
int containerPhysicalMemoryMB =
|
|
||||||
(int) (0.8f * (physicalMemoryMB - (2 * hadoopHeapSizeMB)));
|
|
||||||
int reservedMemoryMB =
|
|
||||||
conf.getInt(YarnConfiguration.NM_SYSTEM_RESERVED_PMEM_MB, -1);
|
|
||||||
if (reservedMemoryMB != -1) {
|
|
||||||
containerPhysicalMemoryMB = physicalMemoryMB - reservedMemoryMB;
|
|
||||||
}
|
|
||||||
if(containerPhysicalMemoryMB <= 0) {
|
|
||||||
LOG.error("Calculated memory for YARN containers is too low."
|
|
||||||
+ " Node memory is " + physicalMemoryMB
|
|
||||||
+ " MB, system reserved memory is "
|
|
||||||
+ reservedMemoryMB + " MB.");
|
|
||||||
}
|
|
||||||
containerPhysicalMemoryMB = Math.max(containerPhysicalMemoryMB, 0);
|
|
||||||
memoryMb = containerPhysicalMemoryMB;
|
|
||||||
}
|
}
|
||||||
|
containerPhysicalMemoryMB = Math.max(containerPhysicalMemoryMB, 0);
|
||||||
|
memoryMb = containerPhysicalMemoryMB;
|
||||||
}
|
}
|
||||||
if(memoryMb <= 0) {
|
if(memoryMb <= 0) {
|
||||||
String message = "Illegal value for " + YarnConfiguration.NM_PMEM_MB
|
String message = "Illegal value for " + YarnConfiguration.NM_PMEM_MB
|
||||||
|
|
|
@ -548,13 +548,16 @@ public class MiniYARNCluster extends CompositeService {
|
||||||
.setNMWebAppHostNameAndPort(config,
|
.setNMWebAppHostNameAndPort(config,
|
||||||
MiniYARNCluster.getHostname(), 0);
|
MiniYARNCluster.getHostname(), 0);
|
||||||
|
|
||||||
|
config.setBoolean(
|
||||||
|
YarnConfiguration.NM_ENABLE_HARDWARE_CAPABILITY_DETECTION, false);
|
||||||
// Disable resource checks by default
|
// Disable resource checks by default
|
||||||
if (!config.getBoolean(
|
if (!config.getBoolean(
|
||||||
YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
|
YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
|
||||||
YarnConfiguration.
|
YarnConfiguration.
|
||||||
DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
|
DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
|
||||||
config.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
|
config.setBoolean(
|
||||||
config.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
|
YarnConfiguration.NM_CONTAINER_MONITOR_ENABLED, false);
|
||||||
|
config.setLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Starting NM: " + index);
|
LOG.info("Starting NM: " + index);
|
||||||
|
|
Loading…
Reference in New Issue