YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha)
This commit is contained in:
parent
8dfec7a197
commit
def12933b3
|
@ -170,6 +170,8 @@ Release 2.8.0 - UNRELEASED
|
|||
YARN-4023. Publish Application Priority to TimelineServer. (Sunil G
|
||||
via rohithsharmaks)
|
||||
|
||||
YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
YARN-644. Basic null check is not performed on passed in arguments before
|
||||
|
|
|
@ -961,11 +961,20 @@ public class YarnConfiguration extends Configuration {
|
|||
public static final String DEFAULT_NM_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:"
|
||||
+ DEFAULT_NM_WEBAPP_HTTPS_PORT;
|
||||
|
||||
/** How often to monitor resource in a node.*/
|
||||
public static final String NM_RESOURCE_MON_INTERVAL_MS =
|
||||
NM_PREFIX + "resource-monitor.interval-ms";
|
||||
public static final int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000;
|
||||
|
||||
/** How often to monitor containers.*/
|
||||
public final static String NM_CONTAINER_MON_INTERVAL_MS =
|
||||
NM_PREFIX + "container-monitor.interval-ms";
|
||||
@Deprecated
|
||||
public final static int DEFAULT_NM_CONTAINER_MON_INTERVAL_MS = 3000;
|
||||
|
||||
/** Class that calculates current resource utilization.*/
|
||||
public static final String NM_MON_RESOURCE_CALCULATOR =
|
||||
NM_PREFIX + "resource-calculator.class";
|
||||
/** Class that calculates containers current resource utilization.*/
|
||||
public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
|
||||
NM_PREFIX + "container-monitor.resource-calculator.class";
|
||||
|
|
|
@ -1235,13 +1235,26 @@
|
|||
</property>
|
||||
|
||||
<property>
|
||||
<description>How often to monitor containers.</description>
|
||||
<name>yarn.nodemanager.container-monitor.interval-ms</name>
|
||||
<description>How often to monitor the node and the containers.</description>
|
||||
<name>yarn.nodemanager.resource-monitor.interval-ms</name>
|
||||
<value>3000</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Class that calculates containers current resource utilization.</description>
|
||||
<description>Class that calculates current resource utilization.</description>
|
||||
<name>yarn.nodemanager.resource-calculator.class</name>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>How often to monitor containers. If not set, the value for
|
||||
yarn.nodemanager.resource-monitor.interval-ms will be used.</description>
|
||||
<name>yarn.nodemanager.container-monitor.interval-ms</name>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Class that calculates the containers' current resource utilization.
|
||||
If not set, the value for yarn.nodemanager.resource-calculator.class will
|
||||
be used.</description>
|
||||
<name>yarn.nodemanager.container-monitor.resource-calculator.class</name>
|
||||
</property>
|
||||
|
||||
|
|
|
@ -19,7 +19,15 @@
|
|||
package org.apache.hadoop.yarn.server.nodemanager;
|
||||
|
||||
import org.apache.hadoop.service.Service;
|
||||
import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
|
||||
|
||||
/**
|
||||
* Interface for monitoring the resources of a node.
|
||||
*/
|
||||
public interface NodeResourceMonitor extends Service {
|
||||
|
||||
/**
|
||||
* Get the <em>resource utilization</em> of the node.
|
||||
* @return <em>resource utilization</em> of the node.
|
||||
*/
|
||||
public ResourceUtilization getUtilization();
|
||||
}
|
||||
|
|
|
@ -18,13 +18,153 @@
|
|||
|
||||
package org.apache.hadoop.yarn.server.nodemanager;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
|
||||
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
|
||||
|
||||
/**
|
||||
* Implementation of the node resource monitor. It periodically tracks the
|
||||
* resource utilization of the node and reports it to the NM.
|
||||
*/
|
||||
public class NodeResourceMonitorImpl extends AbstractService implements
|
||||
NodeResourceMonitor {
|
||||
|
||||
/** Logging infrastructure. */
|
||||
final static Log LOG = LogFactory
|
||||
.getLog(NodeResourceMonitorImpl.class);
|
||||
|
||||
/** Interval to monitor the node resource utilization. */
|
||||
private long monitoringInterval;
|
||||
/** Thread to monitor the node resource utilization. */
|
||||
private MonitoringThread monitoringThread;
|
||||
|
||||
/** Resource calculator. */
|
||||
private ResourceCalculatorPlugin resourceCalculatorPlugin;
|
||||
|
||||
/** Current <em>resource utilization</em> of the node. */
|
||||
private ResourceUtilization nodeUtilization;
|
||||
|
||||
/**
|
||||
* Initialize the node resource monitor.
|
||||
*/
|
||||
public NodeResourceMonitorImpl() {
|
||||
super(NodeResourceMonitorImpl.class.getName());
|
||||
|
||||
this.monitoringThread = new MonitoringThread();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the service with the proper parameters.
|
||||
*/
|
||||
@Override
|
||||
protected void serviceInit(Configuration conf) throws Exception {
|
||||
this.monitoringInterval =
|
||||
conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
|
||||
YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS);
|
||||
|
||||
Class<? extends ResourceCalculatorPlugin> clazz =
|
||||
conf.getClass(YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
|
||||
ResourceCalculatorPlugin.class);
|
||||
|
||||
this.resourceCalculatorPlugin =
|
||||
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
|
||||
|
||||
LOG.info(" Using ResourceCalculatorPlugin : "
|
||||
+ this.resourceCalculatorPlugin);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we should be monitoring.
|
||||
* @return <em>true</em> if we can monitor the node resource utilization.
|
||||
*/
|
||||
private boolean isEnabled() {
|
||||
if (resourceCalculatorPlugin == null) {
|
||||
LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
|
||||
+ this.getClass().getName() + " is disabled.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the thread that does the node resource utilization monitoring.
|
||||
*/
|
||||
@Override
|
||||
protected void serviceStart() throws Exception {
|
||||
if (this.isEnabled()) {
|
||||
this.monitoringThread.start();
|
||||
}
|
||||
super.serviceStart();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the thread that does the node resource utilization monitoring.
|
||||
*/
|
||||
@Override
|
||||
protected void serviceStop() throws Exception {
|
||||
if (this.isEnabled()) {
|
||||
this.monitoringThread.interrupt();
|
||||
try {
|
||||
this.monitoringThread.join(10 * 1000);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.warn("Could not wait for the thread to join");
|
||||
}
|
||||
}
|
||||
super.serviceStop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Thread that monitors the resource utilization of this node.
|
||||
*/
|
||||
private class MonitoringThread extends Thread {
|
||||
/**
|
||||
* Initialize the node resource monitoring thread.
|
||||
*/
|
||||
public MonitoringThread() {
|
||||
super("Node Resource Monitor");
|
||||
this.setDaemon(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Periodically monitor the resource utilization of the node.
|
||||
*/
|
||||
@Override
|
||||
public void run() {
|
||||
while (true) {
|
||||
// Get node utilization and save it into the health status
|
||||
long pmem = resourceCalculatorPlugin.getPhysicalMemorySize() -
|
||||
resourceCalculatorPlugin.getAvailablePhysicalMemorySize();
|
||||
long vmem =
|
||||
resourceCalculatorPlugin.getVirtualMemorySize()
|
||||
- resourceCalculatorPlugin.getAvailableVirtualMemorySize();
|
||||
float cpu = resourceCalculatorPlugin.getCpuUsage();
|
||||
nodeUtilization =
|
||||
ResourceUtilization.newInstance(
|
||||
(int) (pmem >> 20), // B -> MB
|
||||
(int) (vmem >> 20), // B -> MB
|
||||
cpu); // 1 CPU at 100% is 1
|
||||
|
||||
try {
|
||||
Thread.sleep(monitoringInterval);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.warn(NodeResourceMonitorImpl.class.getName()
|
||||
+ " is interrupted. Exiting.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the <em>resource utilization</em> of the node.
|
||||
* @return <em>resource utilization</em> of the node.
|
||||
*/
|
||||
@Override
|
||||
public ResourceUtilization getUtilization() {
|
||||
return this.nodeUtilization;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,10 +100,14 @@ public class ContainersMonitorImpl extends AbstractService implements
|
|||
protected void serviceInit(Configuration conf) throws Exception {
|
||||
this.monitoringInterval =
|
||||
conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS,
|
||||
YarnConfiguration.DEFAULT_NM_CONTAINER_MON_INTERVAL_MS);
|
||||
conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
|
||||
YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS));
|
||||
|
||||
Class<? extends ResourceCalculatorPlugin> clazz =
|
||||
conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, null,
|
||||
conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
|
||||
conf.getClass(
|
||||
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
|
||||
ResourceCalculatorPlugin.class),
|
||||
ResourceCalculatorPlugin.class);
|
||||
this.resourceCalculatorPlugin =
|
||||
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager;
|
||||
|
||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNodeResourceMonitor extends BaseContainerManagerTest {
|
||||
public TestNodeResourceMonitor() throws UnsupportedFileSystemException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNodeResourceMonitor() {
|
||||
NodeResourceMonitor nrm = new NodeResourceMonitorImpl();
|
||||
}
|
||||
}
|
|
@ -125,7 +125,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
@Before
|
||||
public void setup() throws IOException {
|
||||
conf.setClass(
|
||||
YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
|
||||
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
|
||||
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
|
||||
super.setup();
|
||||
}
|
||||
|
|
|
@ -86,7 +86,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
|
|||
@Before
|
||||
public void setup() throws IOException {
|
||||
conf.setClass(
|
||||
YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
|
||||
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
|
||||
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
|
||||
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);
|
||||
super.setup();
|
||||
|
|
Loading…
Reference in New Issue