YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha)
This commit is contained in:
parent
8dfec7a197
commit
def12933b3
|
@ -170,6 +170,8 @@ Release 2.8.0 - UNRELEASED
|
||||||
YARN-4023. Publish Application Priority to TimelineServer. (Sunil G
|
YARN-4023. Publish Application Priority to TimelineServer. (Sunil G
|
||||||
via rohithsharmaks)
|
via rohithsharmaks)
|
||||||
|
|
||||||
|
YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-644. Basic null check is not performed on passed in arguments before
|
YARN-644. Basic null check is not performed on passed in arguments before
|
||||||
|
|
|
@ -961,11 +961,20 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final String DEFAULT_NM_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:"
|
public static final String DEFAULT_NM_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:"
|
||||||
+ DEFAULT_NM_WEBAPP_HTTPS_PORT;
|
+ DEFAULT_NM_WEBAPP_HTTPS_PORT;
|
||||||
|
|
||||||
|
/** How often to monitor resources in a node.*/
|
||||||
|
public static final String NM_RESOURCE_MON_INTERVAL_MS =
|
||||||
|
NM_PREFIX + "resource-monitor.interval-ms";
|
||||||
|
public static final int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000;
|
||||||
|
|
||||||
/** How often to monitor containers.*/
|
/** How often to monitor containers.*/
|
||||||
public final static String NM_CONTAINER_MON_INTERVAL_MS =
|
public final static String NM_CONTAINER_MON_INTERVAL_MS =
|
||||||
NM_PREFIX + "container-monitor.interval-ms";
|
NM_PREFIX + "container-monitor.interval-ms";
|
||||||
|
@Deprecated
|
||||||
public final static int DEFAULT_NM_CONTAINER_MON_INTERVAL_MS = 3000;
|
public final static int DEFAULT_NM_CONTAINER_MON_INTERVAL_MS = 3000;
|
||||||
|
|
||||||
|
/** Class that calculates current resource utilization.*/
|
||||||
|
public static final String NM_MON_RESOURCE_CALCULATOR =
|
||||||
|
NM_PREFIX + "resource-calculator.class";
|
||||||
/** Class that calculates containers' current resource utilization.*/
|
/** Class that calculates containers' current resource utilization.*/
|
||||||
public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
|
public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
|
||||||
NM_PREFIX + "container-monitor.resource-calculator.class";
|
NM_PREFIX + "container-monitor.resource-calculator.class";
|
||||||
|
|
|
@ -1235,13 +1235,26 @@
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>How often to monitor containers.</description>
|
<description>How often to monitor the node and the containers.</description>
|
||||||
<name>yarn.nodemanager.container-monitor.interval-ms</name>
|
<name>yarn.nodemanager.resource-monitor.interval-ms</name>
|
||||||
<value>3000</value>
|
<value>3000</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>Class that calculates containers current resource utilization.</description>
|
<description>Class that calculates current resource utilization.</description>
|
||||||
|
<name>yarn.nodemanager.resource-calculator.class</name>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>How often to monitor containers. If not set, the value for
|
||||||
|
yarn.nodemanager.resource-monitor.interval-ms will be used.</description>
|
||||||
|
<name>yarn.nodemanager.container-monitor.interval-ms</name>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Class that calculates containers' current resource utilization.
|
||||||
|
If not set, the value for yarn.nodemanager.resource-calculator.class will
|
||||||
|
be used.</description>
|
||||||
<name>yarn.nodemanager.container-monitor.resource-calculator.class</name>
|
<name>yarn.nodemanager.container-monitor.resource-calculator.class</name>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,15 @@
|
||||||
package org.apache.hadoop.yarn.server.nodemanager;
|
package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
|
|
||||||
import org.apache.hadoop.service.Service;
|
import org.apache.hadoop.service.Service;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for monitoring the resources of a node.
|
||||||
|
*/
|
||||||
public interface NodeResourceMonitor extends Service {
|
public interface NodeResourceMonitor extends Service {
|
||||||
|
/**
|
||||||
|
* Get the <em>resource utilization</em> of the node.
|
||||||
|
* @return <em>resource utilization</em> of the node.
|
||||||
|
*/
|
||||||
|
public ResourceUtilization getUtilization();
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,13 +18,153 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.nodemanager;
|
package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of the node resource monitor. It periodically tracks the
|
||||||
|
* resource utilization of the node and reports it to the NM.
|
||||||
|
*/
|
||||||
public class NodeResourceMonitorImpl extends AbstractService implements
|
public class NodeResourceMonitorImpl extends AbstractService implements
|
||||||
NodeResourceMonitor {
|
NodeResourceMonitor {
|
||||||
|
|
||||||
|
/** Logging infrastructure. */
|
||||||
|
final static Log LOG = LogFactory
|
||||||
|
.getLog(NodeResourceMonitorImpl.class);
|
||||||
|
|
||||||
|
/** Interval to monitor the node resource utilization. */
|
||||||
|
private long monitoringInterval;
|
||||||
|
/** Thread to monitor the node resource utilization. */
|
||||||
|
private MonitoringThread monitoringThread;
|
||||||
|
|
||||||
|
/** Resource calculator. */
|
||||||
|
private ResourceCalculatorPlugin resourceCalculatorPlugin;
|
||||||
|
|
||||||
|
/** Current <em>resource utilization</em> of the node. */
|
||||||
|
private ResourceUtilization nodeUtilization;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the node resource monitor.
|
||||||
|
*/
|
||||||
public NodeResourceMonitorImpl() {
|
public NodeResourceMonitorImpl() {
|
||||||
super(NodeResourceMonitorImpl.class.getName());
|
super(NodeResourceMonitorImpl.class.getName());
|
||||||
|
|
||||||
|
this.monitoringThread = new MonitoringThread();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the service with the proper parameters.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
|
this.monitoringInterval =
|
||||||
|
conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
|
||||||
|
YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS);
|
||||||
|
|
||||||
|
Class<? extends ResourceCalculatorPlugin> clazz =
|
||||||
|
conf.getClass(YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
|
||||||
|
ResourceCalculatorPlugin.class);
|
||||||
|
|
||||||
|
this.resourceCalculatorPlugin =
|
||||||
|
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
|
||||||
|
|
||||||
|
LOG.info(" Using ResourceCalculatorPlugin : "
|
||||||
|
+ this.resourceCalculatorPlugin);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if we should be monitoring.
|
||||||
|
* @return <em>true</em> if we can monitor the node resource utilization.
|
||||||
|
*/
|
||||||
|
private boolean isEnabled() {
|
||||||
|
if (resourceCalculatorPlugin == null) {
|
||||||
|
LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
|
||||||
|
+ this.getClass().getName() + " is disabled.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start the thread that does the node resource utilization monitoring.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void serviceStart() throws Exception {
|
||||||
|
if (this.isEnabled()) {
|
||||||
|
this.monitoringThread.start();
|
||||||
|
}
|
||||||
|
super.serviceStart();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stop the thread that does the node resource utilization monitoring.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void serviceStop() throws Exception {
|
||||||
|
if (this.isEnabled()) {
|
||||||
|
this.monitoringThread.interrupt();
|
||||||
|
try {
|
||||||
|
this.monitoringThread.join(10 * 1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.warn("Could not wait for the thread to join");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
super.serviceStop();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thread that monitors the resource utilization of this node.
|
||||||
|
*/
|
||||||
|
private class MonitoringThread extends Thread {
|
||||||
|
/**
|
||||||
|
* Initialize the node resource monitoring thread.
|
||||||
|
*/
|
||||||
|
public MonitoringThread() {
|
||||||
|
super("Node Resource Monitor");
|
||||||
|
this.setDaemon(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Periodically monitor the resource utilization of the node.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
while (true) {
|
||||||
|
// Get node utilization and store it in nodeUtilization for getUtilization()
|
||||||
|
long pmem = resourceCalculatorPlugin.getPhysicalMemorySize() -
|
||||||
|
resourceCalculatorPlugin.getAvailablePhysicalMemorySize();
|
||||||
|
long vmem =
|
||||||
|
resourceCalculatorPlugin.getVirtualMemorySize()
|
||||||
|
- resourceCalculatorPlugin.getAvailableVirtualMemorySize();
|
||||||
|
float cpu = resourceCalculatorPlugin.getCpuUsage();
|
||||||
|
nodeUtilization =
|
||||||
|
ResourceUtilization.newInstance(
|
||||||
|
(int) (pmem >> 20), // B -> MB
|
||||||
|
(int) (vmem >> 20), // B -> MB
|
||||||
|
cpu); // 1 CPU at 100% is 1
|
||||||
|
|
||||||
|
try {
|
||||||
|
Thread.sleep(monitoringInterval);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.warn(NodeResourceMonitorImpl.class.getName()
|
||||||
|
+ " is interrupted. Exiting.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the <em>resource utilization</em> of the node.
|
||||||
|
* @return <em>resource utilization</em> of the node.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public ResourceUtilization getUtilization() {
|
||||||
|
return this.nodeUtilization;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -100,10 +100,14 @@ public class ContainersMonitorImpl extends AbstractService implements
|
||||||
protected void serviceInit(Configuration conf) throws Exception {
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
this.monitoringInterval =
|
this.monitoringInterval =
|
||||||
conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS,
|
conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS,
|
||||||
YarnConfiguration.DEFAULT_NM_CONTAINER_MON_INTERVAL_MS);
|
conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
|
||||||
|
YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS));
|
||||||
|
|
||||||
Class<? extends ResourceCalculatorPlugin> clazz =
|
Class<? extends ResourceCalculatorPlugin> clazz =
|
||||||
conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, null,
|
conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
|
||||||
|
conf.getClass(
|
||||||
|
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
|
||||||
|
ResourceCalculatorPlugin.class),
|
||||||
ResourceCalculatorPlugin.class);
|
ResourceCalculatorPlugin.class);
|
||||||
this.resourceCalculatorPlugin =
|
this.resourceCalculatorPlugin =
|
||||||
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
|
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestNodeResourceMonitor extends BaseContainerManagerTest {
|
||||||
|
public TestNodeResourceMonitor() throws UnsupportedFileSystemException {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNodeResourceMonitor() {
|
||||||
|
NodeResourceMonitor nrm = new NodeResourceMonitorImpl();
|
||||||
|
}
|
||||||
|
}
|
|
@ -125,7 +125,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
conf.setClass(
|
conf.setClass(
|
||||||
YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
|
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
|
||||||
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
|
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
|
||||||
super.setup();
|
super.setup();
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,7 +86,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
conf.setClass(
|
conf.setClass(
|
||||||
YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
|
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
|
||||||
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
|
LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
|
||||||
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);
|
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);
|
||||||
super.setup();
|
super.setup();
|
||||||
|
|
Loading…
Reference in New Issue