diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c4513208aa1..287a9134be5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -170,6 +170,8 @@ Release 2.8.0 - UNRELEASED YARN-4023. Publish Application Priority to TimelineServer. (Sunil G via rohithsharmaks) + YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha) + IMPROVEMENTS YARN-644. Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 93f7ed62cb2..6c438f28372 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -960,12 +960,21 @@ public class YarnConfiguration extends Configuration { public static final int DEFAULT_NM_WEBAPP_HTTPS_PORT = 8044; public static final String DEFAULT_NM_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:" + DEFAULT_NM_WEBAPP_HTTPS_PORT; - + + /** How often to monitor resource in a node.*/ + public static final String NM_RESOURCE_MON_INTERVAL_MS = + NM_PREFIX + "resource-monitor.interval-ms"; + public static final int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000; + /** How often to monitor containers.*/ public final static String NM_CONTAINER_MON_INTERVAL_MS = NM_PREFIX + "container-monitor.interval-ms"; + @Deprecated public final static int DEFAULT_NM_CONTAINER_MON_INTERVAL_MS = 3000; + /** Class that calculates current resource utilization.*/ + public static final String NM_MON_RESOURCE_CALCULATOR = + NM_PREFIX + "resource-calculator.class"; /** Class that calculates containers current resource utilization.*/ public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR = NM_PREFIX + "container-monitor.resource-calculator.class"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 402377d3352..53face0a3c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1235,13 +1235,26 @@ - How often to monitor containers. - yarn.nodemanager.container-monitor.interval-ms + How often to monitor the node and the containers. + yarn.nodemanager.resource-monitor.interval-ms 3000 - Class that calculates containers current resource utilization. + Class that calculates current resource utilization. + yarn.nodemanager.resource-calculator.class + + + + How often to monitor containers. If not set, the value for + yarn.nodemanager.resource-monitor.interval-ms will be used. + yarn.nodemanager.container-monitor.interval-ms + + + + Class that calculates containers current resource utilization. + If not set, the value for yarn.nodemanager.resource-calculator.class will + be used. yarn.nodemanager.container-monitor.resource-calculator.class diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java index be13d222df3..6c5a15a7aea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java @@ -19,7 +19,15 @@ package org.apache.hadoop.yarn.server.nodemanager; import org.apache.hadoop.service.Service; +import org.apache.hadoop.yarn.server.api.records.ResourceUtilization; +/** + * Interface for monitoring the resources of a node. + */ public interface NodeResourceMonitor extends Service { - + /** + * Get the resource utilization of the node. + * @return resource utilization of the node. + */ + public ResourceUtilization getUtilization(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java index ea82546a7b9..dcdaa0a9821 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java @@ -18,13 +18,153 @@ package org.apache.hadoop.yarn.server.nodemanager; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; +/** + * Implementation of the node resource monitor. It periodically tracks the + * resource utilization of the node and reports it to the NM. + */ public class NodeResourceMonitorImpl extends AbstractService implements NodeResourceMonitor { + /** Logging infrastructure. */ + final static Log LOG = LogFactory + .getLog(NodeResourceMonitorImpl.class); + + /** Interval to monitor the node resource utilization. */ + private long monitoringInterval; + /** Thread to monitor the node resource utilization. */ + private MonitoringThread monitoringThread; + + /** Resource calculator. */ + private ResourceCalculatorPlugin resourceCalculatorPlugin; + + /** Current resource utilization of the node. */ + private ResourceUtilization nodeUtilization; + + /** + * Initialize the node resource monitor. + */ public NodeResourceMonitorImpl() { super(NodeResourceMonitorImpl.class.getName()); + + this.monitoringThread = new MonitoringThread(); } + /** + * Initialize the service with the proper parameters. + */ + @Override + protected void serviceInit(Configuration conf) throws Exception { + this.monitoringInterval = + conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS, + YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS); + + Class clazz = + conf.getClass(YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null, + ResourceCalculatorPlugin.class); + + this.resourceCalculatorPlugin = + ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf); + + LOG.info(" Using ResourceCalculatorPlugin : " + + this.resourceCalculatorPlugin); + } + + /** + * Check if we should be monitoring. + * @return true if we can monitor the node resource utilization. + */ + private boolean isEnabled() { + if (resourceCalculatorPlugin == null) { + LOG.info("ResourceCalculatorPlugin is unavailable on this system. " + + this.getClass().getName() + " is disabled."); + return false; + } + return true; + } + + /** + * Start the thread that does the node resource utilization monitoring. + */ + @Override + protected void serviceStart() throws Exception { + if (this.isEnabled()) { + this.monitoringThread.start(); + } + super.serviceStart(); + } + + /** + * Stop the thread that does the node resource utilization monitoring. + */ + @Override + protected void serviceStop() throws Exception { + if (this.isEnabled()) { + this.monitoringThread.interrupt(); + try { + this.monitoringThread.join(10 * 1000); + } catch (InterruptedException e) { + LOG.warn("Could not wait for the thread to join"); + } + } + super.serviceStop(); + } + + /** + * Thread that monitors the resource utilization of this node. + */ + private class MonitoringThread extends Thread { + /** + * Initialize the node resource monitoring thread. + */ + public MonitoringThread() { + super("Node Resource Monitor"); + this.setDaemon(true); + } + + /** + * Periodically monitor the resource utilization of the node. + */ + @Override + public void run() { + while (true) { + // Get node utilization and save it into the health status + long pmem = resourceCalculatorPlugin.getPhysicalMemorySize() - + resourceCalculatorPlugin.getAvailablePhysicalMemorySize(); + long vmem = + resourceCalculatorPlugin.getVirtualMemorySize() + - resourceCalculatorPlugin.getAvailableVirtualMemorySize(); + float cpu = resourceCalculatorPlugin.getCpuUsage(); + nodeUtilization = + ResourceUtilization.newInstance( + (int) (pmem >> 20), // B -> MB + (int) (vmem >> 20), // B -> MB + cpu); // 1 CPU at 100% is 1 + + try { + Thread.sleep(monitoringInterval); + } catch (InterruptedException e) { + LOG.warn(NodeResourceMonitorImpl.class.getName() + + " is interrupted. Exiting."); + break; + } + } + } + } + + /** + * Get the resource utilization of the node. + * @return resource utilization of the node. + */ + @Override + public ResourceUtilization getUtilization() { + return this.nodeUtilization; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 57d1bad353c..89dc980c707 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -100,10 +100,14 @@ public class ContainersMonitorImpl extends AbstractService implements protected void serviceInit(Configuration conf) throws Exception { this.monitoringInterval = conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, - YarnConfiguration.DEFAULT_NM_CONTAINER_MON_INTERVAL_MS); + conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS, + YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS)); Class clazz = - conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, null, + conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, + conf.getClass( + YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null, + ResourceCalculatorPlugin.class), ResourceCalculatorPlugin.class); this.resourceCalculatorPlugin = ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java new file mode 100644 index 00000000000..3c2c3860ee7 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; + +import org.junit.Test; + +public class TestNodeResourceMonitor extends BaseContainerManagerTest { + public TestNodeResourceMonitor() throws UnsupportedFileSystemException { + super(); + } + + @Test + public void testNodeResourceMonitor() { + NodeResourceMonitor nrm = new NodeResourceMonitorImpl(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 285635771f3..ea6bb1dc19d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -125,7 +125,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { @Before public void setup() throws IOException { conf.setClass( - YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, + YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); super.setup(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 909a962e3f8..3803144ce7c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -86,7 +86,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest { @Before public void setup() throws IOException { conf.setClass( - YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, + YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true); super.setup();