YARN-6862. Nodemanager resource usage metrics sometimes are negative. Contributed by Benjamin Teke
This commit is contained in:
parent
4201cdb44e
commit
d3fded12dc
|
@ -537,6 +537,14 @@ public class ContainersMonitorImpl extends AbstractService implements
|
||||||
pTree.updateProcessTree(); // update process-tree
|
pTree.updateProcessTree(); // update process-tree
|
||||||
long currentVmemUsage = pTree.getVirtualMemorySize();
|
long currentVmemUsage = pTree.getVirtualMemorySize();
|
||||||
long currentPmemUsage = pTree.getRssMemorySize();
|
long currentPmemUsage = pTree.getRssMemorySize();
|
||||||
|
if (currentVmemUsage < 0 || currentPmemUsage < 0) {
|
||||||
|
// YARN-6862/YARN-5021: if the container just exited, or the
|
||||||
|
// physical/virtual memory is UNAVAILABLE (-1) for any other reason,
|
||||||
|
// the values shouldn't be aggregated.
|
||||||
|
LOG.info("Skipping monitoring container {} because "
|
||||||
|
+ "memory usage is not available.", containerId);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// if machine has 6 cores and 3 are used,
|
// if machine has 6 cores and 3 are used,
|
||||||
// cpuUsagePercentPerCore should be 300%
|
// cpuUsagePercentPerCore should be 300%
|
||||||
|
|
|
@ -56,6 +56,16 @@ public class MockCPUResourceCalculatorProcessTree
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getVirtualMemorySize(int olderThanAge) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getRssMemorySize(int olderThanAge) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
long cpu = this.cpuPercentage;
|
long cpu = this.cpuPercentage;
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mock class to obtain resource usage (Memory).
|
||||||
|
*/
|
||||||
|
public class MockMemoryResourceCalculatorProcessTree extends ResourceCalculatorProcessTree {
|
||||||
|
private final long memorySize = 500000000L;
|
||||||
|
|
||||||
|
private long rssMemorySize = memorySize;
|
||||||
|
private long virtualMemorySize = ResourceCalculatorProcessTree.UNAVAILABLE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor for MockMemoryResourceCalculatorProcessTree with specified root
|
||||||
|
* process.
|
||||||
|
* @param root identifier of the root process, passed to the superclass
|
||||||
|
*/
|
||||||
|
public MockMemoryResourceCalculatorProcessTree(String root) {
|
||||||
|
super(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateProcessTree() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getProcessTreeDump() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getCumulativeCpuTime() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean checkPidPgrpidForMatch() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getRssMemorySize(int olderThanAge) {
|
||||||
|
long rssMemory = this.rssMemorySize;
|
||||||
|
// The first call returns 500000000, the second call returns UNAVAILABLE
|
||||||
|
// (-1), and every later call returns 2 * memorySize.
|
||||||
|
if (rssMemory == memorySize) {
|
||||||
|
this.rssMemorySize = ResourceCalculatorProcessTree.UNAVAILABLE;
|
||||||
|
}
|
||||||
|
if (rssMemory == ResourceCalculatorProcessTree.UNAVAILABLE) {
|
||||||
|
this.rssMemorySize = 2 * memorySize;
|
||||||
|
}
|
||||||
|
return rssMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getVirtualMemorySize(int olderThanAge) {
|
||||||
|
long virtualMemory = this.virtualMemorySize;
|
||||||
|
// The first call returns UNAVAILABLE (-1); every later call returns
|
||||||
|
// 3 * memorySize.
|
||||||
|
if (virtualMemory == ResourceCalculatorProcessTree.UNAVAILABLE) {
|
||||||
|
this.virtualMemorySize = 3 * memorySize;
|
||||||
|
}
|
||||||
|
return virtualMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getCpuUsagePercent() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -51,10 +51,16 @@ public class MockResourceCalculatorProcessTree extends ResourceCalculatorProcess
|
||||||
this.rssMemorySize = rssMemorySize;
|
this.rssMemorySize = rssMemorySize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public long getRssMemorySize() {
|
public long getRssMemorySize() {
|
||||||
return this.rssMemorySize;
|
return this.rssMemorySize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getVirtualMemorySize() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -282,13 +282,24 @@ public class TestContainersMonitorResourceChange {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testContainersCPUResourceForDefaultValue() throws Exception {
|
public void testContainersCPUResourceForDefaultValue() throws Exception {
|
||||||
|
testContainerMonitoringInvalidResources(
|
||||||
|
MockCPUResourceCalculatorProcessTree.class.getCanonicalName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContainersMemoryResourceUnavailable() throws Exception {
|
||||||
|
testContainerMonitoringInvalidResources(
|
||||||
|
MockMemoryResourceCalculatorProcessTree.class.getCanonicalName());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testContainerMonitoringInvalidResources(
|
||||||
|
String processTreeClassName) throws Exception {
|
||||||
Configuration newConf = new Configuration(conf);
|
Configuration newConf = new Configuration(conf);
|
||||||
// set container monitor interval to be 20s
|
// set container monitor interval to be 20ms
|
||||||
newConf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
|
newConf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
|
||||||
containersMonitor = createContainersMonitor(executor, dispatcher, context);
|
containersMonitor = createContainersMonitor(executor, dispatcher, context);
|
||||||
newConf.set(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
|
newConf.set(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
|
||||||
MockCPUResourceCalculatorProcessTree.class.getCanonicalName());
|
processTreeClassName);
|
||||||
// set container monitor interval to be 20ms
|
|
||||||
containersMonitor.init(newConf);
|
containersMonitor.init(newConf);
|
||||||
containersMonitor.start();
|
containersMonitor.start();
|
||||||
|
|
||||||
|
@ -305,7 +316,7 @@ public class TestContainersMonitorResourceChange {
|
||||||
0, containersMonitor.getContainersUtilization()
|
0, containersMonitor.getContainersUtilization()
|
||||||
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
|
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
|
||||||
|
|
||||||
// Verify the container utilization value. Since atleast one round is done,
|
// Verify the container utilization value. Since at least one round is done,
|
||||||
// we can expect a non-zero value for container utilization as
|
// we can expect a non-zero value for container utilization as
|
||||||
// MockCPUResourceCalculatorProcessTree#getCpuUsagePercent will return 50.
|
// MockCPUResourceCalculatorProcessTree#getCpuUsagePercent will return 50.
|
||||||
waitForContainerResourceUtilizationChange(containersMonitor, 100);
|
waitForContainerResourceUtilizationChange(containersMonitor, 100);
|
||||||
|
@ -324,12 +335,13 @@ public class TestContainersMonitorResourceChange {
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Monitor thread is waiting for resource utlization change.");
|
"Monitor thread is waiting for resource utilization change.");
|
||||||
Thread.sleep(WAIT_MS_PER_LOOP);
|
Thread.sleep(WAIT_MS_PER_LOOP);
|
||||||
timeWaiting += WAIT_MS_PER_LOOP;
|
timeWaiting += WAIT_MS_PER_LOOP;
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue("Resource utilization is not changed from second run onwards",
|
assertTrue("Resource utilization is not changed after " +
|
||||||
|
timeoutMsecs / WAIT_MS_PER_LOOP + " updates",
|
||||||
0 != containersMonitor.getContainersUtilization()
|
0 != containersMonitor.getContainersUtilization()
|
||||||
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
|
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue