YARN-6870. Fix floating point inaccuracies in resource availability check in AllocationBasedResourceUtilizationTracker. (Brook Zhou via asuresh)
This commit is contained in:
parent
bcde66bed1
commit
890e14c02a
|
@ -118,19 +118,40 @@ public class AllocationBasedResourceUtilizationTracker implements
|
|||
return false;
|
||||
}
|
||||
|
||||
float vCores = (float) cpuVcores /
|
||||
getContainersMonitor().getVCoresAllocatedForContainers();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("before cpuCheck [asked={} > allowed={}]",
|
||||
this.containersAllocation.getCPU(), vCores);
|
||||
this.containersAllocation.getCPU(),
|
||||
getContainersMonitor().getVCoresAllocatedForContainers());
|
||||
}
|
||||
// Check CPU.
|
||||
if (this.containersAllocation.getCPU() + vCores > 1.0f) {
|
||||
// Check CPU. Compare using integral values of cores to avoid decimal
|
||||
// inaccuracies.
|
||||
if (!hasEnoughCpu(this.containersAllocation.getCPU(),
|
||||
getContainersMonitor().getVCoresAllocatedForContainers(), cpuVcores)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether there is enough space for coresRequested in totalCores.
|
||||
* Converts currentAllocation usage to nearest integer count before comparing,
|
||||
* as floats are inherently imprecise. NOTE: this calculation assumes that
|
||||
* requested core counts must be integers, and currentAllocation core count
|
||||
* must also be an integer.
|
||||
*
|
||||
* @param currentAllocation The current allocation, a float value from 0 to 1.
|
||||
* @param totalCores The total cores in the system.
|
||||
* @param coresRequested The number of cores requested.
|
||||
* @return True if currentAllocationtotalCores*coresRequested <=
|
||||
* totalCores.
|
||||
*/
|
||||
public boolean hasEnoughCpu(float currentAllocation, long totalCores,
|
||||
int coresRequested) {
|
||||
// Must not cast here, as it would truncate the decimal digits.
|
||||
return Math.round(currentAllocation * totalCores)
|
||||
+ coresRequested <= totalCores;
|
||||
}
|
||||
|
||||
public ContainersMonitor getContainersMonitor() {
|
||||
return this.scheduler.getContainersMonitor();
|
||||
}
|
||||
|
|
|
@ -392,7 +392,10 @@ public class ContainerScheduler extends AbstractService implements
|
|||
ResourceUtilization resourcesToFreeUp) {
|
||||
return resourcesToFreeUp.getPhysicalMemory() <= 0 &&
|
||||
resourcesToFreeUp.getVirtualMemory() <= 0 &&
|
||||
resourcesToFreeUp.getCPU() <= 0.0f;
|
||||
// Convert the number of cores to nearest integral number, due to
|
||||
// imprecision of direct float comparison.
|
||||
Math.round(resourcesToFreeUp.getCPU()
|
||||
* getContainersMonitor().getVCoresAllocatedForContainers()) <= 0;
|
||||
}
|
||||
|
||||
private ResourceUtilization resourcesToFreeUp(
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests for the {@link AllocationBasedResourceUtilizationTracker} class.
|
||||
*/
|
||||
public class TestAllocationBasedResourceUtilizationTracker {
|
||||
|
||||
private ContainerScheduler mockContainerScheduler;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
mockContainerScheduler = mock(ContainerScheduler.class);
|
||||
ContainersMonitor containersMonitor =
|
||||
new ContainersMonitorImpl(mock(ContainerExecutor.class),
|
||||
mock(AsyncDispatcher.class), mock(Context.class));
|
||||
YarnConfiguration conf = new YarnConfiguration();
|
||||
conf.setInt(YarnConfiguration.NM_PMEM_MB, 1024);
|
||||
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true);
|
||||
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);
|
||||
conf.setFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, 2.0f);
|
||||
conf.setInt(YarnConfiguration.NM_VCORES, 8);
|
||||
containersMonitor.init(conf);
|
||||
when(mockContainerScheduler.getContainersMonitor())
|
||||
.thenReturn(containersMonitor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Node has capacity for 1024 MB and 8 cores. Saturate the node. When full the
|
||||
* hasResourceAvailable should return false.
|
||||
*/
|
||||
@Test
|
||||
public void testHasResourcesAvailable() {
|
||||
AllocationBasedResourceUtilizationTracker tracker =
|
||||
new AllocationBasedResourceUtilizationTracker(mockContainerScheduler);
|
||||
Container testContainer = mock(Container.class);
|
||||
when(testContainer.getResource()).thenReturn(Resource.newInstance(512, 4));
|
||||
for (int i = 0; i < 2; i++) {
|
||||
Assert.assertTrue(tracker.hasResourcesAvailable(testContainer));
|
||||
tracker.addContainerResources(testContainer);
|
||||
}
|
||||
Assert.assertFalse(tracker.hasResourcesAvailable(testContainer));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the case where the current allocation has been truncated to 0.8888891
|
||||
* (8/9 cores used). Request 1 additional core - hasEnoughCpu should return
|
||||
* true.
|
||||
*/
|
||||
@Test
|
||||
public void testHasEnoughCpu() {
|
||||
AllocationBasedResourceUtilizationTracker tracker =
|
||||
new AllocationBasedResourceUtilizationTracker(mockContainerScheduler);
|
||||
float currentAllocation = 0.8888891f;
|
||||
long totalCores = 9;
|
||||
int alreadyUsedCores = 8;
|
||||
Assert.assertTrue(tracker.hasEnoughCpu(currentAllocation, totalCores,
|
||||
(int) totalCores - alreadyUsedCores));
|
||||
Assert.assertFalse(tracker.hasEnoughCpu(currentAllocation, totalCores,
|
||||
(int) totalCores - alreadyUsedCores + 1));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue