YARN-8911. ContainerScheduler incorrectly uses percentage number as the cpu resource utlization.
This commit is contained in:
parent
0891cdda79
commit
766b78ee07
|
@ -89,18 +89,18 @@ public abstract class ResourceUtilization implements
|
|||
public abstract void setPhysicalMemory(int pmem);
|
||||
|
||||
/**
|
||||
* Get <em>CPU</em> utilization.
|
||||
* Get <em>CPU</em> utilization (The amount of vcores used).
|
||||
*
|
||||
* @return <em>CPU utilization</em> normalized to 1 CPU
|
||||
* @return <em>CPU utilization</em>
|
||||
*/
|
||||
@Public
|
||||
@Unstable
|
||||
public abstract float getCPU();
|
||||
|
||||
/**
|
||||
* Set <em>CPU</em> utilization.
|
||||
* Set <em>CPU</em> utilization (The amount of vcores used).
|
||||
*
|
||||
* @param cpu <em>CPU utilization</em> normalized to 1 CPU
|
||||
* @param cpu <em>CPU utilization</em>
|
||||
*/
|
||||
@Public
|
||||
@Unstable
|
||||
|
|
|
@ -43,8 +43,7 @@ public interface ContainersMonitor extends Service,
|
|||
static void increaseResourceUtilization(
|
||||
ContainersMonitor containersMonitor, ResourceUtilization resourceUtil,
|
||||
Resource resource) {
|
||||
float vCores = (float) resource.getVirtualCores() /
|
||||
containersMonitor.getVCoresAllocatedForContainers();
|
||||
float vCores = (float) resource.getVirtualCores();
|
||||
int vmem = (int) (resource.getMemorySize()
|
||||
* containersMonitor.getVmemRatio());
|
||||
resourceUtil.addTo((int)resource.getMemorySize(), vmem, vCores);
|
||||
|
@ -60,8 +59,7 @@ public interface ContainersMonitor extends Service,
|
|||
static void decreaseResourceUtilization(
|
||||
ContainersMonitor containersMonitor, ResourceUtilization resourceUtil,
|
||||
Resource resource) {
|
||||
float vCores = (float) resource.getVirtualCores() /
|
||||
containersMonitor.getVCoresAllocatedForContainers();
|
||||
float vCores = (float) resource.getVirtualCores();
|
||||
int vmem = (int) (resource.getMemorySize()
|
||||
* containersMonitor.getVmemRatio());
|
||||
resourceUtil.subtractFrom((int)resource.getMemorySize(), vmem, vCores);
|
||||
|
|
|
@ -949,7 +949,8 @@ public class ContainersMonitorImpl extends AbstractService implements
|
|||
public void subtractNodeResourcesFromResourceUtilization(
|
||||
ResourceUtilization resourceUtil) {
|
||||
resourceUtil.subtractFrom((int) (getPmemAllocatedForContainers() >> 20),
|
||||
(int) (getVmemAllocatedForContainers() >> 20), 1.0f);
|
||||
(int) (getVmemAllocatedForContainers() >> 20),
|
||||
getVCoresAllocatedForContainers());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -123,35 +123,14 @@ public class AllocationBasedResourceUtilizationTracker implements
|
|||
this.containersAllocation.getCPU(),
|
||||
getContainersMonitor().getVCoresAllocatedForContainers());
|
||||
}
|
||||
// Check CPU. Compare using integral values of cores to avoid decimal
|
||||
// inaccuracies.
|
||||
if (!hasEnoughCpu(this.containersAllocation.getCPU(),
|
||||
getContainersMonitor().getVCoresAllocatedForContainers(), cpuVcores)) {
|
||||
// Check CPU.
|
||||
if (this.containersAllocation.getCPU() + cpuVcores >
|
||||
getContainersMonitor().getVCoresAllocatedForContainers()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether there is enough space for coresRequested in totalCores.
|
||||
* Converts currentAllocation usage to nearest integer count before comparing,
|
||||
* as floats are inherently imprecise. NOTE: this calculation assumes that
|
||||
* requested core counts must be integers, and currentAllocation core count
|
||||
* must also be an integer.
|
||||
*
|
||||
* @param currentAllocation The current allocation, a float value from 0 to 1.
|
||||
* @param totalCores The total cores in the system.
|
||||
* @param coresRequested The number of cores requested.
|
||||
* @return True if currentAllocationtotalCores*coresRequested <=
|
||||
* totalCores.
|
||||
*/
|
||||
public boolean hasEnoughCpu(float currentAllocation, long totalCores,
|
||||
int coresRequested) {
|
||||
// Must not cast here, as it would truncate the decimal digits.
|
||||
return Math.round(currentAllocation * totalCores)
|
||||
+ coresRequested <= totalCores;
|
||||
}
|
||||
|
||||
public ContainersMonitor getContainersMonitor() {
|
||||
return this.scheduler.getContainersMonitor();
|
||||
}
|
||||
|
|
|
@ -594,10 +594,7 @@ public class ContainerScheduler extends AbstractService implements
|
|||
ResourceUtilization resourcesToFreeUp) {
|
||||
return resourcesToFreeUp.getPhysicalMemory() <= 0 &&
|
||||
resourcesToFreeUp.getVirtualMemory() <= 0 &&
|
||||
// Convert the number of cores to nearest integral number, due to
|
||||
// imprecision of direct float comparison.
|
||||
Math.round(resourcesToFreeUp.getCPU()
|
||||
* getContainersMonitor().getVCoresAllocatedForContainers()) <= 0;
|
||||
resourcesToFreeUp.getCPU() <= 0;
|
||||
}
|
||||
|
||||
private ResourceUtilization resourcesToFreeUp(
|
||||
|
|
|
@ -527,7 +527,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
|
|||
assertNotNull(app);
|
||||
|
||||
ResourceUtilization utilization =
|
||||
ResourceUtilization.newInstance(1024, 2048, 0.25F);
|
||||
ResourceUtilization.newInstance(1024, 2048, 1.0F);
|
||||
assertEquals(cm.getContainerScheduler().getNumRunningContainers(), 1);
|
||||
assertEquals(utilization,
|
||||
cm.getContainerScheduler().getCurrentUtilization());
|
||||
|
|
|
@ -72,22 +72,4 @@ public class TestAllocationBasedResourceUtilizationTracker {
|
|||
}
|
||||
Assert.assertFalse(tracker.hasResourcesAvailable(testContainer));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the case where the current allocation has been truncated to 0.8888891
|
||||
* (8/9 cores used). Request 1 additional core - hasEnoughCpu should return
|
||||
* true.
|
||||
*/
|
||||
@Test
|
||||
public void testHasEnoughCpu() {
|
||||
AllocationBasedResourceUtilizationTracker tracker =
|
||||
new AllocationBasedResourceUtilizationTracker(mockContainerScheduler);
|
||||
float currentAllocation = 0.8888891f;
|
||||
long totalCores = 9;
|
||||
int alreadyUsedCores = 8;
|
||||
Assert.assertTrue(tracker.hasEnoughCpu(currentAllocation, totalCores,
|
||||
(int) totalCores - alreadyUsedCores));
|
||||
Assert.assertFalse(tracker.hasEnoughCpu(currentAllocation, totalCores,
|
||||
(int) totalCores - alreadyUsedCores + 1));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,17 +20,19 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
|
|||
import static org.junit.Assert.assertEquals;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.mockito.Mockito.spy;
|
||||
import static org.mockito.Mockito.doNothing;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService
|
||||
.RecoveredContainerState;
|
||||
|
@ -40,7 +42,6 @@ import org.junit.Before;
|
|||
import org.junit.Test;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
|
||||
/**
|
||||
|
@ -49,6 +50,10 @@ import org.mockito.MockitoAnnotations;
|
|||
* ExecutionType.
|
||||
*/
|
||||
public class TestContainerSchedulerRecovery {
|
||||
private static final Resource CONTAINER_SIZE =
|
||||
Resource.newInstance(1024, 4);
|
||||
private static final ResourceUtilization ZERO =
|
||||
ResourceUtilization.newInstance(0, 0, 0.0f);
|
||||
|
||||
@Mock private NMContext context;
|
||||
|
||||
|
@ -66,13 +71,9 @@ public class TestContainerSchedulerRecovery {
|
|||
|
||||
@Mock private ContainerId containerId;
|
||||
|
||||
@Mock private AllocationBasedResourceUtilizationTracker
|
||||
allocationBasedResourceUtilizationTracker;
|
||||
|
||||
@InjectMocks private ContainerScheduler tempContainerScheduler =
|
||||
@InjectMocks private ContainerScheduler spy =
|
||||
new ContainerScheduler(context, dispatcher, metrics, 0);
|
||||
|
||||
private ContainerScheduler spy;
|
||||
|
||||
private RecoveredContainerState createRecoveredContainerState(
|
||||
RecoveredContainerStatus status) {
|
||||
|
@ -81,16 +82,32 @@ public class TestContainerSchedulerRecovery {
|
|||
return mockState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up the {@link ContainersMonitor} dependency of
|
||||
* {@link ResourceUtilizationTracker} so that we can
|
||||
* verify the resource utilization.
|
||||
*/
|
||||
private void setupContainerMonitor() {
|
||||
ContainersMonitor containersMonitor = mock(ContainersMonitor.class);
|
||||
when(containersMonitor.getVCoresAllocatedForContainers()).thenReturn(10L);
|
||||
when(containersMonitor.getPmemAllocatedForContainers()).thenReturn(10240L);
|
||||
when(containersMonitor.getVmemRatio()).thenReturn(1.0f);
|
||||
when(containersMonitor.getVmemAllocatedForContainers()).thenReturn(10240L);
|
||||
|
||||
ContainerManager cm = mock(ContainerManager.class);
|
||||
when(cm.getContainersMonitor()).thenReturn(containersMonitor);
|
||||
when(context.getContainerManager()).thenReturn(cm);
|
||||
}
|
||||
|
||||
@Before public void setUp() throws Exception {
|
||||
MockitoAnnotations.initMocks(this);
|
||||
spy = spy(tempContainerScheduler);
|
||||
setupContainerMonitor();
|
||||
when(container.getContainerId()).thenReturn(containerId);
|
||||
when(container.getResource()).thenReturn(CONTAINER_SIZE);
|
||||
when(containerId.getApplicationAttemptId()).thenReturn(appAttemptId);
|
||||
when(containerId.getApplicationAttemptId().getApplicationId())
|
||||
.thenReturn(appId);
|
||||
when(containerId.getContainerId()).thenReturn(123L);
|
||||
doNothing().when(allocationBasedResourceUtilizationTracker)
|
||||
.addContainerResources(container);
|
||||
}
|
||||
|
||||
@After public void tearDown() {
|
||||
|
@ -112,8 +129,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(1, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as QUEUED, OPPORTUNISTIC,
|
||||
|
@ -132,8 +148,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(1, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as PAUSED, GUARANTEED,
|
||||
|
@ -152,8 +167,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(1, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as PAUSED, OPPORTUNISTIC,
|
||||
|
@ -172,8 +186,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(1, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as LAUNCHED, GUARANTEED,
|
||||
|
@ -192,8 +205,9 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(1, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(1))
|
||||
.addContainerResources(container);
|
||||
assertEquals(
|
||||
ResourceUtilization.newInstance(1024, 1024, 4.0f),
|
||||
spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as LAUNCHED, OPPORTUNISTIC,
|
||||
|
@ -212,8 +226,9 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(1, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(1))
|
||||
.addContainerResources(container);
|
||||
assertEquals(
|
||||
ResourceUtilization.newInstance(1024, 1024, 4.0f),
|
||||
spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as REQUESTED, GUARANTEED,
|
||||
|
@ -232,8 +247,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as REQUESTED, OPPORTUNISTIC,
|
||||
|
@ -252,8 +266,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as COMPLETED, GUARANTEED,
|
||||
|
@ -272,8 +285,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as COMPLETED, OPPORTUNISTIC,
|
||||
|
@ -292,8 +304,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as GUARANTEED but no executionType set,
|
||||
|
@ -311,8 +322,7 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
|
||||
/*Test if a container is recovered as PAUSED but no executionType set,
|
||||
|
@ -330,7 +340,6 @@ public class TestContainerSchedulerRecovery {
|
|||
assertEquals(0, spy.getNumQueuedGuaranteedContainers());
|
||||
assertEquals(0, spy.getNumQueuedOpportunisticContainers());
|
||||
assertEquals(0, spy.getNumRunningContainers());
|
||||
Mockito.verify(allocationBasedResourceUtilizationTracker, Mockito.times(0))
|
||||
.addContainerResources(container);
|
||||
assertEquals(ZERO, spy.getCurrentUtilization());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue