YARN-9323. FSLeafQueue#computeMaxAMResource does not override zero values for custom resources

(Contributed by Szilard Nemeth via Daniel Templeton)

Change-Id: Id844ccf09488f367c0c7de0a3b2d4aca1bba31cc
This commit is contained in:
Szilard Nemeth 2019-02-27 19:49:34 -08:00 committed by Daniel Templeton
parent 7b928f19a4
commit 538bb4880d
5 changed files with 172 additions and 0 deletions

View File

@ -832,4 +832,8 @@ public class QueueMetrics implements MetricsSource {
public long getAggregatePreemptedContainers() {
return aggregateContainersPreempted.value();
}
public QueueMetricsForCustomResources getQueueMetricsForCustomResources() {
return queueMetricsForCustomResources;
}
}

View File

@ -101,4 +101,8 @@ public class QueueMetricsForCustomResources {
QueueMetricsCustomResource getAggregatePreemptedSeconds() {
return aggregatePreemptedSeconds;
}
public QueueMetricsCustomResource getAvailable() {
return available;
}
}

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
@ -42,6 +43,8 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsCustomResource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.util.resource.Resources;
@ -517,6 +520,29 @@ public class FSLeafQueue extends FSQueue {
getMaxShare().getVirtualCores()));
}
QueueMetricsForCustomResources metricsForCustomResources =
scheduler.getRootQueueMetrics().getQueueMetricsForCustomResources();
if (metricsForCustomResources != null) {
QueueMetricsCustomResource availableResources =
metricsForCustomResources.getAvailable();
// We expect all custom resources contained in availableResources,
// so we will loop through all of them.
for (Map.Entry<String, Long> availableEntry : availableResources
.getValues().entrySet()) {
String resourceName = availableEntry.getKey();
// We only update the value if fairshare is 0 for that resource.
if (maxResource.getResourceValue(resourceName) == 0) {
Long availableValue = availableEntry.getValue();
long value = Math.min(availableValue,
getMaxShare().getResourceValue(resourceName));
maxResource.setResourceValue(resourceName, value);
}
}
}
// Round up to allow AM to run when there is only one vcore on the cluster
return Resources.multiplyAndRoundUp(maxResource, maxAMShare);
}

View File

@ -310,4 +310,8 @@ public class FSQueueMetrics extends QueueMetrics {
return (FSQueueMetrics)metrics;
}
FSQueueMetricsForCustomResources getCustomResources() {
return customResources;
}
}

View File

@ -34,6 +34,7 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Resource;
@ -42,19 +43,26 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsCustomResource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import java.util.Map;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
public class TestFSLeafQueue extends FairSchedulerTestBase {
private final static String ALLOC_FILE = new File(TEST_DIR,
TestFSLeafQueue.class.getName() + ".xml").getAbsolutePath();
private Resource maxResource = Resources.createResource(1024 * 8);
private static final float MAX_AM_SHARE = 0.5f;
private static final String CUSTOM_RESOURCE = "test1";
@Before
public void setup() throws IOException {
@ -105,6 +113,8 @@ public class TestFSLeafQueue extends FairSchedulerTestBase {
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>");
out.println("<queueMaxAMShareDefault>" + MAX_AM_SHARE +
"</queueMaxAMShareDefault>");
out.println("<queue name=\"queueA\"></queue>");
out.println("<queue name=\"queueB\"></queue>");
out.println("</allocations>");
@ -221,4 +231,128 @@ public class TestFSLeafQueue extends FairSchedulerTestBase {
assertTrue("Test failed with exception(s)" + exceptions,
exceptions.isEmpty());
}
@Test
public void testCanRunAppAMReturnsTrue() {
conf.set(YarnConfiguration.RESOURCE_TYPES, CUSTOM_RESOURCE);
ResourceUtils.resetResourceTypes(conf);
resourceManager = new MockRM(conf);
resourceManager.start();
scheduler = (FairScheduler) resourceManager.getResourceScheduler();
Resource maxShare = Resource.newInstance(1024 * 8, 4,
ImmutableMap.of(CUSTOM_RESOURCE, 10L));
// Add a node to increase available memory and vcores in scheduler's
// root queue metrics
addNodeToScheduler(Resource.newInstance(4096, 10,
ImmutableMap.of(CUSTOM_RESOURCE, 25L)));
FSLeafQueue queue = setupQueue(maxShare);
//Min(availableMemory, maxShareMemory (maxResourceOverridden))
// --> Min(4096, 8192) = 4096
//Min(availableVCores, maxShareVCores (maxResourceOverridden))
// --> Min(10, 4) = 4
//Min(available test1, maxShare test1 (maxResourceOverridden))
// --> Min(25, 10) = 10
//MaxAMResource: (4096 MB memory, 4 vcores, 10 test1) * MAX_AM_SHARE
// --> 2048 MB memory, 2 vcores, 5 test1
Resource expectedAMShare = Resource.newInstance(2048, 2,
ImmutableMap.of(CUSTOM_RESOURCE, 5L));
Resource appAMResource = Resource.newInstance(2048, 2,
ImmutableMap.of(CUSTOM_RESOURCE, 3L));
Map<String, Long> customResourceValues =
verifyQueueMetricsForCustomResources(queue);
boolean result = queue.canRunAppAM(appAMResource);
assertTrue("AM should have been allocated!", result);
verifyAMShare(queue, expectedAMShare, customResourceValues);
}
private FSLeafQueue setupQueue(Resource maxShare) {
String queueName = "root.queue1";
FSLeafQueue schedulable = new FSLeafQueue(queueName, scheduler, null);
schedulable.setMaxShare(new ConfigurableResource(maxShare));
schedulable.setMaxAMShare(MAX_AM_SHARE);
return schedulable;
}
@Test
public void testCanRunAppAMReturnsFalse() {
conf.set(YarnConfiguration.RESOURCE_TYPES, CUSTOM_RESOURCE);
ResourceUtils.resetResourceTypes(conf);
resourceManager = new MockRM(conf);
resourceManager.start();
scheduler = (FairScheduler) resourceManager.getResourceScheduler();
Resource maxShare = Resource.newInstance(1024 * 8, 4,
ImmutableMap.of(CUSTOM_RESOURCE, 10L));
// Add a node to increase available memory and vcores in scheduler's
// root queue metrics
addNodeToScheduler(Resource.newInstance(4096, 10,
ImmutableMap.of(CUSTOM_RESOURCE, 25L)));
FSLeafQueue queue = setupQueue(maxShare);
//Min(availableMemory, maxShareMemory (maxResourceOverridden))
// --> Min(4096, 8192) = 4096
//Min(availableVCores, maxShareVCores (maxResourceOverridden))
// --> Min(10, 4) = 4
//Min(available test1, maxShare test1 (maxResourceOverridden))
// --> Min(25, 10) = 10
//MaxAMResource: (4096 MB memory, 4 vcores, 10 test1) * MAX_AM_SHARE
// --> 2048 MB memory, 2 vcores, 5 test1
Resource expectedAMShare = Resource.newInstance(2048, 2,
ImmutableMap.of(CUSTOM_RESOURCE, 5L));
Resource appAMResource = Resource.newInstance(2048, 2,
ImmutableMap.of(CUSTOM_RESOURCE, 6L));
Map<String, Long> customResourceValues =
verifyQueueMetricsForCustomResources(queue);
boolean result = queue.canRunAppAM(appAMResource);
assertFalse("AM should not have been allocated!", result);
verifyAMShare(queue, expectedAMShare, customResourceValues);
}
private void addNodeToScheduler(Resource node1Resource) {
RMNode node1 = MockNodes.newNodeInfo(0, node1Resource, 1, "127.0.0.2");
scheduler.handle(new NodeAddedSchedulerEvent(node1));
}
private void verifyAMShare(FSLeafQueue schedulable,
Resource expectedAMShare, Map<String, Long> customResourceValues) {
Resource actualAMShare = Resource.newInstance(
schedulable.getMetrics().getMaxAMShareMB(),
schedulable.getMetrics().getMaxAMShareVCores(), customResourceValues);
long customResourceValue =
actualAMShare.getResourceValue(CUSTOM_RESOURCE);
//make sure to verify custom resource value explicitly!
assertEquals(5L, customResourceValue);
assertEquals("AM share is not the expected!", expectedAMShare,
actualAMShare);
}
private Map<String, Long> verifyQueueMetricsForCustomResources(
FSLeafQueue schedulable) {
QueueMetricsCustomResource maxAMShareCustomResources =
schedulable.getMetrics().getCustomResources().getMaxAMShare();
Map<String, Long> customResourceValues = maxAMShareCustomResources
.getValues();
assertNotNull("Queue metrics for custom resources should not be null!",
maxAMShareCustomResources);
assertNotNull("Queue metrics for custom resources resource values " +
"should not be null!", customResourceValues);
return customResourceValues;
}
}