diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index 3713cfb814b..ecc1d1be062 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -61,6 +61,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringInterner; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; @@ -77,6 +78,7 @@ import org.apache.hadoop.yarn.client.api.NMTokenCache; import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException; import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; +import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; @@ -730,6 +732,16 @@ private List getResources() throws Exception { register(); addOutstandingRequestOnResync(); return null; + } catch (InvalidLabelResourceRequestException e) { + // An invalid label exception means the requested node label expression + // is not accessible, so the job is killed in this case. 
+ String diagMsg = "Requested node-label-expression is invalid: " + + StringUtils.stringifyException(e); + LOG.info(diagMsg); + JobId jobId = this.getJob().getID(); + eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg)); + eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); + throw e; } catch (Exception e) { // This can happen when the connection to the RM has gone down. Keep // re-trying until the retryInterval has expired. diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 10d97f3ad39..f250ee2545d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -631,6 +631,9 @@ Release 2.8.0 - UNRELEASED YARN-4537. Pull out priority comparison from fifocomparator and use compound comparator for FifoOrdering policy. (Rohith Sharma K S via jianhe) + YARN-4582. Label-related invalid resource request exception should be able to + properly handled by application. (Bibin A Chundatt via wangda) + OPTIMIZATIONS YARN-3339. TestDockerContainerExecutor should pull a single image and not diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidLabelResourceRequestException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidLabelResourceRequestException.java new file mode 100644 index 00000000000..219b1eda150 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidLabelResourceRequestException.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.exceptions; + +import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.records.ResourceRequest; + +/** + * This exception is thrown when a {@link ResourceRequest} sent through + * {@link ApplicationMasterProtocol#allocate(AllocateRequest)} carries a node + * label expression that is rejected as invalid, for example because the + * queue does not have permission to access the requested label. + * + */ +public class InvalidLabelResourceRequestException + extends InvalidResourceRequestException { + + private static final long serialVersionUID = 13498237L; + + public InvalidLabelResourceRequestException(Throwable cause) { + super(cause); + } + + public InvalidLabelResourceRequestException(String message) { + super(message); + } + + public InvalidLabelResourceRequestException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java index abefee84ed6..a80e921cb11 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException; import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -291,7 +292,7 @@ private static void validateResourceRequest(ResourceRequest resReq, // we don't allow specify label expression with more than one node labels now if (labelExp != null && labelExp.contains("&&")) { - throw new InvalidResourceRequestException( + throw new InvalidLabelResourceRequestException( "Invailid resource request, queue=" + queueInfo.getQueueName() + " specified more than one node label " + "in a node label expression, node label expression = " @@ -301,7 +302,8 @@ private static void validateResourceRequest(ResourceRequest resReq, if (labelExp != null && !labelExp.trim().isEmpty() && queueInfo != null) { if (!checkQueueLabelExpression(queueInfo.getAccessibleNodeLabels(), labelExp, rmContext)) { - throw new InvalidResourceRequestException("Invalid resource request" + throw new InvalidLabelResourceRequestException( + "Invalid resource request" + ", queue=" + queueInfo.getQueueName() + " doesn't have permission to access all labels " diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java index 
ab053d33098..0e84d38bcc4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java @@ -60,6 +60,7 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException; import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException; import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -191,7 +192,7 @@ public void testNormalizeRequestWithDominantResourceCalculator() { assertEquals(2048, ask.getCapability().getMemory()); } - @Test (timeout = 30000) + @Test(timeout = 30000) public void testValidateResourceRequestWithErrorLabelsPermission() throws IOException { // mock queue and scheduler @@ -336,7 +337,7 @@ public void testValidateResourceRequestWithErrorLabelsPermission() e.printStackTrace(); fail("Should be valid when request labels is empty"); } - + boolean invalidlabelexception=false; // queue doesn't have label, failed (when request any label) try { // set queue accessible node labels to empty @@ -354,12 +355,15 @@ public void testValidateResourceRequestWithErrorLabelsPermission() SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue", scheduler, rmContext); fail("Should fail"); + } catch (InvalidLabelResourceRequestException e) { + invalidlabelexception=true; } catch (InvalidResourceRequestException e) { } finally { rmContext.getNodeLabelManager().removeFromClusterNodeLabels( Arrays.asList("x")); } - + Assert.assertTrue("InvalidLabelResourceRequestException 
excpeted", + invalidlabelexception); // queue is "*", always succeeded try { // set queue accessible node labels to empty