YARN-4582. Label-related invalid resource request exception should be properly handled by application. (Bibin A Chundatt via wangda)
(cherry picked from commit 9e792da014)
This commit is contained in:
parent
df95343047
commit
15e174fe79
|
@ -61,6 +61,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
|
||||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent;
|
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.StringInterner;
|
import org.apache.hadoop.util.StringInterner;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.Container;
|
import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
||||||
|
@ -77,6 +78,7 @@ import org.apache.hadoop.yarn.client.ClientRMProxy;
|
||||||
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
||||||
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
|
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
|
||||||
import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
|
import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||||
|
@ -730,6 +732,16 @@ public class RMContainerAllocator extends RMContainerRequestor
|
||||||
register();
|
register();
|
||||||
addOutstandingRequestOnResync();
|
addOutstandingRequestOnResync();
|
||||||
return null;
|
return null;
|
||||||
|
} catch (InvalidLabelResourceRequestException e) {
|
||||||
|
// An invalid-label exception means the requested node label is not
|
||||||
|
// accessible to the queue, so the job is killed in this case.
|
||||||
|
String diagMsg = "Requested node-label-expression is invalid: "
|
||||||
|
+ StringUtils.stringifyException(e);
|
||||||
|
LOG.info(diagMsg);
|
||||||
|
JobId jobId = this.getJob().getID();
|
||||||
|
eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
|
||||||
|
eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
|
||||||
|
throw e;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// This can happen when the connection to the RM has gone down. Keep
|
// This can happen when the connection to the RM has gone down. Keep
|
||||||
// re-trying until the retryInterval has expired.
|
// re-trying until the retryInterval has expired.
|
||||||
|
|
|
@ -631,6 +631,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
YARN-4537. Pull out priority comparison from fifocomparator and use compound
|
YARN-4537. Pull out priority comparison from fifocomparator and use compound
|
||||||
comparator for FifoOrdering policy. (Rohith Sharma K S via jianhe)
|
comparator for FifoOrdering policy. (Rohith Sharma K S via jianhe)
|
||||||
|
|
||||||
|
YARN-4582. Label-related invalid resource request exception should be
|
||||||
|
properly handled by application. (Bibin A Chundatt via wangda)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.exceptions;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This exception is thrown when a {@link ResourceRequest} submitted via
|
||||||
|
* {@link ApplicationMasterProtocol#allocate(AllocateRequest)} carries a
|
||||||
|
* node label expression that is not permitted, for example one that the
|
||||||
|
* target queue does not have permission to access.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class InvalidLabelResourceRequestException
|
||||||
|
extends InvalidResourceRequestException {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 13498237L;
|
||||||
|
|
||||||
|
public InvalidLabelResourceRequestException(Throwable cause) {
|
||||||
|
super(cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
public InvalidLabelResourceRequestException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public InvalidLabelResourceRequestException(String message, Throwable cause) {
|
||||||
|
super(message, cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.QueueACL;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
|
@ -291,7 +292,7 @@ public class SchedulerUtils {
|
||||||
|
|
||||||
// we don't allow specifying a label expression with more than one node label for now
|
// we don't allow specifying a label expression with more than one node label for now
|
||||||
if (labelExp != null && labelExp.contains("&&")) {
|
if (labelExp != null && labelExp.contains("&&")) {
|
||||||
throw new InvalidResourceRequestException(
|
throw new InvalidLabelResourceRequestException(
|
||||||
"Invailid resource request, queue=" + queueInfo.getQueueName()
|
"Invailid resource request, queue=" + queueInfo.getQueueName()
|
||||||
+ " specified more than one node label "
|
+ " specified more than one node label "
|
||||||
+ "in a node label expression, node label expression = "
|
+ "in a node label expression, node label expression = "
|
||||||
|
@ -301,7 +302,8 @@ public class SchedulerUtils {
|
||||||
if (labelExp != null && !labelExp.trim().isEmpty() && queueInfo != null) {
|
if (labelExp != null && !labelExp.trim().isEmpty() && queueInfo != null) {
|
||||||
if (!checkQueueLabelExpression(queueInfo.getAccessibleNodeLabels(),
|
if (!checkQueueLabelExpression(queueInfo.getAccessibleNodeLabels(),
|
||||||
labelExp, rmContext)) {
|
labelExp, rmContext)) {
|
||||||
throw new InvalidResourceRequestException("Invalid resource request"
|
throw new InvalidLabelResourceRequestException(
|
||||||
|
"Invalid resource request"
|
||||||
+ ", queue="
|
+ ", queue="
|
||||||
+ queueInfo.getQueueName()
|
+ queueInfo.getQueueName()
|
||||||
+ " doesn't have permission to access all labels "
|
+ " doesn't have permission to access all labels "
|
||||||
|
|
|
@ -60,6 +60,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl;
|
import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
||||||
|
@ -191,7 +192,7 @@ public class TestSchedulerUtils {
|
||||||
assertEquals(2048, ask.getCapability().getMemory());
|
assertEquals(2048, ask.getCapability().getMemory());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 30000)
|
@Test(timeout = 30000)
|
||||||
public void testValidateResourceRequestWithErrorLabelsPermission()
|
public void testValidateResourceRequestWithErrorLabelsPermission()
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// mock queue and scheduler
|
// mock queue and scheduler
|
||||||
|
@ -336,7 +337,7 @@ public class TestSchedulerUtils {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
fail("Should be valid when request labels is empty");
|
fail("Should be valid when request labels is empty");
|
||||||
}
|
}
|
||||||
|
boolean invalidlabelexception=false;
|
||||||
// queue doesn't have label, failed (when request any label)
|
// queue doesn't have label, failed (when request any label)
|
||||||
try {
|
try {
|
||||||
// set queue accessible node labels to empty
|
// set queue accessible node labels to empty
|
||||||
|
@ -354,12 +355,15 @@ public class TestSchedulerUtils {
|
||||||
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue",
|
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue",
|
||||||
scheduler, rmContext);
|
scheduler, rmContext);
|
||||||
fail("Should fail");
|
fail("Should fail");
|
||||||
|
} catch (InvalidLabelResourceRequestException e) {
|
||||||
|
invalidlabelexception=true;
|
||||||
} catch (InvalidResourceRequestException e) {
|
} catch (InvalidResourceRequestException e) {
|
||||||
} finally {
|
} finally {
|
||||||
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(
|
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(
|
||||||
Arrays.asList("x"));
|
Arrays.asList("x"));
|
||||||
}
|
}
|
||||||
|
Assert.assertTrue("InvalidLabelResourceRequestException excpeted",
|
||||||
|
invalidlabelexception);
|
||||||
// queue is "*", always succeeded
|
// queue is "*", always succeeded
|
||||||
try {
|
try {
|
||||||
// set queue accessible node labels to empty
|
// set queue accessible node labels to empty
|
||||||
|
|
Loading…
Reference in New Issue