MAPREDUCE-7048. Uber AM can crash due to unknown task in statusUpdate. Contributed by Peter Bacsko

(cherry picked from commit 08feac4c83)
This commit is contained in:
Jason Lowe 2018-02-12 14:30:42 -06:00
parent ed512645b2
commit b8b67e4b8d
2 changed files with 95 additions and 5 deletions

View File

@ -195,6 +195,7 @@ abstract public class Task implements Writable, Configurable {
protected SecretKey tokenSecret;
protected SecretKey shuffleSecret;
protected GcTimeUpdater gcUpdater;
private boolean uberized = false;
////////////////////////////////////////////
// Constructors
@ -785,9 +786,6 @@ abstract public class Task implements Writable, Configurable {
long taskProgressInterval = MRJobConfUtil.
getTaskProgressReportInterval(conf);
boolean uberized = conf.getBoolean("mapreduce.task.uberized",
false);
while (!taskDone.get()) {
synchronized (lock) {
done = false;
@ -1164,11 +1162,17 @@ abstract public class Task implements Writable, Configurable {
public void statusUpdate(TaskUmbilicalProtocol umbilical)
throws IOException {
int retries = MAX_RETRIES;
while (true) {
try {
if (!umbilical.statusUpdate(getTaskID(), taskStatus)) {
LOG.warn("Parent died. Exiting "+taskId);
System.exit(66);
if (uberized) {
LOG.warn("Task no longer available: " + taskId);
break;
} else {
LOG.warn("Parent died. Exiting " + taskId);
ExitUtil.terminate(66);
}
}
taskStatus.clearStatus();
return;
@ -1381,6 +1385,8 @@ abstract public class Task implements Writable, Configurable {
NetUtils.addStaticResolution(name, resolvedName);
}
}
uberized = conf.getBoolean("mapreduce.task.uberized", false);
}
public Configuration getConf() {

View File

@ -0,0 +1,84 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.when;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
/**
 * Tests how {@link Task#statusUpdate(TaskUmbilicalProtocol)} reacts when the
 * umbilical answers {@code false} to a status update, in both uberized and
 * non-uberized configurations.
 */
@RunWith(MockitoJUnitRunner.class)
public class TestTask {
  /** Mocked umbilical whose {@code statusUpdate} answers are scripted. */
  @Mock
  private TaskUmbilicalProtocol umbilical;

  /** Task under test; re-created before every test method. */
  private Task task;

  /**
   * Creates a fresh stub task and disables real JVM exits so that a call to
   * {@code ExitUtil.terminate} surfaces as an {@link ExitException} that the
   * tests can observe.
   */
  @Before
  public void setup() {
    task = new StubTask();
    ExitUtil.disableSystemExit();
  }

  /**
   * Clears the exit exception recorded by {@link ExitUtil} so that the state
   * produced by one test does not leak into tests that run later in the same
   * JVM.
   */
  @After
  public void cleanup() {
    ExitUtil.resetFirstExitException();
  }

  /**
   * In uber mode a {@code false} answer from the umbilical must not terminate
   * the process; the test passes when the call returns without throwing.
   */
  @Test
  public void testStatusUpdateDoesNotExitInUberMode() throws Exception {
    setupTest(true);
    task.statusUpdate(umbilical);
  }

  /**
   * Outside uber mode a {@code false} answer from the umbilical must lead to
   * a termination request, observed here as an {@link ExitException}.
   */
  @Test(expected = ExitException.class)
  public void testStatusUpdateExitsInNonUberMode() throws Exception {
    setupTest(false);
    task.statusUpdate(umbilical);
  }

  /**
   * Configures the task and scripts the mocked umbilical.
   *
   * @param uberized value stored under {@code mapreduce.task.uberized} in the
   *                 configuration handed to the task
   * @throws IOException declared by the mocked umbilical call
   * @throws InterruptedException declared by the mocked umbilical call
   */
  private void setupTest(boolean uberized)
      throws IOException, InterruptedException {
    // Start from an empty configuration (no default resources loaded) so the
    // only setting in play is the one under test.
    Configuration conf = new Configuration(false);
    conf.setBoolean("mapreduce.task.uberized", uberized);
    task.setConf(conf);
    // First answer is false (the case under test); the second answer is true
    // to avoid a possible infinite loop should statusUpdate retry.
    when(umbilical.statusUpdate(any(TaskAttemptID.class),
        any(TaskStatus.class))).thenReturn(false, true);
  }

  /**
   * Minimal concrete {@link Task} used only to reach the inherited
   * {@code statusUpdate} logic. Declared static because it needs no access
   * to the enclosing test instance.
   */
  public static class StubTask extends Task {
    @Override
    public void run(JobConf job, TaskUmbilicalProtocol umbilical)
        throws IOException, ClassNotFoundException, InterruptedException {
      // nop
    }

    @Override
    public boolean isMapTask() {
      return false;
    }
  }
}