From 90bc439b6636997cd7880c41125dfcc653c713fe Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 7 Feb 2012 21:58:54 +0000 Subject: [PATCH] MAPREDUCE-3815. Fixed MR AM to always use hostnames and never IPs when requesting containers so that scheduler can give off data local containers correctly. Contributed by Siddarth Seth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1241654 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 + .../v2/app/job/impl/TaskAttemptImpl.java | 45 ++++++++-- .../v2/app/job/impl/TestTaskAttempt.java | 82 +++++++++++++++++++ .../v2/app/job/impl/TestTaskImpl.java | 2 +- 4 files changed, 126 insertions(+), 7 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index f3a5debfcd1..82a7331a9f0 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -750,6 +750,10 @@ Release 0.23.1 - Unreleased MAPREDUCE-3436. JobHistory webapp address should use the host configured in the jobhistory address. (Ahmed Radwan via sseth) + + MAPREDUCE-3815. Fixed MR AM to always use hostnames and never IPs when + requesting containers so that scheduler can give off data local containers + correctly. (Siddarth Seth via vinodkv) Release 0.23.0 - 2011-11-01 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index 4f504b381a4..42a6f08e363 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -19,7 +19,9 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; import java.io.IOException; +import java.net.InetAddress; import java.net.InetSocketAddress; +import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; @@ -32,6 +34,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -142,7 +145,7 @@ public abstract class TaskAttemptImpl implements protected final JobConf conf; protected final Path jobFile; protected final int partition; - protected final EventHandler eventHandler; + protected EventHandler eventHandler; private final TaskAttemptId attemptId; private final Clock clock; private final org.apache.hadoop.mapred.JobID oldJobId; @@ -1056,7 +1059,7 @@ public abstract class TaskAttemptImpl implements } } - private static class RequestContainerTransition implements + static class RequestContainerTransition implements SingleArcTransition { private final boolean rescheduled; public RequestContainerTransition(boolean rescheduled) { @@ -1081,14 +1084,44 @@ public abstract class TaskAttemptImpl implements for (String host : taskAttempt.dataLocalHosts) { racks[i++] = RackResolver.resolve(host).getNetworkLocation(); } - taskAttempt.eventHandler.handle( - new ContainerRequestEvent(taskAttempt.attemptId, - taskAttempt.resourceCapability, - taskAttempt.dataLocalHosts, racks)); + taskAttempt.eventHandler.handle(new ContainerRequestEvent( + taskAttempt.attemptId, taskAttempt.resourceCapability, taskAttempt + .resolveHosts(taskAttempt.dataLocalHosts), racks)); } } } + protected String[] resolveHosts(String[] src) { + String[] result = new String[src.length]; + for (int i = 0; i < src.length; i++) { + if (isIP(src[i])) { + result[i] = resolveHost(src[i]); + } else { + result[i] = src[i]; + } + } + return result; + } + + protected String resolveHost(String src) { + String result = src; // Fallback in case of failure. + try { + InetAddress addr = InetAddress.getByName(src); + result = addr.getHostName(); + } catch (UnknownHostException e) { + LOG.warn("Failed to resolve address: " + src + + ". Continuing to use the same."); + } + return result; + } + + private static final Pattern ipPattern = // Pattern for matching ip + Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"); + + protected boolean isIP(String src) { + return ipPattern.matcher(src).matches(); + } + private static class ContainerAssignedTransition implements SingleArcTransition { @SuppressWarnings({ "unchecked" }) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java index 637ae1beff7..68fa6ae9bf6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java @@ -18,30 +18,54 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MapTaskAttemptImpl; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletion; +import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MRApp; +import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; +import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.apache.hadoop.yarn.Clock; +import org.apache.hadoop.yarn.SystemClock; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; +import org.mockito.ArgumentCaptor; @SuppressWarnings("unchecked") public class TestTaskAttempt{ @@ -57,6 +81,64 @@ public class TestTaskAttempt{ MRApp app = new FailingAttemptsMRApp(0, 1); testMRAppHistory(app); } + + @SuppressWarnings("rawtypes") + @Test + public void testHostResolveAttempt() throws Exception { + TaskAttemptImpl.RequestContainerTransition rct = + new TaskAttemptImpl.RequestContainerTransition(false); + + EventHandler eventHandler = mock(EventHandler.class); + String[] hosts = new String[3]; + hosts[0] = "192.168.1.1"; + hosts[1] = "host2"; + hosts[2] = "host3"; + TaskSplitMetaInfo splitInfo = + new TaskSplitMetaInfo(hosts, 0, 128 * 1024 * 1024l); + + TaskAttemptImpl mockTaskAttempt = + createMapTaskAttemptImplForTest(eventHandler, splitInfo); + TaskAttemptImpl spyTa = spy(mockTaskAttempt); + when(spyTa.resolveHost(hosts[0])).thenReturn("host1"); + + TaskAttemptEvent mockTAEvent = mock(TaskAttemptEvent.class); + rct.transition(spyTa, mockTAEvent); + verify(spyTa).resolveHost(hosts[0]); + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(eventHandler, times(2)).handle(arg.capture()); + if (!(arg.getAllValues().get(1) instanceof ContainerRequestEvent)) { + Assert.fail("Second Event not of type ContainerRequestEvent"); + } + Map expected = new HashMap(); + expected.put("host1", true); + expected.put("host2", true); + expected.put("host3", true); + ContainerRequestEvent cre = + (ContainerRequestEvent) arg.getAllValues().get(1); + String[] requestedHosts = cre.getHosts(); + for (String h : requestedHosts) { + expected.remove(h); + } + assertEquals(0, expected.size()); + } + + @SuppressWarnings("rawtypes") + private TaskAttemptImpl createMapTaskAttemptImplForTest( + EventHandler eventHandler, TaskSplitMetaInfo taskSplitMetaInfo) { + ApplicationId appId = BuilderUtils.newApplicationId(1, 1); + JobId jobId = MRBuilderUtils.newJobId(appId, 1); + TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP); + TaskAttemptListener taListener = mock(TaskAttemptListener.class); + Path jobFile = mock(Path.class); + JobConf jobConf = new JobConf(); + OutputCommitter outputCommitter = mock(OutputCommitter.class); + Clock clock = new SystemClock(); + TaskAttemptImpl taImpl = + new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, + taskSplitMetaInfo, jobConf, taListener, outputCommitter, null, + null, clock); + return taImpl; + } private void testMRAppHistory(MRApp app) throws Exception { Configuration conf = new Configuration(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java index b55bae22546..4dcb96a561c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java @@ -59,7 +59,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; -@SuppressWarnings({ "rawtypes", "deprecation" }) +@SuppressWarnings("rawtypes") public class TestTaskImpl { private static final Log LOG = LogFactory.getLog(TestTaskImpl.class);