MAPREDUCE-3250. When AM restarts, client keeps reconnecting to the new AM and prints a lot of logs. (vinodkv via mahadev) - Merging r1189023 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1189024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mahadev Konar 2011-10-26 04:35:40 +00:00
parent b0384fc087
commit 383b8ed9d9
4 changed files with 303 additions and 222 deletions

View File

@ -1715,6 +1715,9 @@ Release 0.23.0 - Unreleased
MAPREDUCE-3269. Fixed log4j properties to correctly set logging options MAPREDUCE-3269. Fixed log4j properties to correctly set logging options
for JobHistoryServer vis-a-vis JobSummary logs. (mahadev via acmurthy) for JobHistoryServer vis-a-vis JobSummary logs. (mahadev via acmurthy)
MAPREDUCE-3250. When AM restarts, client keeps reconnecting to the new AM
and prints a lot of logs. (vinodkv via mahadev)
Release 0.22.0 - Unreleased Release 0.22.0 - Unreleased
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -81,7 +81,6 @@ public class ClientServiceDelegate {
private final ApplicationId appId; private final ApplicationId appId;
private final ResourceMgrDelegate rm; private final ResourceMgrDelegate rm;
private final MRClientProtocol historyServerProxy; private final MRClientProtocol historyServerProxy;
private boolean forceRefresh;
private MRClientProtocol realProxy = null; private MRClientProtocol realProxy = null;
private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
private static String UNKNOWN_USER = "Unknown User"; private static String UNKNOWN_USER = "Unknown User";
@ -122,7 +121,7 @@ public class ClientServiceDelegate {
} }
private MRClientProtocol getProxy() throws YarnRemoteException { private MRClientProtocol getProxy() throws YarnRemoteException {
if (!forceRefresh && realProxy != null) { if (realProxy != null) {
return realProxy; return realProxy;
} }
@ -133,7 +132,9 @@ public class ClientServiceDelegate {
trackingUrl = application.getTrackingUrl(); trackingUrl = application.getTrackingUrl();
} }
String serviceAddr = null; String serviceAddr = null;
while (application == null || YarnApplicationState.RUNNING == application.getYarnApplicationState()) { while (application == null
|| YarnApplicationState.RUNNING == application
.getYarnApplicationState()) {
if (application == null) { if (application == null) {
LOG.info("Could not get Job info from RM for job " + jobId LOG.info("Could not get Job info from RM for job " + jobId
+ ". Redirecting to job history server."); + ". Redirecting to job history server.");
@ -163,7 +164,7 @@ public class ClientServiceDelegate {
} }
LOG.info("Tracking Url of JOB is " + application.getTrackingUrl()); LOG.info("Tracking Url of JOB is " + application.getTrackingUrl());
LOG.info("Connecting to " + serviceAddr); LOG.info("Connecting to " + serviceAddr);
instantiateAMProxy(serviceAddr); realProxy = instantiateAMProxy(serviceAddr);
return realProxy; return realProxy;
} catch (IOException e) { } catch (IOException e) {
//possibly the AM has crashed //possibly the AM has crashed
@ -233,10 +234,12 @@ public class ClientServiceDelegate {
return historyServerProxy; return historyServerProxy;
} }
private void instantiateAMProxy(final String serviceAddr) throws IOException { MRClientProtocol instantiateAMProxy(final String serviceAddr)
throws IOException {
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
LOG.trace("Connecting to ApplicationMaster at: " + serviceAddr); LOG.trace("Connecting to ApplicationMaster at: " + serviceAddr);
realProxy = currentUser.doAs(new PrivilegedAction<MRClientProtocol>() { MRClientProtocol proxy = currentUser
.doAs(new PrivilegedAction<MRClientProtocol>() {
@Override @Override
public MRClientProtocol run() { public MRClientProtocol run() {
YarnRPC rpc = YarnRPC.create(conf); YarnRPC rpc = YarnRPC.create(conf);
@ -245,6 +248,7 @@ public class ClientServiceDelegate {
} }
}); });
LOG.trace("Connected to ApplicationMaster at: " + serviceAddr); LOG.trace("Connected to ApplicationMaster at: " + serviceAddr);
return proxy;
} }
private synchronized Object invoke(String method, Class argClass, private synchronized Object invoke(String method, Class argClass,
@ -274,12 +278,14 @@ public class ClientServiceDelegate {
" retrying.."); " retrying..");
LOG.debug("Failed exception on AM/History contact", LOG.debug("Failed exception on AM/History contact",
e.getTargetException()); e.getTargetException());
forceRefresh = true; // Force reconnection by setting the proxy to null.
realProxy = null;
} catch (Exception e) { } catch (Exception e) {
LOG.info("Failed to contact AM/History for job " + jobId LOG.info("Failed to contact AM/History for job " + jobId
+ " Will retry.."); + " Will retry..");
LOG.debug("Failing to contact application master", e); LOG.debug("Failing to contact application master", e);
forceRefresh = true; // Force reconnection by setting the proxy to null.
realProxy = null;
} }
} }
} }

View File

@ -68,8 +68,6 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.YarnException;
import org.apache.hadoop.yarn.api.ClientRMProtocol; import org.apache.hadoop.yarn.api.ClientRMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
@ -84,6 +82,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
@ -123,20 +123,24 @@ public class TestClientRedirect {
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME); conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS); conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS); conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);
// Start the RM.
RMService rmService = new RMService("test"); RMService rmService = new RMService("test");
rmService.init(conf); rmService.init(conf);
rmService.start(); rmService.start();
// Start the AM.
AMService amService = new AMService(); AMService amService = new AMService();
amService.init(conf); amService.init(conf);
amService.start(conf); amService.start(conf);
amRunning = true;
// Start the HS.
HistoryService historyService = new HistoryService(); HistoryService historyService = new HistoryService();
historyService.init(conf); historyService.init(conf);
historyService.start(conf); historyService.start(conf);
LOG.info("services started"); LOG.info("services started");
Cluster cluster = new Cluster(conf); Cluster cluster = new Cluster(conf);
org.apache.hadoop.mapreduce.JobID jobID = org.apache.hadoop.mapreduce.JobID jobID =
new org.apache.hadoop.mapred.JobID("201103121733", 1); new org.apache.hadoop.mapred.JobID("201103121733", 1);
@ -151,13 +155,13 @@ public class TestClientRedirect {
//bring down the AM service //bring down the AM service
amService.stop(); amService.stop();
amRunning = false;
LOG.info("Sleeping for 5 seconds after stop for" + LOG.info("Sleeping for 5 seconds after stop for" +
" the server to exit cleanly.."); " the server to exit cleanly..");
Thread.sleep(5000); Thread.sleep(5000);
amRestarting = true; amRestarting = true;
// Same client // Same client
//results are returned from fake (not started job) //results are returned from fake (not started job)
counters = cluster.getJob(jobID).getCounters(); counters = cluster.getJob(jobID).getCounters();
@ -181,14 +185,15 @@ public class TestClientRedirect {
amService = new AMService(); amService = new AMService();
amService.init(conf); amService.init(conf);
amService.start(conf); amService.start(conf);
amRunning = true;
amContact = false; //reset amContact = false; //reset
counters = cluster.getJob(jobID).getCounters(); counters = cluster.getJob(jobID).getCounters();
validateCounters(counters); validateCounters(counters);
Assert.assertTrue(amContact); Assert.assertTrue(amContact);
amRunning = false; // Stop the AM. It is not even restarting. So it should be treated as
// completed.
amService.stop();
// Same client // Same client
counters = cluster.getJob(jobID).getCounters(); counters = cluster.getJob(jobID).getCounters();
@ -347,6 +352,7 @@ public class TestClientRedirect {
private InetSocketAddress bindAddress; private InetSocketAddress bindAddress;
private Server server; private Server server;
private final String hostAddress; private final String hostAddress;
public AMService() { public AMService() {
this(AMHOSTADDRESS); this(AMHOSTADDRESS);
} }
@ -376,11 +382,13 @@ public class TestClientRedirect {
NetUtils.createSocketAddr(hostNameResolved.getHostAddress() NetUtils.createSocketAddr(hostNameResolved.getHostAddress()
+ ":" + server.getPort()); + ":" + server.getPort());
super.start(); super.start();
amRunning = true;
} }
public void stop() { public void stop() {
server.stop(); server.stop();
super.stop(); super.stop();
amRunning = false;
} }
@Override @Override

View File

@ -1,208 +1,272 @@
/** /**
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file * regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the * to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance * "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at * with the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.hadoop.mapred; package org.apache.hadoop.mapred;
import static org.mockito.Mockito.mock; import static org.mockito.Matchers.any;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.*;
import junit.framework.Assert;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID; import junit.framework.Assert;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest; import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse; import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol;
import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse;
import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.mapreduce.v2.api.records.JobState;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.junit.Test; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
/** import org.apache.hadoop.yarn.ipc.RPCUtil;
* Tests for ClientServiceDelegate.java import org.apache.hadoop.yarn.util.BuilderUtils;
*/ import org.apache.hadoop.yarn.util.Records;
import org.junit.Test;
public class TestClientServiceDelegate {
private JobID oldJobId = JobID.forName("job_1315895242400_2"); /**
private org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter * Tests for ClientServiceDelegate.java
.toYarn(oldJobId); */
@Test public class TestClientServiceDelegate {
public void testUnknownAppInRM() throws Exception { private JobID oldJobId = JobID.forName("job_1315895242400_2");
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class); private org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter
when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn( .toYarn(oldJobId);
getJobReportResponse());
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate( @Test
historyServerProxy, getRMDelegate()); public void testUnknownAppInRM() throws Exception {
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn(
Assert.assertNotNull(jobStatus); getJobReportResponse());
} ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
historyServerProxy, getRMDelegate());
@Test
public void testRemoteExceptionFromHistoryServer() throws Exception { JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
Assert.assertNotNull(jobStatus);
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class); }
when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
RPCUtil.getRemoteException("Job ID doesnot Exist")); @Test
public void testRemoteExceptionFromHistoryServer() throws Exception {
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId())) MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
.thenReturn(null); when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
RPCUtil.getRemoteException("Job ID doesnot Exist"));
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
historyServerProxy, rm); ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
try { .thenReturn(null);
clientServiceDelegate.getJobStatus(oldJobId);
Assert.fail("Invoke should throw exception after retries."); ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
} catch (YarnRemoteException e) { historyServerProxy, rm);
Assert.assertEquals("Job ID doesnot Exist", e.getMessage());
} try {
} clientServiceDelegate.getJobStatus(oldJobId);
Assert.fail("Invoke should throw exception after retries.");
@Test } catch (YarnRemoteException e) {
public void testRetriesOnConnectionFailure() throws Exception { Assert.assertEquals("Job ID doesnot Exist", e.getMessage());
}
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class); }
when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
new RuntimeException("1")).thenThrow(new RuntimeException("2")) @Test
.thenThrow(new RuntimeException("3")) public void testRetriesOnConnectionFailure() throws Exception {
.thenReturn(getJobReportResponse());
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class); when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId())) new RuntimeException("1")).thenThrow(new RuntimeException("2"))
.thenReturn(null); .thenThrow(new RuntimeException("3"))
.thenReturn(getJobReportResponse());
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
historyServerProxy, rm); ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); .thenReturn(null);
Assert.assertNotNull(jobStatus);
} ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
historyServerProxy, rm);
@Test
public void testHistoryServerNotConfigured() throws Exception { JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
//RM doesn't have app report and job History Server is not configured Assert.assertNotNull(jobStatus);
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate( verify(historyServerProxy, times(4)).getJobReport(
null, getRMDelegate()); any(GetJobReportRequest.class));
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); }
Assert.assertEquals("N/A", jobStatus.getUsername());
Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState()); @Test
public void testHistoryServerNotConfigured() throws Exception {
//RM has app report and job History Server is not configured //RM doesn't have app report and job History Server is not configured
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class); ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
ApplicationReport applicationReport = getApplicationReport(); null, getRMDelegate());
when(rm.getApplicationReport(jobId.getAppId())).thenReturn( JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
applicationReport); Assert.assertEquals("N/A", jobStatus.getUsername());
Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState());
clientServiceDelegate = getClientServiceDelegate(null, rm);
jobStatus = clientServiceDelegate.getJobStatus(oldJobId); //RM has app report and job History Server is not configured
Assert.assertEquals(applicationReport.getUser(), jobStatus.getUsername()); ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
Assert.assertEquals(JobStatus.State.SUCCEEDED, jobStatus.getState()); ApplicationReport applicationReport = getFinishedApplicationReport();
} when(rm.getApplicationReport(jobId.getAppId())).thenReturn(
applicationReport);
@Test clientServiceDelegate = getClientServiceDelegate(null, rm);
public void testJobReportFromHistoryServer() throws Exception { jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class); Assert.assertEquals(applicationReport.getUser(), jobStatus.getUsername());
when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn( Assert.assertEquals(JobStatus.State.SUCCEEDED, jobStatus.getState());
getJobReportResponseFromHistoryServer()); }
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId())) @Test
.thenReturn(null); public void testJobReportFromHistoryServer() throws Exception {
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate( MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
historyServerProxy, rm); when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn(
getJobReportResponseFromHistoryServer());
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
Assert.assertNotNull(jobStatus); when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
Assert.assertEquals("TestJobFilePath", jobStatus.getJobFile()); .thenReturn(null);
Assert.assertEquals("http://TestTrackingUrl", jobStatus.getTrackingUrl()); ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
Assert.assertEquals(1.0f, jobStatus.getMapProgress()); historyServerProxy, rm);
Assert.assertEquals(1.0f, jobStatus.getReduceProgress());
} JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
Assert.assertNotNull(jobStatus);
private GetJobReportRequest getJobReportRequest() { Assert.assertEquals("TestJobFilePath", jobStatus.getJobFile());
GetJobReportRequest request = Records.newRecord(GetJobReportRequest.class); Assert.assertEquals("http://TestTrackingUrl", jobStatus.getTrackingUrl());
request.setJobId(jobId); Assert.assertEquals(1.0f, jobStatus.getMapProgress());
return request; Assert.assertEquals(1.0f, jobStatus.getReduceProgress());
} }
private GetJobReportResponse getJobReportResponse() { @Test
GetJobReportResponse jobReportResponse = Records public void testReconnectOnAMRestart() throws IOException {
.newRecord(GetJobReportResponse.class);
JobReport jobReport = Records.newRecord(JobReport.class); MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
jobReport.setJobId(jobId);
jobReport.setJobState(JobState.SUCCEEDED); // RM returns AM1 url, null, null and AM2 url on invocations.
jobReportResponse.setJobReport(jobReport); // Nulls simulate the time when AM2 is in the process of restarting.
return jobReportResponse; ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
} when(rmDelegate.getApplicationReport(jobId.getAppId())).thenReturn(
getRunningApplicationReport("am1", 78)).thenReturn(
private ApplicationReport getApplicationReport() { getRunningApplicationReport(null, 0)).thenReturn(
ApplicationReport applicationReport = Records getRunningApplicationReport(null, 0)).thenReturn(
.newRecord(ApplicationReport.class); getRunningApplicationReport("am2", 90));
applicationReport.setYarnApplicationState(YarnApplicationState.FINISHED);
applicationReport.setUser("root"); GetJobReportResponse jobReportResponse1 = mock(GetJobReportResponse.class);
applicationReport.setHost("N/A"); when(jobReportResponse1.getJobReport()).thenReturn(
applicationReport.setName("N/A"); MRBuilderUtils.newJobReport(jobId, "jobName-firstGen", "user",
applicationReport.setQueue("N/A"); JobState.RUNNING, 0, 0, 0, 0, 0, 0, "anything"));
applicationReport.setStartTime(0);
applicationReport.setFinishTime(0); // First AM returns a report with jobName firstGen and simulates AM shutdown
applicationReport.setTrackingUrl("N/A"); // on second invocation.
applicationReport.setDiagnostics("N/A"); MRClientProtocol firstGenAMProxy = mock(MRClientProtocol.class);
applicationReport.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED); when(firstGenAMProxy.getJobReport(any(GetJobReportRequest.class)))
return applicationReport; .thenReturn(jobReportResponse1).thenThrow(
} new RuntimeException("AM is down!"));
private ResourceMgrDelegate getRMDelegate() throws YarnRemoteException { GetJobReportResponse jobReportResponse2 = mock(GetJobReportResponse.class);
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class); when(jobReportResponse2.getJobReport()).thenReturn(
when(rm.getApplicationReport(jobId.getAppId())).thenReturn(null); MRBuilderUtils.newJobReport(jobId, "jobName-secondGen", "user",
return rm; JobState.RUNNING, 0, 0, 0, 0, 0, 0, "anything"));
}
// Second AM generation returns a report with jobName secondGen
private ClientServiceDelegate getClientServiceDelegate( MRClientProtocol secondGenAMProxy = mock(MRClientProtocol.class);
MRClientProtocol historyServerProxy, ResourceMgrDelegate rm) { when(secondGenAMProxy.getJobReport(any(GetJobReportRequest.class)))
Configuration conf = new YarnConfiguration(); .thenReturn(jobReportResponse2);
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
ClientServiceDelegate clientServiceDelegate = new ClientServiceDelegate( ClientServiceDelegate clientServiceDelegate = spy(getClientServiceDelegate(
conf, rm, oldJobId, historyServerProxy); historyServerProxy, rmDelegate));
return clientServiceDelegate; // First time, connection should be to AM1, then to AM2. Further requests
} // should use the same proxy to AM2 and so instantiateProxy shouldn't be
// called.
private GetJobReportResponse getJobReportResponseFromHistoryServer() { doReturn(firstGenAMProxy).doReturn(secondGenAMProxy).when(
GetJobReportResponse jobReportResponse = Records clientServiceDelegate).instantiateAMProxy(any(String.class));
.newRecord(GetJobReportResponse.class);
JobReport jobReport = Records.newRecord(JobReport.class); JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
jobReport.setJobId(jobId); Assert.assertNotNull(jobStatus);
jobReport.setJobState(JobState.SUCCEEDED); Assert.assertEquals("jobName-firstGen", jobStatus.getJobName());
jobReport.setMapProgress(1.0f);
jobReport.setReduceProgress(1.0f); jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
jobReport.setJobFile("TestJobFilePath"); Assert.assertNotNull(jobStatus);
jobReport.setTrackingUrl("TestTrackingUrl"); Assert.assertEquals("jobName-secondGen", jobStatus.getJobName());
jobReportResponse.setJobReport(jobReport);
return jobReportResponse; jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
} Assert.assertNotNull(jobStatus);
} Assert.assertEquals("jobName-secondGen", jobStatus.getJobName());
verify(clientServiceDelegate, times(2)).instantiateAMProxy(
any(String.class));
}
/**
 * Builds a {@link GetJobReportRequest} addressed to this test's job id.
 *
 * @return a freshly created request record with its job id set
 */
private GetJobReportRequest getJobReportRequest() {
  final GetJobReportRequest jobReportRequest =
      Records.newRecord(GetJobReportRequest.class);
  jobReportRequest.setJobId(jobId);
  return jobReportRequest;
}
/**
 * Builds a {@link GetJobReportResponse} wrapping a report for this test's
 * job in the {@code SUCCEEDED} state.
 *
 * @return a freshly created response record carrying the job report
 */
private GetJobReportResponse getJobReportResponse() {
  // Populate the report first, then wrap it in a new response record.
  final JobReport succeededReport = Records.newRecord(JobReport.class);
  succeededReport.setJobId(jobId);
  succeededReport.setJobState(JobState.SUCCEEDED);

  final GetJobReportResponse response =
      Records.newRecord(GetJobReportResponse.class);
  response.setJobReport(succeededReport);
  return response;
}
/**
 * Builds an {@link ApplicationReport} whose YARN state is {@code FINISHED}
 * and whose final status is {@code SUCCEEDED}. All other fields are fixed
 * placeholder values.
 *
 * @return the finished-application report
 */
private ApplicationReport getFinishedApplicationReport() {
  final ApplicationReport finishedReport = BuilderUtils.newApplicationReport(
      BuilderUtils.newApplicationId(1234, 5),
      "user", "queue", "appname",
      "host", 124, null,
      YarnApplicationState.FINISHED, "diagnostics", "url", 0, 0,
      FinalApplicationStatus.SUCCEEDED, null);
  return finishedReport;
}
/**
 * Builds an {@link ApplicationReport} whose YARN state is {@code RUNNING}
 * and whose final status is {@code UNDEFINED}, passing the given host and
 * port through to the report builder. All other fields are fixed
 * placeholder values.
 *
 * @param host host value handed to the builder; callers in this file also
 *             pass {@code null}
 * @param port port value handed to the builder
 * @return the running-application report
 */
private ApplicationReport getRunningApplicationReport(String host, int port) {
  final ApplicationReport runningReport = BuilderUtils.newApplicationReport(
      BuilderUtils.newApplicationId(1234, 5),
      "user", "queue", "appname",
      host, port, null,
      YarnApplicationState.RUNNING, "diagnostics", "url", 0, 0,
      FinalApplicationStatus.UNDEFINED, null);
  return runningReport;
}
/**
 * Creates a mocked {@link ResourceMgrDelegate} that is stubbed to return
 * {@code null} as the application report for this test's application id.
 *
 * @return the stubbed mock
 * @throws YarnRemoteException declared by the stubbed call's signature
 */
private ResourceMgrDelegate getRMDelegate() throws YarnRemoteException {
  final ResourceMgrDelegate mockedRm = mock(ResourceMgrDelegate.class);
  when(mockedRm.getApplicationReport(jobId.getAppId())).thenReturn(null);
  return mockedRm;
}
/**
 * Creates the {@link ClientServiceDelegate} under test for this test's job,
 * using a {@link YarnConfiguration} whose framework name is set to YARN.
 *
 * @param historyServerProxy history-server proxy handed to the delegate;
 *                           callers in this file also pass {@code null}
 * @param rm resource-manager delegate handed to the delegate
 * @return the newly constructed delegate
 */
private ClientServiceDelegate getClientServiceDelegate(
    MRClientProtocol historyServerProxy, ResourceMgrDelegate rm) {
  final Configuration yarnConf = new YarnConfiguration();
  yarnConf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
  return new ClientServiceDelegate(yarnConf, rm, oldJobId, historyServerProxy);
}
/**
 * Builds a {@link GetJobReportResponse} for this test's job: state
 * {@code SUCCEEDED}, map and reduce progress both {@code 1.0f}, and fixed
 * job-file and tracking-URL strings.
 *
 * @return a freshly created response record carrying the job report
 */
private GetJobReportResponse getJobReportResponseFromHistoryServer() {
  // Fill in the completed-job report before wrapping it in the response.
  final JobReport historyReport = Records.newRecord(JobReport.class);
  historyReport.setJobId(jobId);
  historyReport.setJobState(JobState.SUCCEEDED);
  historyReport.setMapProgress(1.0f);
  historyReport.setReduceProgress(1.0f);
  historyReport.setJobFile("TestJobFilePath");
  historyReport.setTrackingUrl("TestTrackingUrl");

  final GetJobReportResponse response =
      Records.newRecord(GetJobReportResponse.class);
  response.setJobReport(historyReport);
  return response;
}
}