MAPREDUCE-3250. When AM restarts, client keeps reconnecting to the new AM and prints lots of logs. (vinodkv via mahadev) - Merging r1189023 from trunk.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1189024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b0384fc087
commit
383b8ed9d9
|
@ -1715,6 +1715,9 @@ Release 0.23.0 - Unreleased
|
||||||
MAPREDUCE-3269. Fixed log4j properties to correctly set logging options
|
MAPREDUCE-3269. Fixed log4j properties to correctly set logging options
|
||||||
for JobHistoryServer vis-a-vis JobSummary logs. (mahadev via acmurthy)
|
for JobHistoryServer vis-a-vis JobSummary logs. (mahadev via acmurthy)
|
||||||
|
|
||||||
|
MAPREDUCE-3250. When AM restarts, client keeps reconnecting to the new AM
|
||||||
|
and prints lots of logs. (vinodkv via mahadev)
|
||||||
|
|
||||||
Release 0.22.0 - Unreleased
|
Release 0.22.0 - Unreleased
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -81,7 +81,6 @@ public class ClientServiceDelegate {
|
||||||
private final ApplicationId appId;
|
private final ApplicationId appId;
|
||||||
private final ResourceMgrDelegate rm;
|
private final ResourceMgrDelegate rm;
|
||||||
private final MRClientProtocol historyServerProxy;
|
private final MRClientProtocol historyServerProxy;
|
||||||
private boolean forceRefresh;
|
|
||||||
private MRClientProtocol realProxy = null;
|
private MRClientProtocol realProxy = null;
|
||||||
private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
|
private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
|
||||||
private static String UNKNOWN_USER = "Unknown User";
|
private static String UNKNOWN_USER = "Unknown User";
|
||||||
|
@ -122,7 +121,7 @@ public class ClientServiceDelegate {
|
||||||
}
|
}
|
||||||
|
|
||||||
private MRClientProtocol getProxy() throws YarnRemoteException {
|
private MRClientProtocol getProxy() throws YarnRemoteException {
|
||||||
if (!forceRefresh && realProxy != null) {
|
if (realProxy != null) {
|
||||||
return realProxy;
|
return realProxy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,7 +132,9 @@ public class ClientServiceDelegate {
|
||||||
trackingUrl = application.getTrackingUrl();
|
trackingUrl = application.getTrackingUrl();
|
||||||
}
|
}
|
||||||
String serviceAddr = null;
|
String serviceAddr = null;
|
||||||
while (application == null || YarnApplicationState.RUNNING == application.getYarnApplicationState()) {
|
while (application == null
|
||||||
|
|| YarnApplicationState.RUNNING == application
|
||||||
|
.getYarnApplicationState()) {
|
||||||
if (application == null) {
|
if (application == null) {
|
||||||
LOG.info("Could not get Job info from RM for job " + jobId
|
LOG.info("Could not get Job info from RM for job " + jobId
|
||||||
+ ". Redirecting to job history server.");
|
+ ". Redirecting to job history server.");
|
||||||
|
@ -163,7 +164,7 @@ public class ClientServiceDelegate {
|
||||||
}
|
}
|
||||||
LOG.info("Tracking Url of JOB is " + application.getTrackingUrl());
|
LOG.info("Tracking Url of JOB is " + application.getTrackingUrl());
|
||||||
LOG.info("Connecting to " + serviceAddr);
|
LOG.info("Connecting to " + serviceAddr);
|
||||||
instantiateAMProxy(serviceAddr);
|
realProxy = instantiateAMProxy(serviceAddr);
|
||||||
return realProxy;
|
return realProxy;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
//possibly the AM has crashed
|
//possibly the AM has crashed
|
||||||
|
@ -233,10 +234,12 @@ public class ClientServiceDelegate {
|
||||||
return historyServerProxy;
|
return historyServerProxy;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void instantiateAMProxy(final String serviceAddr) throws IOException {
|
MRClientProtocol instantiateAMProxy(final String serviceAddr)
|
||||||
|
throws IOException {
|
||||||
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
|
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
|
||||||
LOG.trace("Connecting to ApplicationMaster at: " + serviceAddr);
|
LOG.trace("Connecting to ApplicationMaster at: " + serviceAddr);
|
||||||
realProxy = currentUser.doAs(new PrivilegedAction<MRClientProtocol>() {
|
MRClientProtocol proxy = currentUser
|
||||||
|
.doAs(new PrivilegedAction<MRClientProtocol>() {
|
||||||
@Override
|
@Override
|
||||||
public MRClientProtocol run() {
|
public MRClientProtocol run() {
|
||||||
YarnRPC rpc = YarnRPC.create(conf);
|
YarnRPC rpc = YarnRPC.create(conf);
|
||||||
|
@ -245,6 +248,7 @@ public class ClientServiceDelegate {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
LOG.trace("Connected to ApplicationMaster at: " + serviceAddr);
|
LOG.trace("Connected to ApplicationMaster at: " + serviceAddr);
|
||||||
|
return proxy;
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized Object invoke(String method, Class argClass,
|
private synchronized Object invoke(String method, Class argClass,
|
||||||
|
@ -274,12 +278,14 @@ public class ClientServiceDelegate {
|
||||||
" retrying..");
|
" retrying..");
|
||||||
LOG.debug("Failed exception on AM/History contact",
|
LOG.debug("Failed exception on AM/History contact",
|
||||||
e.getTargetException());
|
e.getTargetException());
|
||||||
forceRefresh = true;
|
// Force reconnection by setting the proxy to null.
|
||||||
|
realProxy = null;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.info("Failed to contact AM/History for job " + jobId
|
LOG.info("Failed to contact AM/History for job " + jobId
|
||||||
+ " Will retry..");
|
+ " Will retry..");
|
||||||
LOG.debug("Failing to contact application master", e);
|
LOG.debug("Failing to contact application master", e);
|
||||||
forceRefresh = true;
|
// Force reconnection by setting the proxy to null.
|
||||||
|
realProxy = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,8 +68,6 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
import org.apache.hadoop.yarn.api.ClientRMProtocol;
|
import org.apache.hadoop.yarn.api.ClientRMProtocol;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
|
||||||
|
@ -84,6 +82,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -123,20 +123,24 @@ public class TestClientRedirect {
|
||||||
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
|
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
|
||||||
conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
|
conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
|
||||||
conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);
|
conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);
|
||||||
|
|
||||||
|
// Start the RM.
|
||||||
RMService rmService = new RMService("test");
|
RMService rmService = new RMService("test");
|
||||||
rmService.init(conf);
|
rmService.init(conf);
|
||||||
rmService.start();
|
rmService.start();
|
||||||
|
|
||||||
|
// Start the AM.
|
||||||
AMService amService = new AMService();
|
AMService amService = new AMService();
|
||||||
amService.init(conf);
|
amService.init(conf);
|
||||||
amService.start(conf);
|
amService.start(conf);
|
||||||
amRunning = true;
|
|
||||||
|
|
||||||
|
// Start the HS.
|
||||||
HistoryService historyService = new HistoryService();
|
HistoryService historyService = new HistoryService();
|
||||||
historyService.init(conf);
|
historyService.init(conf);
|
||||||
historyService.start(conf);
|
historyService.start(conf);
|
||||||
|
|
||||||
LOG.info("services started");
|
LOG.info("services started");
|
||||||
|
|
||||||
Cluster cluster = new Cluster(conf);
|
Cluster cluster = new Cluster(conf);
|
||||||
org.apache.hadoop.mapreduce.JobID jobID =
|
org.apache.hadoop.mapreduce.JobID jobID =
|
||||||
new org.apache.hadoop.mapred.JobID("201103121733", 1);
|
new org.apache.hadoop.mapred.JobID("201103121733", 1);
|
||||||
|
@ -151,13 +155,13 @@ public class TestClientRedirect {
|
||||||
|
|
||||||
//bring down the AM service
|
//bring down the AM service
|
||||||
amService.stop();
|
amService.stop();
|
||||||
amRunning = false;
|
|
||||||
|
|
||||||
LOG.info("Sleeping for 5 seconds after stop for" +
|
LOG.info("Sleeping for 5 seconds after stop for" +
|
||||||
" the server to exit cleanly..");
|
" the server to exit cleanly..");
|
||||||
Thread.sleep(5000);
|
Thread.sleep(5000);
|
||||||
|
|
||||||
amRestarting = true;
|
amRestarting = true;
|
||||||
|
|
||||||
// Same client
|
// Same client
|
||||||
//results are returned from fake (not started job)
|
//results are returned from fake (not started job)
|
||||||
counters = cluster.getJob(jobID).getCounters();
|
counters = cluster.getJob(jobID).getCounters();
|
||||||
|
@ -181,14 +185,15 @@ public class TestClientRedirect {
|
||||||
amService = new AMService();
|
amService = new AMService();
|
||||||
amService.init(conf);
|
amService.init(conf);
|
||||||
amService.start(conf);
|
amService.start(conf);
|
||||||
amRunning = true;
|
|
||||||
amContact = false; //reset
|
amContact = false; //reset
|
||||||
|
|
||||||
counters = cluster.getJob(jobID).getCounters();
|
counters = cluster.getJob(jobID).getCounters();
|
||||||
validateCounters(counters);
|
validateCounters(counters);
|
||||||
Assert.assertTrue(amContact);
|
Assert.assertTrue(amContact);
|
||||||
|
|
||||||
amRunning = false;
|
// Stop the AM. It is not even restarting. So it should be treated as
|
||||||
|
// completed.
|
||||||
|
amService.stop();
|
||||||
|
|
||||||
// Same client
|
// Same client
|
||||||
counters = cluster.getJob(jobID).getCounters();
|
counters = cluster.getJob(jobID).getCounters();
|
||||||
|
@ -347,6 +352,7 @@ public class TestClientRedirect {
|
||||||
private InetSocketAddress bindAddress;
|
private InetSocketAddress bindAddress;
|
||||||
private Server server;
|
private Server server;
|
||||||
private final String hostAddress;
|
private final String hostAddress;
|
||||||
|
|
||||||
public AMService() {
|
public AMService() {
|
||||||
this(AMHOSTADDRESS);
|
this(AMHOSTADDRESS);
|
||||||
}
|
}
|
||||||
|
@ -376,11 +382,13 @@ public class TestClientRedirect {
|
||||||
NetUtils.createSocketAddr(hostNameResolved.getHostAddress()
|
NetUtils.createSocketAddr(hostNameResolved.getHostAddress()
|
||||||
+ ":" + server.getPort());
|
+ ":" + server.getPort());
|
||||||
super.start();
|
super.start();
|
||||||
|
amRunning = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void stop() {
|
public void stop() {
|
||||||
server.stop();
|
server.stop();
|
||||||
super.stop();
|
super.stop();
|
||||||
|
amRunning = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,208 +1,272 @@
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
* to you under the Apache License, Version 2.0 (the
|
* to you under the Apache License, Version 2.0 (the
|
||||||
* "License"); you may not use this file except in compliance
|
* "License"); you may not use this file except in compliance
|
||||||
* with the License. You may obtain a copy of the License at
|
* with the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.mapred;
|
package org.apache.hadoop.mapred;
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.*;
|
||||||
import junit.framework.Assert;
|
|
||||||
|
import java.io.IOException;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.mapreduce.JobID;
|
import junit.framework.Assert;
|
||||||
import org.apache.hadoop.mapreduce.JobStatus;
|
|
||||||
import org.apache.hadoop.mapreduce.MRConfig;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
import org.apache.hadoop.mapreduce.JobID;
|
||||||
import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol;
|
import org.apache.hadoop.mapreduce.JobStatus;
|
||||||
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
|
import org.apache.hadoop.mapreduce.MRConfig;
|
||||||
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse;
|
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||||
import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
|
import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol;
|
||||||
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
|
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
|
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||||
import org.apache.hadoop.yarn.ipc.RPCUtil;
|
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||||
import org.apache.hadoop.yarn.util.Records;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
import org.junit.Test;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
|
||||||
/**
|
import org.apache.hadoop.yarn.ipc.RPCUtil;
|
||||||
* Tests for ClientServiceDelegate.java
|
import org.apache.hadoop.yarn.util.BuilderUtils;
|
||||||
*/
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
import org.junit.Test;
|
||||||
public class TestClientServiceDelegate {
|
|
||||||
private JobID oldJobId = JobID.forName("job_1315895242400_2");
|
/**
|
||||||
private org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter
|
* Tests for ClientServiceDelegate.java
|
||||||
.toYarn(oldJobId);
|
*/
|
||||||
|
|
||||||
@Test
|
public class TestClientServiceDelegate {
|
||||||
public void testUnknownAppInRM() throws Exception {
|
private JobID oldJobId = JobID.forName("job_1315895242400_2");
|
||||||
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
private org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter
|
||||||
when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn(
|
.toYarn(oldJobId);
|
||||||
getJobReportResponse());
|
|
||||||
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
@Test
|
||||||
historyServerProxy, getRMDelegate());
|
public void testUnknownAppInRM() throws Exception {
|
||||||
|
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
||||||
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn(
|
||||||
Assert.assertNotNull(jobStatus);
|
getJobReportResponse());
|
||||||
}
|
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
||||||
|
historyServerProxy, getRMDelegate());
|
||||||
@Test
|
|
||||||
public void testRemoteExceptionFromHistoryServer() throws Exception {
|
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
|
Assert.assertNotNull(jobStatus);
|
||||||
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
}
|
||||||
when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
|
|
||||||
RPCUtil.getRemoteException("Job ID doesnot Exist"));
|
@Test
|
||||||
|
public void testRemoteExceptionFromHistoryServer() throws Exception {
|
||||||
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
|
||||||
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
|
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
||||||
.thenReturn(null);
|
when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
|
||||||
|
RPCUtil.getRemoteException("Job ID doesnot Exist"));
|
||||||
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
|
||||||
historyServerProxy, rm);
|
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
||||||
|
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
|
||||||
try {
|
.thenReturn(null);
|
||||||
clientServiceDelegate.getJobStatus(oldJobId);
|
|
||||||
Assert.fail("Invoke should throw exception after retries.");
|
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
||||||
} catch (YarnRemoteException e) {
|
historyServerProxy, rm);
|
||||||
Assert.assertEquals("Job ID doesnot Exist", e.getMessage());
|
|
||||||
}
|
try {
|
||||||
}
|
clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
|
Assert.fail("Invoke should throw exception after retries.");
|
||||||
@Test
|
} catch (YarnRemoteException e) {
|
||||||
public void testRetriesOnConnectionFailure() throws Exception {
|
Assert.assertEquals("Job ID doesnot Exist", e.getMessage());
|
||||||
|
}
|
||||||
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
}
|
||||||
when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
|
|
||||||
new RuntimeException("1")).thenThrow(new RuntimeException("2"))
|
@Test
|
||||||
.thenThrow(new RuntimeException("3"))
|
public void testRetriesOnConnectionFailure() throws Exception {
|
||||||
.thenReturn(getJobReportResponse());
|
|
||||||
|
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
||||||
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
when(historyServerProxy.getJobReport(getJobReportRequest())).thenThrow(
|
||||||
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
|
new RuntimeException("1")).thenThrow(new RuntimeException("2"))
|
||||||
.thenReturn(null);
|
.thenThrow(new RuntimeException("3"))
|
||||||
|
.thenReturn(getJobReportResponse());
|
||||||
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
|
||||||
historyServerProxy, rm);
|
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
||||||
|
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
|
||||||
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
.thenReturn(null);
|
||||||
Assert.assertNotNull(jobStatus);
|
|
||||||
}
|
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
||||||
|
historyServerProxy, rm);
|
||||||
@Test
|
|
||||||
public void testHistoryServerNotConfigured() throws Exception {
|
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
//RM doesn't have app report and job History Server is not configured
|
Assert.assertNotNull(jobStatus);
|
||||||
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
verify(historyServerProxy, times(4)).getJobReport(
|
||||||
null, getRMDelegate());
|
any(GetJobReportRequest.class));
|
||||||
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
}
|
||||||
Assert.assertEquals("N/A", jobStatus.getUsername());
|
|
||||||
Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState());
|
@Test
|
||||||
|
public void testHistoryServerNotConfigured() throws Exception {
|
||||||
//RM has app report and job History Server is not configured
|
//RM doesn't have app report and job History Server is not configured
|
||||||
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
||||||
ApplicationReport applicationReport = getApplicationReport();
|
null, getRMDelegate());
|
||||||
when(rm.getApplicationReport(jobId.getAppId())).thenReturn(
|
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
applicationReport);
|
Assert.assertEquals("N/A", jobStatus.getUsername());
|
||||||
|
Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState());
|
||||||
clientServiceDelegate = getClientServiceDelegate(null, rm);
|
|
||||||
jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
//RM has app report and job History Server is not configured
|
||||||
Assert.assertEquals(applicationReport.getUser(), jobStatus.getUsername());
|
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
||||||
Assert.assertEquals(JobStatus.State.SUCCEEDED, jobStatus.getState());
|
ApplicationReport applicationReport = getFinishedApplicationReport();
|
||||||
}
|
when(rm.getApplicationReport(jobId.getAppId())).thenReturn(
|
||||||
|
applicationReport);
|
||||||
|
|
||||||
@Test
|
clientServiceDelegate = getClientServiceDelegate(null, rm);
|
||||||
public void testJobReportFromHistoryServer() throws Exception {
|
jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
Assert.assertEquals(applicationReport.getUser(), jobStatus.getUsername());
|
||||||
when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn(
|
Assert.assertEquals(JobStatus.State.SUCCEEDED, jobStatus.getState());
|
||||||
getJobReportResponseFromHistoryServer());
|
}
|
||||||
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
|
||||||
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
|
@Test
|
||||||
.thenReturn(null);
|
public void testJobReportFromHistoryServer() throws Exception {
|
||||||
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
||||||
historyServerProxy, rm);
|
when(historyServerProxy.getJobReport(getJobReportRequest())).thenReturn(
|
||||||
|
getJobReportResponseFromHistoryServer());
|
||||||
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
||||||
Assert.assertNotNull(jobStatus);
|
when(rm.getApplicationReport(TypeConverter.toYarn(oldJobId).getAppId()))
|
||||||
Assert.assertEquals("TestJobFilePath", jobStatus.getJobFile());
|
.thenReturn(null);
|
||||||
Assert.assertEquals("http://TestTrackingUrl", jobStatus.getTrackingUrl());
|
ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate(
|
||||||
Assert.assertEquals(1.0f, jobStatus.getMapProgress());
|
historyServerProxy, rm);
|
||||||
Assert.assertEquals(1.0f, jobStatus.getReduceProgress());
|
|
||||||
}
|
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
|
Assert.assertNotNull(jobStatus);
|
||||||
private GetJobReportRequest getJobReportRequest() {
|
Assert.assertEquals("TestJobFilePath", jobStatus.getJobFile());
|
||||||
GetJobReportRequest request = Records.newRecord(GetJobReportRequest.class);
|
Assert.assertEquals("http://TestTrackingUrl", jobStatus.getTrackingUrl());
|
||||||
request.setJobId(jobId);
|
Assert.assertEquals(1.0f, jobStatus.getMapProgress());
|
||||||
return request;
|
Assert.assertEquals(1.0f, jobStatus.getReduceProgress());
|
||||||
}
|
}
|
||||||
|
|
||||||
private GetJobReportResponse getJobReportResponse() {
|
@Test
|
||||||
GetJobReportResponse jobReportResponse = Records
|
public void testReconnectOnAMRestart() throws IOException {
|
||||||
.newRecord(GetJobReportResponse.class);
|
|
||||||
JobReport jobReport = Records.newRecord(JobReport.class);
|
MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
|
||||||
jobReport.setJobId(jobId);
|
|
||||||
jobReport.setJobState(JobState.SUCCEEDED);
|
// RM returns AM1 url, null, null and AM2 url on invocations.
|
||||||
jobReportResponse.setJobReport(jobReport);
|
// Nulls simulate the time when AM2 is in the process of restarting.
|
||||||
return jobReportResponse;
|
ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
|
||||||
}
|
when(rmDelegate.getApplicationReport(jobId.getAppId())).thenReturn(
|
||||||
|
getRunningApplicationReport("am1", 78)).thenReturn(
|
||||||
private ApplicationReport getApplicationReport() {
|
getRunningApplicationReport(null, 0)).thenReturn(
|
||||||
ApplicationReport applicationReport = Records
|
getRunningApplicationReport(null, 0)).thenReturn(
|
||||||
.newRecord(ApplicationReport.class);
|
getRunningApplicationReport("am2", 90));
|
||||||
applicationReport.setYarnApplicationState(YarnApplicationState.FINISHED);
|
|
||||||
applicationReport.setUser("root");
|
GetJobReportResponse jobReportResponse1 = mock(GetJobReportResponse.class);
|
||||||
applicationReport.setHost("N/A");
|
when(jobReportResponse1.getJobReport()).thenReturn(
|
||||||
applicationReport.setName("N/A");
|
MRBuilderUtils.newJobReport(jobId, "jobName-firstGen", "user",
|
||||||
applicationReport.setQueue("N/A");
|
JobState.RUNNING, 0, 0, 0, 0, 0, 0, "anything"));
|
||||||
applicationReport.setStartTime(0);
|
|
||||||
applicationReport.setFinishTime(0);
|
// First AM returns a report with jobName firstGen and simulates AM shutdown
|
||||||
applicationReport.setTrackingUrl("N/A");
|
// on second invocation.
|
||||||
applicationReport.setDiagnostics("N/A");
|
MRClientProtocol firstGenAMProxy = mock(MRClientProtocol.class);
|
||||||
applicationReport.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);
|
when(firstGenAMProxy.getJobReport(any(GetJobReportRequest.class)))
|
||||||
return applicationReport;
|
.thenReturn(jobReportResponse1).thenThrow(
|
||||||
}
|
new RuntimeException("AM is down!"));
|
||||||
|
|
||||||
private ResourceMgrDelegate getRMDelegate() throws YarnRemoteException {
|
GetJobReportResponse jobReportResponse2 = mock(GetJobReportResponse.class);
|
||||||
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
when(jobReportResponse2.getJobReport()).thenReturn(
|
||||||
when(rm.getApplicationReport(jobId.getAppId())).thenReturn(null);
|
MRBuilderUtils.newJobReport(jobId, "jobName-secondGen", "user",
|
||||||
return rm;
|
JobState.RUNNING, 0, 0, 0, 0, 0, 0, "anything"));
|
||||||
}
|
|
||||||
|
// Second AM generation returns a report with jobName secondGen
|
||||||
private ClientServiceDelegate getClientServiceDelegate(
|
MRClientProtocol secondGenAMProxy = mock(MRClientProtocol.class);
|
||||||
MRClientProtocol historyServerProxy, ResourceMgrDelegate rm) {
|
when(secondGenAMProxy.getJobReport(any(GetJobReportRequest.class)))
|
||||||
Configuration conf = new YarnConfiguration();
|
.thenReturn(jobReportResponse2);
|
||||||
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
|
|
||||||
ClientServiceDelegate clientServiceDelegate = new ClientServiceDelegate(
|
ClientServiceDelegate clientServiceDelegate = spy(getClientServiceDelegate(
|
||||||
conf, rm, oldJobId, historyServerProxy);
|
historyServerProxy, rmDelegate));
|
||||||
return clientServiceDelegate;
|
// First time, connection should be to AM1, then to AM2. Further requests
|
||||||
}
|
// should use the same proxy to AM2 and so instantiateProxy shouldn't be
|
||||||
|
// called.
|
||||||
private GetJobReportResponse getJobReportResponseFromHistoryServer() {
|
doReturn(firstGenAMProxy).doReturn(secondGenAMProxy).when(
|
||||||
GetJobReportResponse jobReportResponse = Records
|
clientServiceDelegate).instantiateAMProxy(any(String.class));
|
||||||
.newRecord(GetJobReportResponse.class);
|
|
||||||
JobReport jobReport = Records.newRecord(JobReport.class);
|
JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
jobReport.setJobId(jobId);
|
Assert.assertNotNull(jobStatus);
|
||||||
jobReport.setJobState(JobState.SUCCEEDED);
|
Assert.assertEquals("jobName-firstGen", jobStatus.getJobName());
|
||||||
jobReport.setMapProgress(1.0f);
|
|
||||||
jobReport.setReduceProgress(1.0f);
|
jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
jobReport.setJobFile("TestJobFilePath");
|
Assert.assertNotNull(jobStatus);
|
||||||
jobReport.setTrackingUrl("TestTrackingUrl");
|
Assert.assertEquals("jobName-secondGen", jobStatus.getJobName());
|
||||||
jobReportResponse.setJobReport(jobReport);
|
|
||||||
return jobReportResponse;
|
jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
|
||||||
}
|
Assert.assertNotNull(jobStatus);
|
||||||
}
|
Assert.assertEquals("jobName-secondGen", jobStatus.getJobName());
|
||||||
|
|
||||||
|
verify(clientServiceDelegate, times(2)).instantiateAMProxy(
|
||||||
|
any(String.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
private GetJobReportRequest getJobReportRequest() {
|
||||||
|
GetJobReportRequest request = Records.newRecord(GetJobReportRequest.class);
|
||||||
|
request.setJobId(jobId);
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
|
private GetJobReportResponse getJobReportResponse() {
|
||||||
|
GetJobReportResponse jobReportResponse = Records
|
||||||
|
.newRecord(GetJobReportResponse.class);
|
||||||
|
JobReport jobReport = Records.newRecord(JobReport.class);
|
||||||
|
jobReport.setJobId(jobId);
|
||||||
|
jobReport.setJobState(JobState.SUCCEEDED);
|
||||||
|
jobReportResponse.setJobReport(jobReport);
|
||||||
|
return jobReportResponse;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationReport getFinishedApplicationReport() {
|
||||||
|
return BuilderUtils.newApplicationReport(BuilderUtils.newApplicationId(
|
||||||
|
1234, 5), "user", "queue", "appname", "host", 124, null,
|
||||||
|
YarnApplicationState.FINISHED, "diagnostics", "url", 0, 0,
|
||||||
|
FinalApplicationStatus.SUCCEEDED, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationReport getRunningApplicationReport(String host, int port) {
|
||||||
|
return BuilderUtils.newApplicationReport(BuilderUtils.newApplicationId(
|
||||||
|
1234, 5), "user", "queue", "appname", host, port, null,
|
||||||
|
YarnApplicationState.RUNNING, "diagnostics", "url", 0, 0,
|
||||||
|
FinalApplicationStatus.UNDEFINED, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ResourceMgrDelegate getRMDelegate() throws YarnRemoteException {
|
||||||
|
ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class);
|
||||||
|
when(rm.getApplicationReport(jobId.getAppId())).thenReturn(null);
|
||||||
|
return rm;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ClientServiceDelegate getClientServiceDelegate(
|
||||||
|
MRClientProtocol historyServerProxy, ResourceMgrDelegate rm) {
|
||||||
|
Configuration conf = new YarnConfiguration();
|
||||||
|
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
|
||||||
|
ClientServiceDelegate clientServiceDelegate = new ClientServiceDelegate(
|
||||||
|
conf, rm, oldJobId, historyServerProxy);
|
||||||
|
return clientServiceDelegate;
|
||||||
|
}
|
||||||
|
|
||||||
|
private GetJobReportResponse getJobReportResponseFromHistoryServer() {
|
||||||
|
GetJobReportResponse jobReportResponse = Records
|
||||||
|
.newRecord(GetJobReportResponse.class);
|
||||||
|
JobReport jobReport = Records.newRecord(JobReport.class);
|
||||||
|
jobReport.setJobId(jobId);
|
||||||
|
jobReport.setJobState(JobState.SUCCEEDED);
|
||||||
|
jobReport.setMapProgress(1.0f);
|
||||||
|
jobReport.setReduceProgress(1.0f);
|
||||||
|
jobReport.setJobFile("TestJobFilePath");
|
||||||
|
jobReport.setTrackingUrl("TestTrackingUrl");
|
||||||
|
jobReportResponse.setJobReport(jobReport);
|
||||||
|
return jobReportResponse;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue