Merge r1503933 from trunk to branch-2 for YARN-513. Create common proxy client for communicating with RM (Xuan Gong & Jian He via bikas)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1503935 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0055e40226
commit
3990e8b478
|
@ -465,6 +465,9 @@ Release 2.1.0-beta - 2013-07-02
|
||||||
YARN-521. Augment AM - RM client module to be able to request containers
|
YARN-521. Augment AM - RM client module to be able to request containers
|
||||||
only at specific locations (Sandy Ryza via bikas)
|
only at specific locations (Sandy Ryza via bikas)
|
||||||
|
|
||||||
|
YARN-513. Create common proxy client for communicating with RM. (Xuan Gong
|
||||||
|
& Jian He via bikas)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
YARN-512. Log aggregation root directory check is more expensive than it
|
YARN-512. Log aggregation root directory check is more expensive than it
|
||||||
|
|
|
@ -655,17 +655,17 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
|
public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
|
||||||
2000;
|
2000;
|
||||||
|
|
||||||
/** Max time to wait to establish a connection to RM when NM starts
|
/** Max time to wait to establish a connection to RM
|
||||||
*/
|
*/
|
||||||
public static final String RESOURCEMANAGER_CONNECT_WAIT_SECS =
|
public static final String RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS =
|
||||||
NM_PREFIX + "resourcemanager.connect.wait.secs";
|
RM_PREFIX + "resourcemanager.connect.max.wait.secs";
|
||||||
public static final int DEFAULT_RESOURCEMANAGER_CONNECT_WAIT_SECS =
|
public static final int DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS =
|
||||||
15*60;
|
15*60;
|
||||||
|
|
||||||
/** Time interval between each NM attempt to connect to RM
|
/** Time interval between each attempt to connect to RM
|
||||||
*/
|
*/
|
||||||
public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS =
|
public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS =
|
||||||
NM_PREFIX + "resourcemanager.connect.retry_interval.secs";
|
RM_PREFIX + "resourcemanager.connect.retry_interval.secs";
|
||||||
public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
|
public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
|
||||||
= 30;
|
= 30;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.client;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
|
||||||
|
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
|
||||||
|
|
||||||
|
public class ClientRMProxy<T> extends RMProxy<T>{
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(ClientRMProxy.class);
|
||||||
|
|
||||||
|
public static <T> T createRMProxy(final Configuration conf,
|
||||||
|
final Class<T> protocol) throws IOException {
|
||||||
|
InetSocketAddress rmAddress = getRMAddress(conf, protocol);
|
||||||
|
return createRMProxy(conf, protocol, rmAddress);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static InetSocketAddress getRMAddress(Configuration conf, Class<?> protocol) {
|
||||||
|
if (protocol == ApplicationClientProtocol.class) {
|
||||||
|
return conf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_PORT);
|
||||||
|
} else if (protocol == ResourceManagerAdministrationProtocol.class) {
|
||||||
|
return conf.getSocketAddr(
|
||||||
|
YarnConfiguration.RM_ADMIN_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_ADMIN_PORT);
|
||||||
|
} else if (protocol == ApplicationMasterProtocol.class) {
|
||||||
|
return conf.getSocketAddr(
|
||||||
|
YarnConfiguration.RM_SCHEDULER_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);
|
||||||
|
} else {
|
||||||
|
String message = "Unsupported protocol found when creating the proxy " +
|
||||||
|
"connection to ResourceManager: " +
|
||||||
|
((protocol != null) ? protocol.getClass().getName() : "null");
|
||||||
|
LOG.error(message);
|
||||||
|
throw new IllegalStateException(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.yarn.client.api;
|
package org.apache.hadoop.yarn.client.api;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -54,25 +53,6 @@ public abstract class YarnClient extends AbstractService {
|
||||||
return client;
|
return client;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new instance of YarnClient.
|
|
||||||
*/
|
|
||||||
@Public
|
|
||||||
public static YarnClient createYarnClient(InetSocketAddress rmAddress) {
|
|
||||||
YarnClient client = new YarnClientImpl(rmAddress);
|
|
||||||
return client;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new instance of YarnClient.
|
|
||||||
*/
|
|
||||||
@Public
|
|
||||||
public static YarnClient createYarnClient(String name,
|
|
||||||
InetSocketAddress rmAddress) {
|
|
||||||
YarnClient client = new YarnClientImpl(name, rmAddress);
|
|
||||||
return client;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
protected YarnClient(String name) {
|
protected YarnClient(String name) {
|
||||||
super(name);
|
super(name);
|
||||||
|
|
|
@ -19,8 +19,6 @@
|
||||||
package org.apache.hadoop.yarn.client.api.impl;
|
package org.apache.hadoop.yarn.client.api.impl;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.security.PrivilegedAction;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -42,7 +40,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.ipc.RPC;
|
import org.apache.hadoop.ipc.RPC;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
|
||||||
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||||
|
@ -56,16 +53,16 @@ import org.apache.hadoop.yarn.api.records.NMToken;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
|
import org.apache.hadoop.yarn.client.ClientRMProxy;
|
||||||
import org.apache.hadoop.yarn.client.api.AMRMClient;
|
import org.apache.hadoop.yarn.client.api.AMRMClient;
|
||||||
|
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
|
||||||
import org.apache.hadoop.yarn.client.api.InvalidContainerRequestException;
|
import org.apache.hadoop.yarn.client.api.InvalidContainerRequestException;
|
||||||
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
||||||
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
|
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
|
||||||
import org.apache.hadoop.yarn.util.RackResolver;
|
import org.apache.hadoop.yarn.util.RackResolver;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -171,28 +168,11 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
@Override
|
@Override
|
||||||
protected void serviceStart() throws Exception {
|
protected void serviceStart() throws Exception {
|
||||||
final YarnConfiguration conf = new YarnConfiguration(getConfig());
|
final YarnConfiguration conf = new YarnConfiguration(getConfig());
|
||||||
final YarnRPC rpc = YarnRPC.create(conf);
|
|
||||||
final InetSocketAddress rmAddress = conf.getSocketAddr(
|
|
||||||
YarnConfiguration.RM_SCHEDULER_ADDRESS,
|
|
||||||
YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS,
|
|
||||||
YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);
|
|
||||||
|
|
||||||
UserGroupInformation currentUser;
|
|
||||||
try {
|
try {
|
||||||
currentUser = UserGroupInformation.getCurrentUser();
|
rmClient = ClientRMProxy.createRMProxy(conf, ApplicationMasterProtocol.class);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new YarnRuntimeException(e);
|
throw new YarnRuntimeException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// CurrentUser should already have AMToken loaded.
|
|
||||||
rmClient = currentUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
|
|
||||||
@Override
|
|
||||||
public ApplicationMasterProtocol run() {
|
|
||||||
return (ApplicationMasterProtocol) rpc.getProxy(ApplicationMasterProtocol.class, rmAddress,
|
|
||||||
conf);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
LOG.debug("Connecting to ResourceManager at " + rmAddress);
|
|
||||||
super.serviceStart();
|
super.serviceStart();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,11 +59,12 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.Token;
|
import org.apache.hadoop.yarn.api.records.Token;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
|
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
|
||||||
|
import org.apache.hadoop.yarn.client.ClientRMProxy;
|
||||||
import org.apache.hadoop.yarn.client.api.YarnClient;
|
import org.apache.hadoop.yarn.client.api.YarnClient;
|
||||||
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
|
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.util.Records;
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -81,16 +82,7 @@ public class YarnClientImpl extends YarnClient {
|
||||||
private static final String ROOT = "root";
|
private static final String ROOT = "root";
|
||||||
|
|
||||||
public YarnClientImpl() {
|
public YarnClientImpl() {
|
||||||
this(null);
|
super(YarnClientImpl.class.getName());
|
||||||
}
|
|
||||||
|
|
||||||
public YarnClientImpl(InetSocketAddress rmAddress) {
|
|
||||||
this(YarnClientImpl.class.getName(), rmAddress);
|
|
||||||
}
|
|
||||||
|
|
||||||
public YarnClientImpl(String name, InetSocketAddress rmAddress) {
|
|
||||||
super(name);
|
|
||||||
this.rmAddress = rmAddress;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static InetSocketAddress getRmAddress(Configuration conf) {
|
private static InetSocketAddress getRmAddress(Configuration conf) {
|
||||||
|
@ -100,9 +92,7 @@ public class YarnClientImpl extends YarnClient {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceInit(Configuration conf) throws Exception {
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
if (this.rmAddress == null) {
|
this.rmAddress = getRmAddress(conf);
|
||||||
this.rmAddress = getRmAddress(conf);
|
|
||||||
}
|
|
||||||
statePollIntervalMillis = conf.getLong(
|
statePollIntervalMillis = conf.getLong(
|
||||||
YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS,
|
YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS,
|
||||||
YarnConfiguration.DEFAULT_YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS);
|
YarnConfiguration.DEFAULT_YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS);
|
||||||
|
@ -111,12 +101,11 @@ public class YarnClientImpl extends YarnClient {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceStart() throws Exception {
|
protected void serviceStart() throws Exception {
|
||||||
YarnRPC rpc = YarnRPC.create(getConfig());
|
try {
|
||||||
|
rmClient = ClientRMProxy.createRMProxy(getConfig(),
|
||||||
this.rmClient = (ApplicationClientProtocol) rpc.getProxy(
|
ApplicationClientProtocol.class);
|
||||||
ApplicationClientProtocol.class, rmAddress, getConfig());
|
} catch (IOException e) {
|
||||||
if (LOG.isDebugEnabled()) {
|
throw new YarnRuntimeException(e);
|
||||||
LOG.debug("Connecting to ResourceManager at " + rmAddress);
|
|
||||||
}
|
}
|
||||||
super.serviceStart();
|
super.serviceStart();
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,8 +19,6 @@
|
||||||
package org.apache.hadoop.yarn.client.cli;
|
package org.apache.hadoop.yarn.client.cli;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.security.PrivilegedAction;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
|
@ -31,11 +29,11 @@ import org.apache.hadoop.ipc.RemoteException;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.Tool;
|
import org.apache.hadoop.util.Tool;
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
import org.apache.hadoop.util.ToolRunner;
|
||||||
|
import org.apache.hadoop.yarn.client.ClientRMProxy;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
|
||||||
import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
|
import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
|
||||||
|
@ -164,32 +162,10 @@ public class RMAdminCLI extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static UserGroupInformation getUGI(Configuration conf
|
|
||||||
) throws IOException {
|
|
||||||
return UserGroupInformation.getCurrentUser();
|
|
||||||
}
|
|
||||||
|
|
||||||
private ResourceManagerAdministrationProtocol createAdminProtocol() throws IOException {
|
private ResourceManagerAdministrationProtocol createAdminProtocol() throws IOException {
|
||||||
// Get the current configuration
|
// Get the current configuration
|
||||||
final YarnConfiguration conf = new YarnConfiguration(getConf());
|
final YarnConfiguration conf = new YarnConfiguration(getConf());
|
||||||
|
return ClientRMProxy.createRMProxy(conf, ResourceManagerAdministrationProtocol.class);
|
||||||
// Create the client
|
|
||||||
final InetSocketAddress addr = conf.getSocketAddr(
|
|
||||||
YarnConfiguration.RM_ADMIN_ADDRESS,
|
|
||||||
YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS,
|
|
||||||
YarnConfiguration.DEFAULT_RM_ADMIN_PORT);
|
|
||||||
final YarnRPC rpc = YarnRPC.create(conf);
|
|
||||||
|
|
||||||
ResourceManagerAdministrationProtocol adminProtocol =
|
|
||||||
getUGI(conf).doAs(new PrivilegedAction<ResourceManagerAdministrationProtocol>() {
|
|
||||||
@Override
|
|
||||||
public ResourceManagerAdministrationProtocol run() {
|
|
||||||
return (ResourceManagerAdministrationProtocol) rpc.getProxy(ResourceManagerAdministrationProtocol.class,
|
|
||||||
addr, conf);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return adminProtocol;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private int refreshQueues() throws IOException, YarnException {
|
private int refreshQueues() throws IOException, YarnException {
|
||||||
|
|
|
@ -0,0 +1,125 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.client;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.ConnectException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.security.PrivilegedAction;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||||
|
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||||
|
import org.apache.hadoop.io.retry.RetryProxy;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
||||||
|
|
||||||
|
@InterfaceAudience.Public
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
public class RMProxy<T> {
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(RMProxy.class);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public static <T> T createRMProxy(final Configuration conf,
|
||||||
|
final Class<T> protocol, InetSocketAddress rmAddress) throws IOException {
|
||||||
|
RetryPolicy retryPolicy = createRetryPolicy(conf);
|
||||||
|
T proxy = RMProxy.<T>getProxy(conf, protocol, rmAddress);
|
||||||
|
LOG.info("Connecting to ResourceManager at " + rmAddress);
|
||||||
|
return (T) RetryProxy.create(protocol, proxy, retryPolicy);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
protected static <T> T getProxy(final Configuration conf,
|
||||||
|
final Class<T> protocol, final InetSocketAddress rmAddress)
|
||||||
|
throws IOException {
|
||||||
|
return (T) UserGroupInformation.getCurrentUser().doAs(
|
||||||
|
new PrivilegedAction<Object>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T run() {
|
||||||
|
return (T) YarnRPC.create(conf).getProxy(protocol, rmAddress, conf);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public static RetryPolicy createRetryPolicy(Configuration conf) {
|
||||||
|
long rmConnectWaitMS =
|
||||||
|
conf.getInt(
|
||||||
|
YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS,
|
||||||
|
YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS)
|
||||||
|
* 1000;
|
||||||
|
long rmConnectionRetryIntervalMS =
|
||||||
|
conf.getLong(
|
||||||
|
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
||||||
|
YarnConfiguration
|
||||||
|
.DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS)
|
||||||
|
* 1000;
|
||||||
|
|
||||||
|
if (rmConnectionRetryIntervalMS < 0) {
|
||||||
|
throw new YarnRuntimeException("Invalid Configuration. " +
|
||||||
|
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS +
|
||||||
|
" should not be negative.");
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean waitForEver = (rmConnectWaitMS == -1000);
|
||||||
|
|
||||||
|
if (waitForEver) {
|
||||||
|
return RetryPolicies.RETRY_FOREVER;
|
||||||
|
} else {
|
||||||
|
if (rmConnectWaitMS < 0) {
|
||||||
|
throw new YarnRuntimeException("Invalid Configuration. "
|
||||||
|
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS
|
||||||
|
+ " can be -1, but can not be other negative numbers");
|
||||||
|
}
|
||||||
|
|
||||||
|
// try connect once
|
||||||
|
if (rmConnectWaitMS < rmConnectionRetryIntervalMS) {
|
||||||
|
LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS
|
||||||
|
+ " is smaller than "
|
||||||
|
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
|
||||||
|
+ ". Only try connect once.");
|
||||||
|
rmConnectWaitMS = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RetryPolicy retryPolicy =
|
||||||
|
RetryPolicies.retryUpToMaximumTimeWithFixedSleep(rmConnectWaitMS,
|
||||||
|
rmConnectionRetryIntervalMS,
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
|
||||||
|
new HashMap<Class<? extends Exception>, RetryPolicy>();
|
||||||
|
exceptionToPolicyMap.put(ConnectException.class, retryPolicy);
|
||||||
|
//TO DO: after HADOOP-9576, IOException can be changed to EOFException
|
||||||
|
exceptionToPolicyMap.put(IOException.class, retryPolicy);
|
||||||
|
|
||||||
|
return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL,
|
||||||
|
exceptionToPolicyMap);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,55 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.api;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.client.RMProxy;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
|
||||||
|
public class ServerRMProxy<T> extends RMProxy<T>{
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(ServerRMProxy.class);
|
||||||
|
|
||||||
|
public static <T> T createRMProxy(final Configuration conf,
|
||||||
|
final Class<T> protocol) throws IOException {
|
||||||
|
InetSocketAddress rmAddress = getRMAddress(conf, protocol);
|
||||||
|
return createRMProxy(conf, protocol, rmAddress);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static InetSocketAddress getRMAddress(Configuration conf, Class<?> protocol) {
|
||||||
|
if (protocol == ResourceTracker.class) {
|
||||||
|
return conf.getSocketAddr(
|
||||||
|
YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_ADDRESS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_PORT);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
String message = "Unsupported protocol found when creating the proxy " +
|
||||||
|
"connection to ResourceManager: " +
|
||||||
|
((protocol != null) ? protocol.getClass().getName() : "null");
|
||||||
|
LOG.error(message);
|
||||||
|
throw new IllegalStateException(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.api.impl.pb.client;
|
package org.apache.hadoop.yarn.server.api.impl.pb.client;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
@ -41,7 +42,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeMan
|
||||||
|
|
||||||
import com.google.protobuf.ServiceException;
|
import com.google.protobuf.ServiceException;
|
||||||
|
|
||||||
public class ResourceTrackerPBClientImpl implements ResourceTracker {
|
public class ResourceTrackerPBClientImpl implements ResourceTracker, Closeable {
|
||||||
|
|
||||||
private ResourceTrackerPB proxy;
|
private ResourceTrackerPB proxy;
|
||||||
|
|
||||||
|
@ -50,7 +51,14 @@ private ResourceTrackerPB proxy;
|
||||||
proxy = (ResourceTrackerPB)RPC.getProxy(
|
proxy = (ResourceTrackerPB)RPC.getProxy(
|
||||||
ResourceTrackerPB.class, clientVersion, addr, conf);
|
ResourceTrackerPB.class, clientVersion, addr, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
if(this.proxy != null) {
|
||||||
|
RPC.stopProxy(this.proxy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RegisterNodeManagerResponse registerNodeManager(
|
public RegisterNodeManagerResponse registerNodeManager(
|
||||||
RegisterNodeManagerRequest request) throws YarnException,
|
RegisterNodeManagerRequest request) throws YarnException,
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
package org.apache.hadoop.yarn.server.nodemanager;
|
package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.ConnectException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -33,6 +33,7 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ipc.RPC;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -47,9 +48,9 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
|
||||||
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
|
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
|
||||||
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
||||||
|
import org.apache.hadoop.yarn.server.api.ServerRMProxy;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
|
||||||
|
@ -77,7 +78,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
private NodeId nodeId;
|
private NodeId nodeId;
|
||||||
private long nextHeartBeatInterval;
|
private long nextHeartBeatInterval;
|
||||||
private ResourceTracker resourceTracker;
|
private ResourceTracker resourceTracker;
|
||||||
private InetSocketAddress rmAddress;
|
|
||||||
private Resource totalResource;
|
private Resource totalResource;
|
||||||
private int httpPort;
|
private int httpPort;
|
||||||
private volatile boolean isStopped;
|
private volatile boolean isStopped;
|
||||||
|
@ -91,9 +91,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
|
|
||||||
private final NodeHealthCheckerService healthChecker;
|
private final NodeHealthCheckerService healthChecker;
|
||||||
private final NodeManagerMetrics metrics;
|
private final NodeManagerMetrics metrics;
|
||||||
private long rmConnectWaitMS;
|
|
||||||
private long rmConnectionRetryIntervalMS;
|
|
||||||
private boolean waitForEver;
|
|
||||||
|
|
||||||
private Runnable statusUpdaterRunnable;
|
private Runnable statusUpdaterRunnable;
|
||||||
private Thread statusUpdater;
|
private Thread statusUpdater;
|
||||||
|
@ -110,11 +107,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceInit(Configuration conf) throws Exception {
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
this.rmAddress = conf.getSocketAddr(
|
|
||||||
YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
|
|
||||||
YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_ADDRESS,
|
|
||||||
YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_PORT);
|
|
||||||
|
|
||||||
int memoryMb =
|
int memoryMb =
|
||||||
conf.getInt(
|
conf.getInt(
|
||||||
YarnConfiguration.NM_PMEM_MB, YarnConfiguration.DEFAULT_NM_PMEM_MB);
|
YarnConfiguration.NM_PMEM_MB, YarnConfiguration.DEFAULT_NM_PMEM_MB);
|
||||||
|
@ -153,6 +145,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
try {
|
try {
|
||||||
// Registration has to be in start so that ContainerManager can get the
|
// Registration has to be in start so that ContainerManager can get the
|
||||||
// perNM tokens needed to authenticate ContainerTokens.
|
// perNM tokens needed to authenticate ContainerTokens.
|
||||||
|
this.resourceTracker = getRMClient();
|
||||||
registerWithRM();
|
registerWithRM();
|
||||||
super.serviceStart();
|
super.serviceStart();
|
||||||
startStatusUpdater();
|
startStatusUpdater();
|
||||||
|
@ -167,6 +160,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
protected void serviceStop() throws Exception {
|
protected void serviceStop() throws Exception {
|
||||||
// Interrupt the updater.
|
// Interrupt the updater.
|
||||||
this.isStopped = true;
|
this.isStopped = true;
|
||||||
|
stopRMProxy();
|
||||||
super.serviceStop();
|
super.serviceStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,6 +182,13 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
if(this.resourceTracker != null) {
|
||||||
|
RPC.stopProxy(this.resourceTracker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
protected boolean isTokenKeepAliveEnabled(Configuration conf) {
|
protected boolean isTokenKeepAliveEnabled(Configuration conf) {
|
||||||
return conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
|
return conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
|
||||||
|
@ -195,93 +196,22 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
&& UserGroupInformation.isSecurityEnabled();
|
&& UserGroupInformation.isSecurityEnabled();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ResourceTracker getRMClient() {
|
@VisibleForTesting
|
||||||
|
protected ResourceTracker getRMClient() throws IOException {
|
||||||
Configuration conf = getConfig();
|
Configuration conf = getConfig();
|
||||||
YarnRPC rpc = YarnRPC.create(conf);
|
return ServerRMProxy.createRMProxy(conf, ResourceTracker.class);
|
||||||
return (ResourceTracker) rpc.getProxy(ResourceTracker.class, rmAddress,
|
|
||||||
conf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
protected void registerWithRM() throws YarnException, IOException {
|
protected void registerWithRM() throws YarnException, IOException {
|
||||||
Configuration conf = getConfig();
|
|
||||||
rmConnectWaitMS =
|
|
||||||
conf.getInt(
|
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS,
|
|
||||||
YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_WAIT_SECS)
|
|
||||||
* 1000;
|
|
||||||
rmConnectionRetryIntervalMS =
|
|
||||||
conf.getLong(
|
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
|
||||||
YarnConfiguration
|
|
||||||
.DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS)
|
|
||||||
* 1000;
|
|
||||||
|
|
||||||
if(rmConnectionRetryIntervalMS < 0) {
|
|
||||||
throw new YarnRuntimeException("Invalid Configuration. " +
|
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS +
|
|
||||||
" should not be negative.");
|
|
||||||
}
|
|
||||||
|
|
||||||
waitForEver = (rmConnectWaitMS == -1000);
|
|
||||||
|
|
||||||
if(! waitForEver) {
|
|
||||||
if(rmConnectWaitMS < 0) {
|
|
||||||
throw new YarnRuntimeException("Invalid Configuration. " +
|
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS +
|
|
||||||
" can be -1, but can not be other negative numbers");
|
|
||||||
}
|
|
||||||
|
|
||||||
//try connect once
|
|
||||||
if(rmConnectWaitMS < rmConnectionRetryIntervalMS) {
|
|
||||||
LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS
|
|
||||||
+ " is smaller than "
|
|
||||||
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
|
|
||||||
+ ". Only try connect once.");
|
|
||||||
rmConnectWaitMS = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int rmRetryCount = 0;
|
|
||||||
long waitStartTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
RegisterNodeManagerRequest request =
|
RegisterNodeManagerRequest request =
|
||||||
recordFactory.newRecordInstance(RegisterNodeManagerRequest.class);
|
recordFactory.newRecordInstance(RegisterNodeManagerRequest.class);
|
||||||
request.setHttpPort(this.httpPort);
|
request.setHttpPort(this.httpPort);
|
||||||
request.setResource(this.totalResource);
|
request.setResource(this.totalResource);
|
||||||
request.setNodeId(this.nodeId);
|
request.setNodeId(this.nodeId);
|
||||||
RegisterNodeManagerResponse regNMResponse;
|
RegisterNodeManagerResponse regNMResponse =
|
||||||
|
resourceTracker.registerNodeManager(request);
|
||||||
while(true) {
|
this.rmIdentifier = regNMResponse.getRMIdentifier();
|
||||||
try {
|
|
||||||
rmRetryCount++;
|
|
||||||
LOG.info("Connecting to ResourceManager at " + this.rmAddress
|
|
||||||
+ ". current no. of attempts is " + rmRetryCount);
|
|
||||||
this.resourceTracker = getRMClient();
|
|
||||||
regNMResponse =
|
|
||||||
this.resourceTracker.registerNodeManager(request);
|
|
||||||
this.rmIdentifier = regNMResponse.getRMIdentifier();
|
|
||||||
break;
|
|
||||||
} catch(Throwable e) {
|
|
||||||
LOG.warn("Trying to connect to ResourceManager, " +
|
|
||||||
"current no. of failed attempts is "+rmRetryCount);
|
|
||||||
if(System.currentTimeMillis() - waitStartTime < rmConnectWaitMS
|
|
||||||
|| waitForEver) {
|
|
||||||
try {
|
|
||||||
LOG.info("Sleeping for " + rmConnectionRetryIntervalMS/1000
|
|
||||||
+ " seconds before next connection retry to RM");
|
|
||||||
Thread.sleep(rmConnectionRetryIntervalMS);
|
|
||||||
} catch(InterruptedException ex) {
|
|
||||||
//done nothing
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
String errorMessage = "Failed to Connect to RM, " +
|
|
||||||
"no. of failed attempts is "+rmRetryCount;
|
|
||||||
LOG.error(errorMessage,e);
|
|
||||||
throw new YarnRuntimeException(errorMessage,e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if the Resourcemanager instructs NM to shutdown.
|
// if the Resourcemanager instructs NM to shutdown.
|
||||||
if (NodeAction.SHUTDOWN.equals(regNMResponse.getNodeAction())) {
|
if (NodeAction.SHUTDOWN.equals(regNMResponse.getNodeAction())) {
|
||||||
String message =
|
String message =
|
||||||
|
@ -426,8 +356,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
// Send heartbeat
|
// Send heartbeat
|
||||||
try {
|
try {
|
||||||
NodeHeartbeatResponse response = null;
|
NodeHeartbeatResponse response = null;
|
||||||
int rmRetryCount = 0;
|
|
||||||
long waitStartTime = System.currentTimeMillis();
|
|
||||||
NodeStatus nodeStatus = getNodeStatusAndUpdateContainersInContext();
|
NodeStatus nodeStatus = getNodeStatusAndUpdateContainersInContext();
|
||||||
nodeStatus.setResponseId(lastHeartBeatID);
|
nodeStatus.setResponseId(lastHeartBeatID);
|
||||||
|
|
||||||
|
@ -440,31 +368,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
request
|
request
|
||||||
.setLastKnownNMTokenMasterKey(NodeStatusUpdaterImpl.this.context
|
.setLastKnownNMTokenMasterKey(NodeStatusUpdaterImpl.this.context
|
||||||
.getNMTokenSecretManager().getCurrentKey());
|
.getNMTokenSecretManager().getCurrentKey());
|
||||||
while (!isStopped) {
|
response = resourceTracker.nodeHeartbeat(request);
|
||||||
try {
|
|
||||||
rmRetryCount++;
|
|
||||||
response = resourceTracker.nodeHeartbeat(request);
|
|
||||||
break;
|
|
||||||
} catch (Throwable e) {
|
|
||||||
LOG.warn("Trying to heartbeat to ResourceManager, "
|
|
||||||
+ "current no. of failed attempts is " + rmRetryCount);
|
|
||||||
if(System.currentTimeMillis() - waitStartTime < rmConnectWaitMS
|
|
||||||
|| waitForEver) {
|
|
||||||
try {
|
|
||||||
LOG.info("Sleeping for " + rmConnectionRetryIntervalMS/1000
|
|
||||||
+ " seconds before next heartbeat to RM");
|
|
||||||
Thread.sleep(rmConnectionRetryIntervalMS);
|
|
||||||
} catch(InterruptedException ex) {
|
|
||||||
//done nothing
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
String errorMessage = "Failed to heartbeat to RM, " +
|
|
||||||
"no. of failed attempts is "+rmRetryCount;
|
|
||||||
LOG.error(errorMessage,e);
|
|
||||||
throw new YarnRuntimeException(errorMessage,e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//get next heartbeat interval from response
|
//get next heartbeat interval from response
|
||||||
nextHeartBeatInterval = response.getNextHeartBeatInterval();
|
nextHeartBeatInterval = response.getNextHeartBeatInterval();
|
||||||
updateMasterKeys(response);
|
updateMasterKeys(response);
|
||||||
|
@ -508,11 +412,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
dispatcher.getEventHandler().handle(
|
dispatcher.getEventHandler().handle(
|
||||||
new CMgrCompletedAppsEvent(appsToCleanup));
|
new CMgrCompletedAppsEvent(appsToCleanup));
|
||||||
}
|
}
|
||||||
} catch (YarnRuntimeException e) {
|
} catch (ConnectException e) {
|
||||||
//catch and throw the exception if tried MAX wait time to connect RM
|
//catch and throw the exception if tried MAX wait time to connect RM
|
||||||
dispatcher.getEventHandler().handle(
|
dispatcher.getEventHandler().handle(
|
||||||
new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
|
new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
|
||||||
throw e;
|
throw new YarnRuntimeException(e);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
// TODO Better error handling. Thread can die with the rest of the
|
// TODO Better error handling. Thread can die with the rest of the
|
||||||
// NM still running.
|
// NM still running.
|
||||||
|
|
|
@ -61,6 +61,10 @@ public class MockNodeStatusUpdater extends NodeStatusUpdaterImpl {
|
||||||
protected ResourceTracker getRMClient() {
|
protected ResourceTracker getRMClient() {
|
||||||
return resourceTracker;
|
return resourceTracker;
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
private static class MockResourceTracker implements ResourceTracker {
|
private static class MockResourceTracker implements ResourceTracker {
|
||||||
private int heartBeatID;
|
private int heartBeatID;
|
||||||
|
|
|
@ -107,6 +107,11 @@ public class TestEventFlow {
|
||||||
return new LocalRMInterface();
|
return new LocalRMInterface();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void startStatusUpdater() {
|
protected void startStatusUpdater() {
|
||||||
return; // Don't start any updating thread.
|
return; // Don't start any updating thread.
|
||||||
|
|
|
@ -41,6 +41,8 @@ import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||||
|
import org.apache.hadoop.io.retry.RetryProxy;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.service.ServiceOperations;
|
import org.apache.hadoop.service.ServiceOperations;
|
||||||
|
@ -53,6 +55,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.client.RMProxy;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
@ -60,9 +63,9 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.ipc.RPCUtil;
|
|
||||||
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
|
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
import org.apache.hadoop.yarn.server.api.ResourceTracker;
|
||||||
|
import org.apache.hadoop.yarn.server.api.ServerRMProxy;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
|
||||||
|
@ -103,11 +106,17 @@ public class TestNodeStatusUpdater {
|
||||||
volatile int heartBeatID = 0;
|
volatile int heartBeatID = 0;
|
||||||
volatile Throwable nmStartError = null;
|
volatile Throwable nmStartError = null;
|
||||||
private final List<NodeId> registeredNodes = new ArrayList<NodeId>();
|
private final List<NodeId> registeredNodes = new ArrayList<NodeId>();
|
||||||
private final Configuration conf = createNMConfig();
|
private boolean triggered = false;
|
||||||
|
private Configuration conf;
|
||||||
private NodeManager nm;
|
private NodeManager nm;
|
||||||
private boolean containerStatusBackupSuccessfully = true;
|
private boolean containerStatusBackupSuccessfully = true;
|
||||||
private List<ContainerStatus> completedContainerStatusList = new ArrayList<ContainerStatus>();
|
private List<ContainerStatus> completedContainerStatusList = new ArrayList<ContainerStatus>();
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
conf = createNMConfig();
|
||||||
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void tearDown() {
|
public void tearDown() {
|
||||||
this.registeredNodes.clear();
|
this.registeredNodes.clear();
|
||||||
|
@ -274,6 +283,11 @@ public class TestNodeStatusUpdater {
|
||||||
protected ResourceTracker getRMClient() {
|
protected ResourceTracker getRMClient() {
|
||||||
return resourceTracker;
|
return resourceTracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MyNodeStatusUpdater2 extends NodeStatusUpdaterImpl {
|
private class MyNodeStatusUpdater2 extends NodeStatusUpdaterImpl {
|
||||||
|
@ -290,6 +304,10 @@ public class TestNodeStatusUpdater {
|
||||||
return resourceTracker;
|
return resourceTracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MyNodeStatusUpdater3 extends NodeStatusUpdaterImpl {
|
private class MyNodeStatusUpdater3 extends NodeStatusUpdaterImpl {
|
||||||
|
@ -307,7 +325,12 @@ public class TestNodeStatusUpdater {
|
||||||
protected ResourceTracker getRMClient() {
|
protected ResourceTracker getRMClient() {
|
||||||
return resourceTracker;
|
return resourceTracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean isTokenKeepAliveEnabled(Configuration conf) {
|
protected boolean isTokenKeepAliveEnabled(Configuration conf) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -315,21 +338,16 @@ public class TestNodeStatusUpdater {
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MyNodeStatusUpdater4 extends NodeStatusUpdaterImpl {
|
private class MyNodeStatusUpdater4 extends NodeStatusUpdaterImpl {
|
||||||
public ResourceTracker resourceTracker =
|
|
||||||
new MyResourceTracker(this.context);
|
|
||||||
private Context context;
|
private Context context;
|
||||||
private long waitStartTime;
|
|
||||||
private final long rmStartIntervalMS;
|
private final long rmStartIntervalMS;
|
||||||
private final boolean rmNeverStart;
|
private final boolean rmNeverStart;
|
||||||
private volatile boolean triggered = false;
|
public ResourceTracker resourceTracker;
|
||||||
private long durationWhenTriggered = -1;
|
|
||||||
|
|
||||||
public MyNodeStatusUpdater4(Context context, Dispatcher dispatcher,
|
public MyNodeStatusUpdater4(Context context, Dispatcher dispatcher,
|
||||||
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
|
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
|
||||||
long rmStartIntervalMS, boolean rmNeverStart) {
|
long rmStartIntervalMS, boolean rmNeverStart) {
|
||||||
super(context, dispatcher, healthChecker, metrics);
|
super(context, dispatcher, healthChecker, metrics);
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.waitStartTime = System.currentTimeMillis();
|
|
||||||
this.rmStartIntervalMS = rmStartIntervalMS;
|
this.rmStartIntervalMS = rmStartIntervalMS;
|
||||||
this.rmNeverStart = rmNeverStart;
|
this.rmNeverStart = rmNeverStart;
|
||||||
}
|
}
|
||||||
|
@ -337,25 +355,16 @@ public class TestNodeStatusUpdater {
|
||||||
@Override
|
@Override
|
||||||
protected void serviceStart() throws Exception {
|
protected void serviceStart() throws Exception {
|
||||||
//record the startup time
|
//record the startup time
|
||||||
this.waitStartTime = System.currentTimeMillis();
|
|
||||||
super.serviceStart();
|
super.serviceStart();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ResourceTracker getRMClient() {
|
protected ResourceTracker getRMClient() throws IOException {
|
||||||
if (!triggered) {
|
RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf);
|
||||||
long t = System.currentTimeMillis();
|
resourceTracker =
|
||||||
long duration = t - waitStartTime;
|
(ResourceTracker) RetryProxy.create(ResourceTracker.class,
|
||||||
if (duration <= rmStartIntervalMS
|
new MyResourceTracker6(this.context, rmStartIntervalMS,
|
||||||
|| rmNeverStart) {
|
rmNeverStart), retryPolicy);
|
||||||
throw new YarnRuntimeException("Faking RM start failure as start " +
|
|
||||||
"delay timer has not expired.");
|
|
||||||
} else {
|
|
||||||
//triggering
|
|
||||||
triggered = true;
|
|
||||||
durationWhenTriggered = duration;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return resourceTracker;
|
return resourceTracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -363,37 +372,35 @@ public class TestNodeStatusUpdater {
|
||||||
return triggered;
|
return triggered;
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getWaitStartTime() {
|
|
||||||
return waitStartTime;
|
|
||||||
}
|
|
||||||
|
|
||||||
private long getDurationWhenTriggered() {
|
|
||||||
return durationWhenTriggered;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
protected void stopRMProxy() {
|
||||||
return "MyNodeStatusUpdater4{" +
|
return;
|
||||||
"rmNeverStart=" + rmNeverStart +
|
|
||||||
", triggered=" + triggered +
|
|
||||||
", duration=" + durationWhenTriggered +
|
|
||||||
", rmStartIntervalMS=" + rmStartIntervalMS +
|
|
||||||
'}';
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private class MyNodeStatusUpdater5 extends NodeStatusUpdaterImpl {
|
private class MyNodeStatusUpdater5 extends NodeStatusUpdaterImpl {
|
||||||
private ResourceTracker resourceTracker;
|
private ResourceTracker resourceTracker;
|
||||||
|
private Configuration conf;
|
||||||
|
|
||||||
public MyNodeStatusUpdater5(Context context, Dispatcher dispatcher,
|
public MyNodeStatusUpdater5(Context context, Dispatcher dispatcher,
|
||||||
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
|
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, Configuration conf) {
|
||||||
super(context, dispatcher, healthChecker, metrics);
|
super(context, dispatcher, healthChecker, metrics);
|
||||||
resourceTracker = new MyResourceTracker5();
|
resourceTracker = new MyResourceTracker5();
|
||||||
|
this.conf = conf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ResourceTracker getRMClient() {
|
protected ResourceTracker getRMClient() {
|
||||||
return resourceTracker;
|
RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf);
|
||||||
|
return (ResourceTracker) RetryProxy.create(ResourceTracker.class,
|
||||||
|
resourceTracker, retryPolicy);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -417,15 +424,18 @@ public class TestNodeStatusUpdater {
|
||||||
public boolean isStopped = false;
|
public boolean isStopped = false;
|
||||||
private NodeStatusUpdater nodeStatusUpdater;
|
private NodeStatusUpdater nodeStatusUpdater;
|
||||||
private CyclicBarrier syncBarrier;
|
private CyclicBarrier syncBarrier;
|
||||||
public MyNodeManager2 (CyclicBarrier syncBarrier) {
|
private Configuration conf;
|
||||||
|
|
||||||
|
public MyNodeManager2 (CyclicBarrier syncBarrier, Configuration conf) {
|
||||||
this.syncBarrier = syncBarrier;
|
this.syncBarrier = syncBarrier;
|
||||||
|
this.conf = conf;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
|
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
|
||||||
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
|
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
|
||||||
nodeStatusUpdater =
|
nodeStatusUpdater =
|
||||||
new MyNodeStatusUpdater5(context, dispatcher, healthChecker,
|
new MyNodeStatusUpdater5(context, dispatcher, healthChecker,
|
||||||
metrics);
|
metrics, conf);
|
||||||
return nodeStatusUpdater;
|
return nodeStatusUpdater;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -577,7 +587,7 @@ public class TestNodeStatusUpdater {
|
||||||
.get(4).getState() == ContainerState.RUNNING
|
.get(4).getState() == ContainerState.RUNNING
|
||||||
&& request.getNodeStatus().getContainersStatuses().get(4)
|
&& request.getNodeStatus().getContainersStatuses().get(4)
|
||||||
.getContainerId().getId() == 5);
|
.getContainerId().getId() == 5);
|
||||||
throw new YarnRuntimeException("Lost the heartbeat response");
|
throw new java.net.ConnectException("Lost the heartbeat response");
|
||||||
} else if (heartBeatID == 2) {
|
} else if (heartBeatID == 2) {
|
||||||
Assert.assertEquals(request.getNodeStatus().getContainersStatuses()
|
Assert.assertEquals(request.getNodeStatus().getContainersStatuses()
|
||||||
.size(), 7);
|
.size(), 7);
|
||||||
|
@ -646,7 +656,63 @@ public class TestNodeStatusUpdater {
|
||||||
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
|
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
heartBeatID++;
|
heartBeatID++;
|
||||||
throw RPCUtil.getRemoteException("NodeHeartbeat exception");
|
throw new java.net.ConnectException(
|
||||||
|
"NodeHeartbeat exception");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class MyResourceTracker6 implements ResourceTracker {
|
||||||
|
|
||||||
|
private final Context context;
|
||||||
|
private long rmStartIntervalMS;
|
||||||
|
private boolean rmNeverStart;
|
||||||
|
private final long waitStartTime;
|
||||||
|
|
||||||
|
public MyResourceTracker6(Context context, long rmStartIntervalMS,
|
||||||
|
boolean rmNeverStart) {
|
||||||
|
this.context = context;
|
||||||
|
this.rmStartIntervalMS = rmStartIntervalMS;
|
||||||
|
this.rmNeverStart = rmNeverStart;
|
||||||
|
this.waitStartTime = System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public RegisterNodeManagerResponse registerNodeManager(
|
||||||
|
RegisterNodeManagerRequest request) throws YarnException, IOException,
|
||||||
|
IOException {
|
||||||
|
if (System.currentTimeMillis() - waitStartTime <= rmStartIntervalMS
|
||||||
|
|| rmNeverStart) {
|
||||||
|
throw new java.net.ConnectException("Faking RM start failure as start "
|
||||||
|
+ "delay timer has not expired.");
|
||||||
|
} else {
|
||||||
|
NodeId nodeId = request.getNodeId();
|
||||||
|
Resource resource = request.getResource();
|
||||||
|
LOG.info("Registering " + nodeId.toString());
|
||||||
|
// NOTE: this really should be checking against the config value
|
||||||
|
InetSocketAddress expected = NetUtils.getConnectAddress(
|
||||||
|
conf.getSocketAddr(YarnConfiguration.NM_ADDRESS, null, -1));
|
||||||
|
Assert.assertEquals(NetUtils.getHostPortString(expected),
|
||||||
|
nodeId.toString());
|
||||||
|
Assert.assertEquals(5 * 1024, resource.getMemory());
|
||||||
|
registeredNodes.add(nodeId);
|
||||||
|
|
||||||
|
RegisterNodeManagerResponse response = recordFactory
|
||||||
|
.newRecordInstance(RegisterNodeManagerResponse.class);
|
||||||
|
triggered = true;
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
|
||||||
|
throws YarnException, IOException {
|
||||||
|
NodeStatus nodeStatus = request.getNodeStatus();
|
||||||
|
nodeStatus.setResponseId(heartBeatID++);
|
||||||
|
|
||||||
|
NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils.
|
||||||
|
newNodeHeartbeatResponse(heartBeatID, NodeAction.NORMAL, null,
|
||||||
|
null, null, null, 1000L);
|
||||||
|
return nhResponse;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -843,8 +909,7 @@ public class TestNodeStatusUpdater {
|
||||||
final long connectionRetryIntervalSecs = 1;
|
final long connectionRetryIntervalSecs = 1;
|
||||||
//Waiting for rmStartIntervalMS, RM will be started
|
//Waiting for rmStartIntervalMS, RM will be started
|
||||||
final long rmStartIntervalMS = 2*1000;
|
final long rmStartIntervalMS = 2*1000;
|
||||||
YarnConfiguration conf = createNMConfig();
|
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS,
|
||||||
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS,
|
|
||||||
connectionWaitSecs);
|
connectionWaitSecs);
|
||||||
conf.setLong(YarnConfiguration
|
conf.setLong(YarnConfiguration
|
||||||
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
||||||
|
@ -907,8 +972,6 @@ public class TestNodeStatusUpdater {
|
||||||
}
|
}
|
||||||
long duration = System.currentTimeMillis() - waitStartTime;
|
long duration = System.currentTimeMillis() - waitStartTime;
|
||||||
MyNodeStatusUpdater4 myUpdater = (MyNodeStatusUpdater4) updater;
|
MyNodeStatusUpdater4 myUpdater = (MyNodeStatusUpdater4) updater;
|
||||||
Assert.assertTrue("Updater was never started",
|
|
||||||
myUpdater.getWaitStartTime()>0);
|
|
||||||
Assert.assertTrue("NM started before updater triggered",
|
Assert.assertTrue("NM started before updater triggered",
|
||||||
myUpdater.isTriggered());
|
myUpdater.isTriggered());
|
||||||
Assert.assertTrue("NM should have connected to RM after "
|
Assert.assertTrue("NM should have connected to RM after "
|
||||||
|
@ -1037,13 +1100,13 @@ public class TestNodeStatusUpdater {
|
||||||
final long connectionWaitSecs = 1;
|
final long connectionWaitSecs = 1;
|
||||||
final long connectionRetryIntervalSecs = 1;
|
final long connectionRetryIntervalSecs = 1;
|
||||||
YarnConfiguration conf = createNMConfig();
|
YarnConfiguration conf = createNMConfig();
|
||||||
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS,
|
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS,
|
||||||
connectionWaitSecs);
|
connectionWaitSecs);
|
||||||
conf.setLong(YarnConfiguration
|
conf.setLong(YarnConfiguration
|
||||||
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
||||||
connectionRetryIntervalSecs);
|
connectionRetryIntervalSecs);
|
||||||
CyclicBarrier syncBarrier = new CyclicBarrier(2);
|
CyclicBarrier syncBarrier = new CyclicBarrier(2);
|
||||||
nm = new MyNodeManager2(syncBarrier);
|
nm = new MyNodeManager2(syncBarrier, conf);
|
||||||
nm.init(conf);
|
nm.init(conf);
|
||||||
nm.start();
|
nm.start();
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -117,6 +117,11 @@ public abstract class BaseContainerManagerTest {
|
||||||
return new LocalRMInterface();
|
return new LocalRMInterface();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void startStatusUpdater() {
|
protected void startStatusUpdater() {
|
||||||
return; // Don't start any updating thread.
|
return; // Don't start any updating thread.
|
||||||
|
|
|
@ -390,6 +390,11 @@ public class MiniYARNCluster extends CompositeService {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void stopRMProxy() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue