YARN-9425. Make initialDelay configurable for FederationStateStoreService#scheduledExecutorService (#4731). Contributed by groot and Shen Yinjie.

Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
Ashutosh Gupta 2022-08-21 23:10:00 +01:00 committed by GitHub
parent 7f176d080c
commit c294a414b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 3 deletions

View File

@ -3920,6 +3920,13 @@ public class YarnConfiguration extends Configuration {
public static final String DEFAULT_FEDERATION_REGISTRY_BASE_KEY = public static final String DEFAULT_FEDERATION_REGISTRY_BASE_KEY =
"yarnfederation/"; "yarnfederation/";
public static final String FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY =
FEDERATION_PREFIX + "state-store.heartbeat.initial-delay";
// 30 secs
public static final int
DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY = 30;
public static final String FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS = public static final String FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS =
FEDERATION_PREFIX + "state-store.heartbeat-interval-secs"; FEDERATION_PREFIX + "state-store.heartbeat-interval-secs";

View File

@ -3624,6 +3624,16 @@
<name>yarn.federation.enabled</name> <name>yarn.federation.enabled</name>
<value>false</value> <value>false</value>
</property> </property>
<property>
<description>
Initial delay for federation state-store heartbeat service. Value is followed by a unit
specifier: ns, us, ms, s, m, h, d for nanoseconds, microseconds, milliseconds, seconds,
minutes, hours, days respectively. Values should provide units,
but seconds are assumed
</description>
<name>yarn.federation.state-store.heartbeat.initial-delay</name>
<value>30s</value>
</property>
<property> <property>
<description> <description>
Machine list file to be loaded by the FederationSubCluster Resolver Machine list file to be loaded by the FederationSubCluster Resolver

View File

@ -96,6 +96,7 @@ public class FederationStateStoreService extends AbstractService
private FederationStateStore stateStoreClient = null; private FederationStateStore stateStoreClient = null;
private SubClusterId subClusterId; private SubClusterId subClusterId;
private long heartbeatInterval; private long heartbeatInterval;
private long heartbeatInitialDelay;
private RMContext rmContext; private RMContext rmContext;
public FederationStateStoreService(RMContext rmContext) { public FederationStateStoreService(RMContext rmContext) {
@ -126,10 +127,24 @@ public class FederationStateStoreService extends AbstractService
heartbeatInterval = conf.getLong( heartbeatInterval = conf.getLong(
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS, YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS,
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS); YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS);
if (heartbeatInterval <= 0) { if (heartbeatInterval <= 0) {
heartbeatInterval = heartbeatInterval =
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS; YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS;
} }
heartbeatInitialDelay = conf.getTimeDuration(
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY,
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY,
TimeUnit.SECONDS);
if (heartbeatInitialDelay <= 0) {
LOG.warn("{} configured value is wrong, must be > 0; using default value of {}",
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY,
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY);
heartbeatInitialDelay =
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY;
}
LOG.info("Initialized federation membership service."); LOG.info("Initialized federation membership service.");
super.serviceInit(conf); super.serviceInit(conf);
@ -206,9 +221,9 @@ public class FederationStateStoreService extends AbstractService
scheduledExecutorService = scheduledExecutorService =
HadoopExecutors.newSingleThreadScheduledExecutor(); HadoopExecutors.newSingleThreadScheduledExecutor();
scheduledExecutorService.scheduleWithFixedDelay(stateStoreHeartbeat, scheduledExecutorService.scheduleWithFixedDelay(stateStoreHeartbeat,
heartbeatInterval, heartbeatInterval, TimeUnit.SECONDS); heartbeatInitialDelay, heartbeatInterval, TimeUnit.SECONDS);
LOG.info("Started federation membership heartbeat with interval: {}", LOG.info("Started federation membership heartbeat with interval: {} and initial delay: {}",
heartbeatInterval); heartbeatInterval, heartbeatInitialDelay);
} }
@VisibleForTesting @VisibleForTesting

View File

@ -25,6 +25,7 @@ import javax.xml.bind.JAXBException;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
@ -173,4 +174,37 @@ public class TestFederationRMStateStoreService {
return response.getCapability(); return response.getCapability();
} }
@Test
public void testFederationStateStoreServiceInitialHeartbeatDelay() throws Exception {
conf.setBoolean(YarnConfiguration.FEDERATION_ENABLED, true);
conf.setInt(YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY, 10);
conf.set(YarnConfiguration.RM_CLUSTER_ID, subClusterId.getId());
GenericTestUtils.LogCapturer logCapture =
GenericTestUtils.LogCapturer.captureLogs(FederationStateStoreService.LOG);
final MockRM rm = new MockRM(conf);
// Initially there should be no entry for the sub-cluster
rm.init(conf);
stateStore = rm.getFederationStateStoreService().getStateStoreClient();
GetSubClusterInfoResponse response = stateStore.getSubCluster(request);
Assert.assertNull(response);
// Validate if sub-cluster is registered
rm.start();
String capability = checkSubClusterInfo(SubClusterState.SC_NEW);
Assert.assertTrue(capability.isEmpty());
// Heartbeat to see if sub-cluster transitions to running
FederationStateStoreHeartbeat storeHeartbeat =
rm.getFederationStateStoreService().getStateStoreHeartbeatThread();
storeHeartbeat.run();
capability = checkSubClusterInfo(SubClusterState.SC_RUNNING);
checkClusterMetricsInfo(capability, 0);
Assert.assertTrue(logCapture.getOutput().contains(
"Started federation membership heartbeat with interval: 300 and initial delay: 10"));
rm.stop();
}
} }