YARN-8137. Parallelize node addition in SLS. Contributed by Abhishek Modi.
This commit is contained in:
parent
860cc28a5a
commit
fd24fd0ff7
|
@@ -33,6 +33,9 @@ import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonFactory;
|
import com.fasterxml.jackson.core.JsonFactory;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
@@ -85,6 +88,7 @@ import org.apache.hadoop.yarn.sls.utils.SLSUtils;
|
||||||
import org.apache.hadoop.yarn.util.UTCClock;
|
import org.apache.hadoop.yarn.util.UTCClock;
|
||||||
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
|
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
import org.eclipse.jetty.util.ConcurrentHashSet;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@@ -96,9 +100,10 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
private static TaskRunner runner = new TaskRunner();
|
private static TaskRunner runner = new TaskRunner();
|
||||||
private String[] inputTraces;
|
private String[] inputTraces;
|
||||||
private Map<String, Integer> queueAppNumMap;
|
private Map<String, Integer> queueAppNumMap;
|
||||||
|
private int poolSize;
|
||||||
|
|
||||||
// NM simulator
|
// NM simulator
|
||||||
private HashMap<NodeId, NMSimulator> nmMap;
|
private Map<NodeId, NMSimulator> nmMap;
|
||||||
private Resource nodeManagerResource;
|
private Resource nodeManagerResource;
|
||||||
private String nodeFile;
|
private String nodeFile;
|
||||||
|
|
||||||
|
@@ -158,7 +163,7 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init(Configuration tempConf) throws ClassNotFoundException {
|
private void init(Configuration tempConf) throws ClassNotFoundException {
|
||||||
nmMap = new HashMap<>();
|
nmMap = new ConcurrentHashMap<>();
|
||||||
queueAppNumMap = new HashMap<>();
|
queueAppNumMap = new HashMap<>();
|
||||||
amMap = new ConcurrentHashMap<>();
|
amMap = new ConcurrentHashMap<>();
|
||||||
amClassMap = new HashMap<>();
|
amClassMap = new HashMap<>();
|
||||||
|
@@ -167,7 +172,7 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
setConf(tempConf);
|
setConf(tempConf);
|
||||||
|
|
||||||
// runner
|
// runner
|
||||||
int poolSize = tempConf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
|
poolSize = tempConf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
|
||||||
SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
|
SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
|
||||||
SLSRunner.runner.setQueueSize(poolSize);
|
SLSRunner.runner.setQueueSize(poolSize);
|
||||||
// <AMType, Class> map
|
// <AMType, Class> map
|
||||||
|
@@ -283,7 +288,8 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
rm.start();
|
rm.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void startNM() throws YarnException, IOException {
|
private void startNM() throws YarnException, IOException,
|
||||||
|
InterruptedException {
|
||||||
// nm configuration
|
// nm configuration
|
||||||
int heartbeatInterval = getConf().getInt(
|
int heartbeatInterval = getConf().getInt(
|
||||||
SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
|
SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
|
||||||
|
@@ -333,8 +339,15 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
|
|
||||||
// create NM simulators
|
// create NM simulators
|
||||||
Random random = new Random();
|
Random random = new Random();
|
||||||
Set<String> rackSet = new HashSet<String>();
|
Set<String> rackSet = new ConcurrentHashSet<>();
|
||||||
|
int threadPoolSize = Math.max(poolSize,
|
||||||
|
SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
|
||||||
|
ExecutorService executorService = Executors.
|
||||||
|
newFixedThreadPool(threadPoolSize);
|
||||||
for (Map.Entry<String, Resource> entry : nodeResourceMap.entrySet()) {
|
for (Map.Entry<String, Resource> entry : nodeResourceMap.entrySet()) {
|
||||||
|
executorService.submit(new Runnable() {
|
||||||
|
@Override public void run() {
|
||||||
|
try {
|
||||||
// we randomize the heartbeat start time from zero to 1 interval
|
// we randomize the heartbeat start time from zero to 1 interval
|
||||||
NMSimulator nm = new NMSimulator();
|
NMSimulator nm = new NMSimulator();
|
||||||
Resource nmResource = nodeManagerResource;
|
Resource nmResource = nodeManagerResource;
|
||||||
|
@@ -342,12 +355,20 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
if (entry.getValue() != null) {
|
if (entry.getValue() != null) {
|
||||||
nmResource = entry.getValue();
|
nmResource = entry.getValue();
|
||||||
}
|
}
|
||||||
nm.init(hostName, nmResource, random.nextInt(heartbeatInterval),
|
nm.init(hostName, nmResource,
|
||||||
|
random.nextInt(heartbeatInterval),
|
||||||
heartbeatInterval, rm, resourceUtilizationRatio);
|
heartbeatInterval, rm, resourceUtilizationRatio);
|
||||||
nmMap.put(nm.getNode().getNodeID(), nm);
|
nmMap.put(nm.getNode().getNodeID(), nm);
|
||||||
runner.schedule(nm);
|
runner.schedule(nm);
|
||||||
rackSet.add(nm.getNode().getRackName());
|
rackSet.add(nm.getNode().getRackName());
|
||||||
|
} catch (IOException | YarnException e) {
|
||||||
|
LOG.error("Got an error while adding node", e);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
executorService.shutdown();
|
||||||
|
executorService.awaitTermination(10, TimeUnit.MINUTES);
|
||||||
numRacks = rackSet.size();
|
numRacks = rackSet.size();
|
||||||
numNMs = nmMap.size();
|
numNMs = nmMap.size();
|
||||||
}
|
}
|
||||||
|
@@ -839,7 +860,7 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
(long)(Math.ceil(maxRuntime / 1000.0)));
|
(long)(Math.ceil(maxRuntime / 1000.0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public HashMap<NodeId, NMSimulator> getNmMap() {
|
public Map<NodeId, NMSimulator> getNmMap() {
|
||||||
return nmMap;
|
return nmMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue