From 809e3f2453aa26339180f2310625e968c2c3cfc8 Mon Sep 17 00:00:00 2001 From: Sunil G Date: Mon, 25 Feb 2019 11:30:46 +0530 Subject: [PATCH] YARN-9121. Replace GpuDiscoverer.getInstance() to a readable object for easy access control. Contributed by Szilard Nemeth. (cherry picked from commit 5e91ebd91a405e1585ef02b8fbf03f10d1398adf) --- .../resources/gpu/GpuResourceHandlerImpl.java | 9 +++++--- .../resourceplugin/ResourcePluginManager.java | 7 +++++- .../resourceplugin/gpu/GpuDiscoverer.java | 9 -------- .../gpu/GpuNodeResourceUpdateHandler.java | 10 ++++++--- .../resourceplugin/gpu/GpuResourcePlugin.java | 22 +++++++++++-------- .../resources/gpu/TestGpuResourceHandler.java | 20 +++++++++-------- 6 files changed, 43 insertions(+), 34 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java index d3be720c492..2c9baf23051 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java @@ -52,14 +52,17 @@ public class GpuResourceHandlerImpl implements ResourceHandler { private final GpuResourceAllocator gpuAllocator; private final CGroupsHandler cGroupsHandler; private final PrivilegedOperationExecutor privilegedOperationExecutor; + private final GpuDiscoverer gpuDiscoverer; public GpuResourceHandlerImpl(Context nmContext, CGroupsHandler cGroupsHandler, - PrivilegedOperationExecutor privilegedOperationExecutor) { + PrivilegedOperationExecutor privilegedOperationExecutor, + GpuDiscoverer gpuDiscoverer) { this.nmContext = nmContext; this.cGroupsHandler = cGroupsHandler; this.privilegedOperationExecutor = privilegedOperationExecutor; - gpuAllocator = new GpuResourceAllocator(nmContext); + this.gpuAllocator = new GpuResourceAllocator(nmContext); + this.gpuDiscoverer = gpuDiscoverer; } @Override @@ -67,7 +70,7 @@ public class GpuResourceHandlerImpl implements ResourceHandler { throws ResourceHandlerException { List usableGpus; try { - usableGpus = GpuDiscoverer.getInstance().getGpusUsableByYarn(); + usableGpus = gpuDiscoverer.getGpusUsableByYarn(); if (usableGpus == null || usableGpus.isEmpty()) { String message = "GPU is enabled on the NodeManager, but couldn't find " + "any usable GPU devices, please double check configuration!"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java index 14218c16d20..c1f291058e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java @@ -25,6 +25,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.FpgaResourcePlugin; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuNodeResourceUpdateHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -84,7 +86,10 @@ public class ResourcePluginManager { ResourcePlugin plugin = null; if (resourceName.equals(GPU_URI)) { - plugin = new GpuResourcePlugin(); + final GpuDiscoverer gpuDiscoverer = new GpuDiscoverer(); + final GpuNodeResourceUpdateHandler updateHandler = + new GpuNodeResourceUpdateHandler(gpuDiscoverer); + plugin = new GpuResourcePlugin(updateHandler, gpuDiscoverer); } else if (resourceName.equals(FPGA_URI)) { plugin = new FpgaResourcePlugin(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java index 92792b7ba64..334a86c2c82 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java @@ -58,11 +58,6 @@ public class GpuDiscoverer { // command should not run more than 10 sec. private static final int MAX_EXEC_TIMEOUT_MS = 10 * 1000; private static final int MAX_REPEATED_ERROR_ALLOWED = 10; - private static GpuDiscoverer instance; - - static { - instance = new GpuDiscoverer(); - } private Configuration conf = null; private String pathOfGpuBinary = null; @@ -293,8 +288,4 @@ public class GpuDiscoverer { String getPathOfGpuBinary() { return pathOfGpuBinary; } - - public static GpuDiscoverer getInstance() { - return instance; - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java index 8b19048c8be..4b2258d557f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java @@ -35,16 +35,20 @@ import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI; public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin { private static final Logger LOG = LoggerFactory.getLogger(GpuNodeResourceUpdateHandler.class); + private final GpuDiscoverer gpuDiscoverer; + + public GpuNodeResourceUpdateHandler(GpuDiscoverer gpuDiscoverer) { + this.gpuDiscoverer = gpuDiscoverer; + } @Override public void updateConfiguredResource(Resource res) throws YarnException { LOG.info("Initializing configured GPU resources for the NodeManager."); - List usableGpus = GpuDiscoverer.getInstance() - .getGpusUsableByYarn(); + List usableGpus = gpuDiscoverer.getGpusUsableByYarn(); if (usableGpus == null || usableGpus.isEmpty()) { String message = "GPU is enabled, " + - "but couldn't find any usable GPUs on the NodeManager!"; + "but could not find any usable GPUs on the NodeManager!"; LOG.error(message); // No gpu can be used by YARN. throw new YarnException(message); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java index e49d2f24bd9..393d76e1f5b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java @@ -18,7 +18,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; @@ -34,18 +33,23 @@ import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInforma import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo; import java.util.List; -import java.util.Map; public class GpuResourcePlugin implements ResourcePlugin { + private final GpuNodeResourceUpdateHandler resourceDiscoverHandler; + private final GpuDiscoverer gpuDiscoverer; private GpuResourceHandlerImpl gpuResourceHandler = null; - private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null; private DockerCommandPlugin dockerCommandPlugin = null; + public GpuResourcePlugin(GpuNodeResourceUpdateHandler resourceDiscoverHandler, + GpuDiscoverer gpuDiscoverer) { + this.resourceDiscoverHandler = resourceDiscoverHandler; + this.gpuDiscoverer = gpuDiscoverer; + } + @Override public synchronized void initialize(Context context) throws YarnException { - resourceDiscoverHandler = new GpuNodeResourceUpdateHandler(); - GpuDiscoverer.getInstance().initialize(context.getConf()); - dockerCommandPlugin = + this.gpuDiscoverer.initialize(context.getConf()); + this.dockerCommandPlugin = GpuDockerCommandPluginFactory.createGpuDockerCommandPlugin( context.getConf()); } @@ -56,7 +60,7 @@ public class GpuResourcePlugin implements ResourcePlugin { PrivilegedOperationExecutor privilegedOperationExecutor) { if (gpuResourceHandler == null) { gpuResourceHandler = new GpuResourceHandlerImpl(context, cGroupsHandler, - privilegedOperationExecutor); + privilegedOperationExecutor, gpuDiscoverer); } return gpuResourceHandler; @@ -77,9 +81,9 @@ public class GpuResourcePlugin implements ResourcePlugin { } @Override - public NMResourceInfo getNMResourceInfo() throws YarnException { + public synchronized NMResourceInfo getNMResourceInfo() throws YarnException { GpuDeviceInformation gpuDeviceInformation = - GpuDiscoverer.getInstance().getGpuDeviceInformation(); + gpuDiscoverer.getGpuDeviceInformation(); GpuResourceAllocator gpuResourceAllocator = gpuResourceHandler.getGpuAllocator(); List totalGpus = gpuResourceAllocator.getAllowedGpusCopy(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java index 10e5cd1c850..1c41ef2c118 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java @@ -71,6 +71,7 @@ public class TestGpuResourceHandler { private GpuResourceHandlerImpl gpuResourceHandler; private NMStateStoreService mockNMStateStore; private ConcurrentHashMap runningContainersMap; + private GpuDiscoverer gpuDiscoverer; @Before public void setup() { @@ -88,8 +89,9 @@ public class TestGpuResourceHandler { runningContainersMap = new ConcurrentHashMap<>(); when(nmctx.getContainers()).thenReturn(runningContainersMap); + gpuDiscoverer = new GpuDiscoverer(); gpuResourceHandler = new GpuResourceHandlerImpl(nmctx, mockCGroupsHandler, - mockPrivilegedExecutor); + mockPrivilegedExecutor, gpuDiscoverer); } @Test @@ -97,7 +99,7 @@ public class TestGpuResourceHandler { Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0"); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); gpuResourceHandler.bootstrap(conf); verify(mockCGroupsHandler, times(1)).initializeCGroupController( @@ -161,7 +163,7 @@ public class TestGpuResourceHandler { throws Exception { Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); gpuResourceHandler.bootstrap(conf); Assert.assertEquals(4, @@ -250,7 +252,7 @@ public class TestGpuResourceHandler { throws Exception { Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); gpuResourceHandler.bootstrap(conf); Assert.assertEquals(4, @@ -279,7 +281,7 @@ public class TestGpuResourceHandler { public void testAllocationWithoutAllowedGpus() throws Exception { Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " "); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); try { gpuResourceHandler.bootstrap(conf); @@ -314,7 +316,7 @@ public class TestGpuResourceHandler { public void testAllocationStored() throws Exception { Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); gpuResourceHandler.bootstrap(conf); Assert.assertEquals(4, @@ -360,9 +362,9 @@ public class TestGpuResourceHandler { GpuResourceHandlerImpl gpuNULLStateResourceHandler = new GpuResourceHandlerImpl(nmnctx, mockCGroupsHandler, - mockPrivilegedExecutor); + mockPrivilegedExecutor, gpuDiscoverer); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); gpuNULLStateResourceHandler.bootstrap(conf); Assert.assertEquals(4, @@ -382,7 +384,7 @@ public class TestGpuResourceHandler { public void testRecoverResourceAllocation() throws Exception { Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4"); - GpuDiscoverer.getInstance().initialize(conf); + gpuDiscoverer.initialize(conf); gpuResourceHandler.bootstrap(conf); Assert.assertEquals(4,