From 3c2bd19fa5e4eebd11f12c0981f222e60109e71a Mon Sep 17 00:00:00 2001 From: Jian He Date: Fri, 3 Jun 2016 11:10:42 -0700 Subject: [PATCH] YARN-5190. Registering/unregistering container metrics in ContainerMonitorImpl and ContainerImpl causing uncaught exception in ContainerMonitorImpl. Contributed by Junping Du (cherry picked from commit 99cc439e29794f8e61bebe03b2a7ca4b6743ec92) --- .../metrics2/impl/MetricsSystemImpl.java | 1 + .../metrics2/lib/DefaultMetricsSystem.java | 9 +++++++++ .../monitor/ContainerMetrics.java | 18 +++++++++++++----- .../monitor/ContainersMonitorImpl.java | 16 ++++++++++++---- .../monitor/TestContainerMetrics.java | 4 +++- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index 866c846233d..3ce2a329d6a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -255,6 +255,7 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource { if (namedCallbacks.containsKey(name)) { namedCallbacks.remove(name); } + DefaultMetricsSystem.removeSourceName(name); } synchronized diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java index d3a5508393e..3c4aa78e4d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java @@ -107,6 +107,11 @@ public enum DefaultMetricsSystem { INSTANCE.removeObjectName(name.toString()); } + @InterfaceAudience.Private + public static void removeSourceName(String name) { + INSTANCE.removeSource(name); + } + @InterfaceAudience.Private public static String sourceName(String name, boolean dupOK) { return INSTANCE.newSourceName(name, dupOK); @@ -127,6 +132,10 @@ public enum DefaultMetricsSystem { mBeanNames.map.remove(name); } + synchronized void removeSource(String name) { + sourceNames.map.remove(name); + } + synchronized String newSourceName(String name, boolean dupOK) { if (sourceNames.map.containsKey(name)) { if (dupOK) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index 48128c16878..706d4f9ec1b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ -160,6 +160,12 @@ public class ContainerMetrics implements MetricsSource { DefaultMetricsSystem.instance(), containerId, flushPeriodMs, delayMs); } + public synchronized static ContainerMetrics getContainerMetrics( + ContainerId containerId) { + // could be null + return usageMetrics.get(containerId); + } + synchronized static ContainerMetrics forContainer( MetricsSystem ms, ContainerId containerId, long flushPeriodMs, long delayMs) { @@ -205,12 +211,14 @@ public class ContainerMetrics implements MetricsSource { } public synchronized void finished() { - this.finished = true; - if (timer != null) { - timer.cancel(); - timer = null; + if (!finished) { + this.finished = true; + if (timer != null) { + timer.cancel(); + timer = null; + } + scheduleTimerTaskForUnregistration(); } - scheduleTimerTaskForUnregistration(); } public void recordMemoryUsage(int memoryMBs) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 446e7a1270e..8c907c77117 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -615,15 +615,16 @@ public class ContainersMonitorImpl extends AbstractService implements } ContainerId containerId = monitoringEvent.getContainerId(); - ContainerMetrics usageMetrics = ContainerMetrics - .forContainer(containerId, containerMetricsPeriodMs, - containerMetricsUnregisterDelayMs); + ContainerMetrics usageMetrics; int vmemLimitMBs; int pmemLimitMBs; int cpuVcores; switch (monitoringEvent.getType()) { case START_MONITORING_CONTAINER: + usageMetrics = ContainerMetrics + .forContainer(containerId, containerMetricsPeriodMs, + containerMetricsUnregisterDelayMs); ContainerStartMonitoringEvent startEvent = (ContainerStartMonitoringEvent) monitoringEvent; usageMetrics.recordStateChangeDurations( @@ -636,9 +637,16 @@ public class ContainersMonitorImpl extends AbstractService implements vmemLimitMBs, pmemLimitMBs, cpuVcores); break; case STOP_MONITORING_CONTAINER: - usageMetrics.finished(); + usageMetrics = ContainerMetrics.getContainerMetrics( + containerId); + if (usageMetrics != null) { + usageMetrics.finished(); + } break; case CHANGE_MONITORING_CONTAINER_RESOURCE: + usageMetrics = ContainerMetrics + .forContainer(containerId, containerMetricsPeriodMs, + containerMetricsUnregisterDelayMs); ChangeMonitoringContainerResourceEvent changeEvent = (ChangeMonitoringContainerResourceEvent) monitoringEvent; Resource resource = changeEvent.getResource(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java index 2beb927e896..040647e5742 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java @@ -146,7 +146,6 @@ public class TestContainerMetrics { system.sampleMetrics(); system.sampleMetrics(); Thread.sleep(100); - system.stop(); // verify metrics1 is unregistered assertTrue(metrics1 != ContainerMetrics.forContainer( system, containerId1, 1, 0)); @@ -156,6 +155,9 @@ public class TestContainerMetrics { // verify metrics3 is still registered assertTrue(metrics3 == ContainerMetrics.forContainer( system, containerId3, 1, 0)); + // YARN-5190: move stop() to the end to verify registering containerId1 and + // containerId2 won't get MetricsException thrown. + system.stop(); system.shutdown(); } }