From 99cc439e29794f8e61bebe03b2a7ca4b6743ec92 Mon Sep 17 00:00:00 2001 From: Jian He Date: Fri, 3 Jun 2016 11:10:42 -0700 Subject: [PATCH] YARN-5190. Registering/unregistering container metrics in ContainerMonitorImpl and ContainerImpl causing uncaught exception in ContainerMonitorImpl. Contributed by Junping Du --- .../metrics2/impl/MetricsSystemImpl.java | 1 + .../metrics2/lib/DefaultMetricsSystem.java | 9 +++++++++ .../monitor/ContainerMetrics.java | 18 +++++++++++++----- .../monitor/ContainersMonitorImpl.java | 16 ++++++++++++---- .../monitor/TestContainerMetrics.java | 4 +++- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index ef7306b747e..6986edb42c4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -255,6 +255,7 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource { if (namedCallbacks.containsKey(name)) { namedCallbacks.remove(name); } + DefaultMetricsSystem.removeSourceName(name); } synchronized diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java index c761b583937..935f47f3a09 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/DefaultMetricsSystem.java @@ -115,6 +115,11 @@ public enum DefaultMetricsSystem { INSTANCE.removeObjectName(name.toString()); } + @InterfaceAudience.Private + public static void removeSourceName(String name) { + INSTANCE.removeSource(name); + } + @InterfaceAudience.Private public static String sourceName(String name, boolean dupOK) { return INSTANCE.newSourceName(name, dupOK); @@ -135,6 +140,10 @@ public enum DefaultMetricsSystem { mBeanNames.map.remove(name); } + synchronized void removeSource(String name) { + sourceNames.map.remove(name); + } + synchronized String newSourceName(String name, boolean dupOK) { if (sourceNames.map.containsKey(name)) { if (dupOK) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index d59abdaa2b1..31a9aa75b86 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ -198,6 +198,12 @@ public class ContainerMetrics implements MetricsSource { DefaultMetricsSystem.instance(), containerId, flushPeriodMs, delayMs); } + public synchronized static ContainerMetrics getContainerMetrics( + ContainerId containerId) { + // could be null + return usageMetrics.get(containerId); + } + synchronized static ContainerMetrics forContainer( MetricsSystem ms, ContainerId containerId, long flushPeriodMs, long delayMs) { @@ -237,12 +243,14 @@ public class ContainerMetrics implements MetricsSource { } public synchronized void finished() { - this.finished = true; - if (timer != null) { - timer.cancel(); - timer = null; + if (!finished) { + this.finished = true; + if (timer != null) { + timer.cancel(); + timer = null; + } + scheduleTimerTaskForUnregistration(); } - scheduleTimerTaskForUnregistration(); } public void recordMemoryUsage(int memoryMBs) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 3fbae102550..cfe6f8016d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -619,15 +619,16 @@ public class ContainersMonitorImpl extends AbstractService implements } ContainerId containerId = monitoringEvent.getContainerId(); - ContainerMetrics usageMetrics = ContainerMetrics - .forContainer(containerId, containerMetricsPeriodMs, - containerMetricsUnregisterDelayMs); + ContainerMetrics usageMetrics; int vmemLimitMBs; int pmemLimitMBs; int cpuVcores; switch (monitoringEvent.getType()) { case START_MONITORING_CONTAINER: + usageMetrics = ContainerMetrics + .forContainer(containerId, containerMetricsPeriodMs, + containerMetricsUnregisterDelayMs); ContainerStartMonitoringEvent startEvent = (ContainerStartMonitoringEvent) monitoringEvent; usageMetrics.recordStateChangeDurations( @@ -640,9 +641,16 @@ public class ContainersMonitorImpl extends AbstractService implements vmemLimitMBs, pmemLimitMBs, cpuVcores); break; case STOP_MONITORING_CONTAINER: - usageMetrics.finished(); + usageMetrics = ContainerMetrics.getContainerMetrics( + containerId); + if (usageMetrics != null) { + usageMetrics.finished(); + } break; case CHANGE_MONITORING_CONTAINER_RESOURCE: + usageMetrics = ContainerMetrics + .forContainer(containerId, containerMetricsPeriodMs, + containerMetricsUnregisterDelayMs); ChangeMonitoringContainerResourceEvent changeEvent = (ChangeMonitoringContainerResourceEvent) monitoringEvent; Resource resource = changeEvent.getResource(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java index 69664896033..1840d62f125 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java @@ -142,7 +142,6 @@ public class TestContainerMetrics { system.sampleMetrics(); system.sampleMetrics(); Thread.sleep(100); - system.stop(); // verify metrics1 is unregistered assertTrue(metrics1 != ContainerMetrics.forContainer( system, containerId1, 1, 0)); @@ -152,6 +151,9 @@ public class TestContainerMetrics { // verify metrics3 is still registered assertTrue(metrics3 == ContainerMetrics.forContainer( system, containerId3, 1, 0)); + // YARN-5190: move stop() to the end to verify registering containerId1 and + // containerId2 won't get MetricsException thrown. + system.stop(); system.shutdown(); }