From a732ab38047299c545cfc978e25f6b2871b5a4b3 Mon Sep 17 00:00:00 2001 From: Amar Kamat Date: Wed, 15 Jun 2011 11:37:13 +0000 Subject: [PATCH] MAPREDUCE-2107. [Gridmix] Total heap usage emulation in Gridmix. (Amar Kamat and Ravi Gummadi via amarrk) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1136000 13f79535-47bb-0310-9956-ffa450edef68 --- mapreduce/CHANGES.txt | 3 + .../hadoop/mapred/gridmix/GridmixJob.java | 89 ++++ .../CumulativeCpuUsageEmulatorPlugin.java | 8 +- .../TotalHeapUsageEmulatorPlugin.java | 258 ++++++++++ .../gridmix/TestGridmixMemoryEmulation.java | 453 ++++++++++++++++++ .../hadoop/mapred/gridmix/TestHighRamJob.java | 2 +- .../gridmix/TestResourceUsageEmulators.java | 8 +- .../documentation/content/xdocs/gridmix.xml | 29 +- 8 files changed, 839 insertions(+), 11 deletions(-) create mode 100644 mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java create mode 100644 mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixMemoryEmulation.java diff --git a/mapreduce/CHANGES.txt b/mapreduce/CHANGES.txt index b7283aa1089..1cbd829cc79 100644 --- a/mapreduce/CHANGES.txt +++ b/mapreduce/CHANGES.txt @@ -11,6 +11,9 @@ Trunk (unreleased changes) NEW FEATURES + MAPREDUCE-2107. [Gridmix] Total heap usage emulation in Gridmix. + (Amar Kamat and Ravi Gummadi via amarrk) + MAPREDUCE-2106. [Gridmix] Cumulative CPU usage emulation in Gridmix. (amarrk) diff --git a/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java index 4d9866db30f..9b6ed69f575 100644 --- a/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java +++ b/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java @@ -19,12 +19,15 @@ package org.apache.hadoop.mapred.gridmix; import java.io.DataOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Formatter; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Delayed; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.security.PrivilegedExceptionAction; import org.apache.hadoop.conf.Configuration; @@ -86,6 +89,11 @@ abstract class GridmixJob implements Callable, Delayed { // configuration key to enable/disable High-Ram feature emulation static final String GRIDMIX_HIGHRAM_EMULATION_ENABLE = "gridmix.highram-emulation.enable"; + // configuration key to enable/disable task jvm options + static final String GRIDMIX_TASK_JVM_OPTIONS_ENABLE = + "gridmix.task.jvm-options.enable"; + private static final Pattern maxHeapPattern = + Pattern.compile("-Xmx[0-9]+[kKmMgGtT]?+"); private static void setJobQueue(Job job, String queue) { if (queue != null) { @@ -137,6 +145,19 @@ abstract class GridmixJob implements Callable, Delayed { ret.getConfiguration()); } + // configure task jvm options if enabled + // this knob can be turned off if there is a mismatch between the + // target (simulation) cluster and the original cluster. Such a + // mismatch can result in job failures (due to memory issues) on the + // target (simulated) cluster. + // + // TODO If configured, scale the original task's JVM (heap related) + // options to suit the target (simulation) cluster + if (conf.getBoolean(GRIDMIX_TASK_JVM_OPTIONS_ENABLE, true)) { + configureTaskJVMOptions(jobdesc.getJobConf(), + ret.getConfiguration()); + } + return ret; } }); @@ -148,6 +169,74 @@ abstract class GridmixJob implements Callable, Delayed { submissionMillis, TimeUnit.MILLISECONDS); outdir = new Path(outRoot, "" + seq); } + + @SuppressWarnings("deprecation") + protected static void configureTaskJVMOptions(Configuration originalJobConf, + Configuration simulatedJobConf){ + // Get the heap related java opts used for the original job and set the + // same for the simulated job. + // set task task heap options + configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf, + JobConf.MAPRED_TASK_JAVA_OPTS); + // set map task heap options + configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf, + MRJobConfig.MAP_JAVA_OPTS); + + // set reduce task heap options + configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf, + MRJobConfig.REDUCE_JAVA_OPTS); + } + + // Configures the task's max heap options using the specified key + private static void configureTaskJVMMaxHeapOptions(Configuration srcConf, + Configuration destConf, + String key) { + String srcHeapOpts = srcConf.get(key); + if (srcHeapOpts != null) { + List srcMaxOptsList = new ArrayList(); + // extract the max heap options and ignore the rest + extractMaxHeapOpts(srcHeapOpts, srcMaxOptsList, + new ArrayList()); + if (srcMaxOptsList.size() > 0) { + List destOtherOptsList = new ArrayList(); + // extract the other heap options and ignore the max options in the + // destination configuration + String destHeapOpts = destConf.get(key); + if (destHeapOpts != null) { + extractMaxHeapOpts(destHeapOpts, new ArrayList(), + destOtherOptsList); + } + + // the source configuration might have some task level max heap opts set + // remove these opts from the destination configuration and replace + // with the options set in the original configuration + StringBuilder newHeapOpts = new StringBuilder(); + + for (String otherOpt : destOtherOptsList) { + newHeapOpts.append(otherOpt).append(" "); + } + + for (String opts : srcMaxOptsList) { + newHeapOpts.append(opts).append(" "); + } + + // set the final heap opts + destConf.set(key, newHeapOpts.toString().trim()); + } + } + } + + private static void extractMaxHeapOpts(String javaOptions, + List maxOpts, List others) { + for (String opt : javaOptions.split(" ")) { + Matcher matcher = maxHeapPattern.matcher(opt); + if (matcher.find()) { + maxOpts.add(opt); + } else { + others.add(opt); + } + } + } // Scales the desired job-level configuration parameter. This API makes sure // that the ratio of the job level configuration parameter to the cluster diff --git a/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java b/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java index 1d362d0bc15..8f4af1add0c 100644 --- a/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java +++ b/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java @@ -34,7 +34,7 @@ import org.apache.hadoop.tools.rumen.ResourceUsageMetrics; * *

{@link CumulativeCpuUsageEmulatorPlugin} emulates the CPU usage in steps. * The frequency of emulation can be configured via - * {@link #CPU_EMULATION_FREQUENCY}. + * {@link #CPU_EMULATION_PROGRESS_INTERVAL}. * CPU usage values are matched via emulation only on the interval boundaries. *

* @@ -70,8 +70,8 @@ implements ResourceUsageEmulatorPlugin { private long lastSeenCpuUsageCpuUsage = 0; // Configuration parameters - public static final String CPU_EMULATION_FREQUENCY = - "gridmix.emulators.resource-usage.cpu.frequency"; + public static final String CPU_EMULATION_PROGRESS_INTERVAL = + "gridmix.emulators.resource-usage.cpu.emulation-interval"; private static final float DEFAULT_EMULATION_FREQUENCY = 0.1F; // 10 times /** @@ -302,7 +302,7 @@ implements ResourceUsageEmulatorPlugin { this.monitor = monitor; this.progress = progress; - emulationInterval = conf.getFloat(CPU_EMULATION_FREQUENCY, + emulationInterval = conf.getFloat(CPU_EMULATION_PROGRESS_INTERVAL, DEFAULT_EMULATION_FREQUENCY); // calibrate the core cpu-usage utility diff --git a/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java b/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java new file mode 100644 index 00000000000..a50358a41aa --- /dev/null +++ b/mapreduce/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapred.gridmix.emulators.resourceusage; + +import java.io.IOException; +import java.util.ArrayList; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.gridmix.Progressive; +import org.apache.hadoop.mapreduce.util.ResourceCalculatorPlugin; +import org.apache.hadoop.tools.rumen.ResourceUsageMetrics; + +/** + *

A {@link ResourceUsageEmulatorPlugin} that emulates the total heap + * usage by loading the JVM heap memory. Adding smaller chunks of data to the + * heap will essentially use up some heap space thus forcing the JVM to expand + * its heap and thus resulting into increase in the heap usage.

+ * + *

{@link TotalHeapUsageEmulatorPlugin} emulates the heap usage in steps. + * The frequency of emulation can be configured via + * {@link #HEAP_EMULATION_PROGRESS_INTERVAL}. + * Heap usage values are matched via emulation only at specific interval + * boundaries. + *

+ * + * {@link TotalHeapUsageEmulatorPlugin} is a wrapper program for managing + * the heap usage emulation feature. It internally uses an emulation algorithm + * (called as core and described using {@link HeapUsageEmulatorCore}) for + * performing the actual emulation. Multiple calls to this core engine should + * use up some amount of heap. + */ +public class TotalHeapUsageEmulatorPlugin +implements ResourceUsageEmulatorPlugin { + // Configuration parameters + // the core engine to emulate heap usage + protected HeapUsageEmulatorCore emulatorCore; + // the progress bar + private Progressive progress; + // decides if this plugin can emulate heap usage or not + private boolean enabled = true; + // the progress boundaries/interval where emulation should be done + private float emulationInterval; + // target heap usage to emulate + private long targetHeapUsageInMB = 0; + + /** + * The frequency (based on task progress) with which memory-emulation code is + * run. If the value is set to 0.1 then the emulation will happen at 10% of + * the task's progress. The default value of this parameter is + * {@link #DEFAULT_EMULATION_PROGRESS_INTERVAL}. + */ + public static final String HEAP_EMULATION_PROGRESS_INTERVAL = + "gridmix.emulators.resource-usage.heap.emulation-interval"; + + // Default value for emulation interval + private static final float DEFAULT_EMULATION_PROGRESS_INTERVAL = 0.1F; // 10 % + + private float prevEmulationProgress = 0F; + + /** + * The minimum buffer reserved for other non-emulation activities. + */ + public static final String MIN_HEAP_FREE_RATIO = + "gridmix.emulators.resource-usage.heap.min-free-ratio"; + + private float minFreeHeapRatio; + + private static final float DEFAULT_MIN_FREE_HEAP_RATIO = 0.3F; + + /** + * Determines the unit increase per call to the core engine's load API. This + * is expressed as a percentage of the difference between the expected total + * heap usage and the current usage. + */ + public static final String HEAP_LOAD_RATIO = + "gridmix.emulators.resource-usage.heap.load-ratio"; + + private float heapLoadRatio; + + private static final float DEFAULT_HEAP_LOAD_RATIO = 0.1F; + + public static int ONE_MB = 1024 * 1024; + + /** + * Defines the core heap usage emulation algorithm. This engine is expected + * to perform certain memory intensive operations to consume some + * amount of heap. {@link #load(long)} should load the current heap and + * increase the heap usage by the specified value. This core engine can be + * initialized using the {@link #initialize(ResourceCalculatorPlugin, long)} + * API to suit the underlying hardware better. + */ + public interface HeapUsageEmulatorCore { + /** + * Performs some memory intensive operations to use up some heap. + */ + public void load(long sizeInMB); + + /** + * Initialize the core. + */ + public void initialize(ResourceCalculatorPlugin monitor, + long totalHeapUsageInMB); + + /** + * Reset the resource usage + */ + public void reset(); + } + + /** + * This is the core engine to emulate the heap usage. The only responsibility + * of this class is to perform certain memory intensive operations to make + * sure that some desired value of heap is used. + */ + public static class DefaultHeapUsageEmulator + implements HeapUsageEmulatorCore { + // store the unit loads in a list + protected static ArrayList heapSpace = new ArrayList(); + + /** + * Increase heap usage by current process by the given amount. + * This is done by creating objects each of size 1MB. + */ + public void load(long sizeInMB) { + for (long i = 0; i < sizeInMB; ++i) { + // Create another String object of size 1MB + heapSpace.add((Object)new byte[ONE_MB]); + } + } + + /** + * This will initialize the core and check if the core can emulate the + * desired target on the underlying hardware. + */ + public void initialize(ResourceCalculatorPlugin monitor, + long totalHeapUsageInMB) { + long maxPhysicalMemoryInMB = monitor.getPhysicalMemorySize() / ONE_MB ; + if(maxPhysicalMemoryInMB < totalHeapUsageInMB) { + throw new RuntimeException("Total heap the can be used is " + + maxPhysicalMemoryInMB + + " bytes while the emulator is configured to emulate a total of " + + totalHeapUsageInMB + " bytes"); + } + } + + /** + * Clear references to all the GridMix-allocated special objects so that + * heap usage is reduced. + */ + @Override + public void reset() { + heapSpace.clear(); + } + } + + public TotalHeapUsageEmulatorPlugin() { + this(new DefaultHeapUsageEmulator()); + } + + /** + * For testing. + */ + public TotalHeapUsageEmulatorPlugin(HeapUsageEmulatorCore core) { + emulatorCore = core; + } + + protected long getTotalHeapUsageInMB() { + return Runtime.getRuntime().totalMemory() / ONE_MB; + } + + protected long getMaxHeapUsageInMB() { + return Runtime.getRuntime().maxMemory() / ONE_MB; + } + + @Override + public void emulate() throws IOException, InterruptedException { + if (enabled) { + float currentProgress = progress.getProgress(); + if (prevEmulationProgress < currentProgress + && ((currentProgress - prevEmulationProgress) >= emulationInterval + || currentProgress == 1)) { + + long maxHeapSizeInMB = getMaxHeapUsageInMB(); + long committedHeapSizeInMB = getTotalHeapUsageInMB(); + + // Increase committed heap usage, if needed + // Using a linear weighing function for computing the expected usage + long expectedHeapUsageInMB = + Math.min(maxHeapSizeInMB, + (long) (targetHeapUsageInMB * currentProgress)); + if (expectedHeapUsageInMB < maxHeapSizeInMB + && committedHeapSizeInMB < expectedHeapUsageInMB) { + long bufferInMB = (long)(minFreeHeapRatio * expectedHeapUsageInMB); + long currentDifferenceInMB = + expectedHeapUsageInMB - committedHeapSizeInMB; + long currentIncrementLoadSizeInMB = + (long)(currentDifferenceInMB * heapLoadRatio); + // Make sure that at least 1 MB is incremented. + currentIncrementLoadSizeInMB = + Math.max(1, currentIncrementLoadSizeInMB); + while (committedHeapSizeInMB + bufferInMB < expectedHeapUsageInMB) { + // add blocks in order of X% of the difference, X = 10% by default + emulatorCore.load(currentIncrementLoadSizeInMB); + committedHeapSizeInMB = getTotalHeapUsageInMB(); + } + } + + // store the emulation progress boundary + prevEmulationProgress = currentProgress; + } + + // reset the core so that the garbage is reclaimed + emulatorCore.reset(); + } + } + + @Override + public void initialize(Configuration conf, ResourceUsageMetrics metrics, + ResourceCalculatorPlugin monitor, + Progressive progress) { + // get the target heap usage + targetHeapUsageInMB = metrics.getHeapUsage() / ONE_MB; + if (targetHeapUsageInMB <= 0 ) { + enabled = false; + return; + } else { + // calibrate the core heap-usage utility + emulatorCore.initialize(monitor, targetHeapUsageInMB); + enabled = true; + } + + this.progress = progress; + emulationInterval = + conf.getFloat(HEAP_EMULATION_PROGRESS_INTERVAL, + DEFAULT_EMULATION_PROGRESS_INTERVAL); + + minFreeHeapRatio = conf.getFloat(MIN_HEAP_FREE_RATIO, + DEFAULT_MIN_FREE_HEAP_RATIO); + + heapLoadRatio = conf.getFloat(HEAP_LOAD_RATIO, DEFAULT_HEAP_LOAD_RATIO); + + prevEmulationProgress = 0; + } +} \ No newline at end of file diff --git a/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixMemoryEmulation.java b/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixMemoryEmulation.java new file mode 100644 index 00000000000..422ec123d25 --- /dev/null +++ b/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixMemoryEmulation.java @@ -0,0 +1,453 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapred.gridmix; + +import org.junit.Test; +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.DummyResourceCalculatorPlugin; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.gridmix.DebugJobProducer.MockJob; +import org.apache.hadoop.mapred.gridmix.TestHighRamJob.DummyGridmixJob; +import org.apache.hadoop.mapred.gridmix.TestResourceUsageEmulators.FakeProgressive; +import org.apache.hadoop.mapred.gridmix.emulators.resourceusage.TotalHeapUsageEmulatorPlugin; +import org.apache.hadoop.mapred.gridmix.emulators.resourceusage.TotalHeapUsageEmulatorPlugin.DefaultHeapUsageEmulator; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.util.ResourceCalculatorPlugin; +import org.apache.hadoop.tools.rumen.ResourceUsageMetrics; + +/** + * Test Gridmix memory emulation. + */ +public class TestGridmixMemoryEmulation { + /** + * This is a dummy class that fakes heap usage. + */ + private static class FakeHeapUsageEmulatorCore + extends DefaultHeapUsageEmulator { + private int numCalls = 0; + + @Override + public void load(long sizeInMB) { + ++numCalls; + super.load(sizeInMB); + } + + // Get the total number of times load() was invoked + int getNumCalls() { + return numCalls; + } + + // Get the total number of 1mb objects stored within + long getHeapUsageInMB() { + return heapSpace.size(); + } + + @Override + public void reset() { + // no op to stop emulate() from resetting + } + + /** + * For re-testing purpose. + */ + void resetFake() { + numCalls = 0; + super.reset(); + } + } + + /** + * This is a dummy class that fakes the heap usage emulator plugin. + */ + private static class FakeHeapUsageEmulatorPlugin + extends TotalHeapUsageEmulatorPlugin { + private FakeHeapUsageEmulatorCore core; + + public FakeHeapUsageEmulatorPlugin(FakeHeapUsageEmulatorCore core) { + super(core); + this.core = core; + } + + @Override + protected long getMaxHeapUsageInMB() { + return Long.MAX_VALUE / ONE_MB; + } + + @Override + protected long getTotalHeapUsageInMB() { + return core.getHeapUsageInMB(); + } + } + + /** + * Test {@link TotalHeapUsageEmulatorPlugin}'s core heap usage emulation + * engine. + */ + @Test + public void testHeapUsageEmulator() throws IOException { + FakeHeapUsageEmulatorCore heapEmulator = new FakeHeapUsageEmulatorCore(); + + long testSizeInMB = 10; // 10 mb + long previousHeap = heapEmulator.getHeapUsageInMB(); + heapEmulator.load(testSizeInMB); + long currentHeap = heapEmulator.getHeapUsageInMB(); + + // check if the heap has increased by expected value + assertEquals("Default heap emulator failed to load 10mb", + previousHeap + testSizeInMB, currentHeap); + + // test reset + heapEmulator.resetFake(); + assertEquals("Default heap emulator failed to reset", + 0, heapEmulator.getHeapUsageInMB()); + } + + /** + * Test {@link TotalHeapUsageEmulatorPlugin}. + */ + @Test + public void testTotalHeapUsageEmulatorPlugin() throws Exception { + Configuration conf = new Configuration(); + // set the dummy resource calculator for testing + ResourceCalculatorPlugin monitor = new DummyResourceCalculatorPlugin(); + long maxHeapUsage = 1024 * TotalHeapUsageEmulatorPlugin.ONE_MB; // 1GB + conf.setLong(DummyResourceCalculatorPlugin.MAXPMEM_TESTING_PROPERTY, + maxHeapUsage); + monitor.setConf(conf); + + // no buffer to be reserved + conf.setFloat(TotalHeapUsageEmulatorPlugin.MIN_HEAP_FREE_RATIO, 0F); + // only 1 call to be made per cycle + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_LOAD_RATIO, 1F); + long targetHeapUsageInMB = 200; // 200mb + + // fake progress indicator + FakeProgressive fakeProgress = new FakeProgressive(); + + // fake heap usage generator + FakeHeapUsageEmulatorCore fakeCore = new FakeHeapUsageEmulatorCore(); + + // a heap usage emulator with fake core + FakeHeapUsageEmulatorPlugin heapPlugin = + new FakeHeapUsageEmulatorPlugin(fakeCore); + + // test with invalid or missing resource usage value + ResourceUsageMetrics invalidUsage = + TestResourceUsageEmulators.createMetrics(0); + heapPlugin.initialize(conf, invalidUsage, null, null); + + // test if disabled heap emulation plugin's emulate() call is a no-operation + // this will test if the emulation plugin is disabled or not + int numCallsPre = fakeCore.getNumCalls(); + long heapUsagePre = fakeCore.getHeapUsageInMB(); + heapPlugin.emulate(); + int numCallsPost = fakeCore.getNumCalls(); + long heapUsagePost = fakeCore.getHeapUsageInMB(); + + // test if no calls are made heap usage emulator core + assertEquals("Disabled heap usage emulation plugin works!", + numCallsPre, numCallsPost); + // test if no calls are made heap usage emulator core + assertEquals("Disabled heap usage emulation plugin works!", + heapUsagePre, heapUsagePost); + + // test with wrong/invalid configuration + Boolean failed = null; + invalidUsage = + TestResourceUsageEmulators.createMetrics(maxHeapUsage + + TotalHeapUsageEmulatorPlugin.ONE_MB); + try { + heapPlugin.initialize(conf, invalidUsage, monitor, null); + failed = false; + } catch (Exception e) { + failed = true; + } + assertNotNull("Fail case failure!", failed); + assertTrue("Expected failure!", failed); + + // test with valid resource usage value + ResourceUsageMetrics metrics = + TestResourceUsageEmulators.createMetrics(targetHeapUsageInMB + * TotalHeapUsageEmulatorPlugin.ONE_MB); + + // test with default emulation interval + // in every interval, the emulator will add 100% of the expected usage + // (since gridmix.emulators.resource-usage.heap.load-ratio=1) + // so at 10%, emulator will add 10% (difference), at 20% it will add 10% ... + // So to emulate 200MB, it will add + // 20mb + 20mb + 20mb + 20mb + .. = 200mb + testEmulationAccuracy(conf, fakeCore, monitor, metrics, heapPlugin, 200, + 10); + + // test with custom value for emulation interval of 20% + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_EMULATION_PROGRESS_INTERVAL, + 0.2F); + // 40mb + 40mb + 40mb + 40mb + 40mb = 200mb + testEmulationAccuracy(conf, fakeCore, monitor, metrics, heapPlugin, 200, 5); + + // test with custom value of free heap ratio and load ratio = 1 + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_LOAD_RATIO, 1F); + conf.setFloat(TotalHeapUsageEmulatorPlugin.MIN_HEAP_FREE_RATIO, 0.5F); + // 40mb + 0mb + 80mb + 0mb + 0mb = 120mb + testEmulationAccuracy(conf, fakeCore, monitor, metrics, heapPlugin, 120, 2); + + // test with custom value of heap load ratio and min free heap ratio = 0 + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_LOAD_RATIO, 0.5F); + conf.setFloat(TotalHeapUsageEmulatorPlugin.MIN_HEAP_FREE_RATIO, 0F); + // 20mb (call#1) + 20mb (call#1) + 20mb (call#2) + 20mb (call#2) +.. = 200mb + testEmulationAccuracy(conf, fakeCore, monitor, metrics, heapPlugin, 200, + 10); + + // test with custom value of free heap ratio = 0.3 and load ratio = 0.5 + conf.setFloat(TotalHeapUsageEmulatorPlugin.MIN_HEAP_FREE_RATIO, 0.25F); + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_LOAD_RATIO, 0.5F); + // 20mb (call#1) + 20mb (call#1) + 30mb (call#2) + 0mb (call#2) + // + 30mb (call#3) + 0mb (call#3) + 35mb (call#4) + 0mb (call#4) + // + 37mb (call#5) + 0mb (call#5) = 162mb + testEmulationAccuracy(conf, fakeCore, monitor, metrics, heapPlugin, 162, 6); + + // test if emulation interval boundary is respected + fakeProgress = new FakeProgressive(); // initialize + conf.setFloat(TotalHeapUsageEmulatorPlugin.MIN_HEAP_FREE_RATIO, 0F); + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_LOAD_RATIO, 1F); + conf.setFloat(TotalHeapUsageEmulatorPlugin.HEAP_EMULATION_PROGRESS_INTERVAL, + 0.25F); + heapPlugin.initialize(conf, metrics, monitor, fakeProgress); + fakeCore.resetFake(); + // take a snapshot after the initialization + long initHeapUsage = fakeCore.getHeapUsageInMB(); + long initNumCallsUsage = fakeCore.getNumCalls(); + // test with 0 progress + testEmulationBoundary(0F, fakeCore, fakeProgress, heapPlugin, initHeapUsage, + initNumCallsUsage, "[no-op, 0 progress]"); + // test with 24% progress + testEmulationBoundary(0.24F, fakeCore, fakeProgress, heapPlugin, + initHeapUsage, initNumCallsUsage, + "[no-op, 24% progress]"); + // test with 25% progress + testEmulationBoundary(0.25F, fakeCore, fakeProgress, heapPlugin, + targetHeapUsageInMB / 4, 1, "[op, 25% progress]"); + // test with 80% progress + testEmulationBoundary(0.80F, fakeCore, fakeProgress, heapPlugin, + (targetHeapUsageInMB * 4) / 5, 2, "[op, 80% progress]"); + + // now test if the final call with 100% progress ramps up the heap usage + testEmulationBoundary(1F, fakeCore, fakeProgress, heapPlugin, + targetHeapUsageInMB, 3, "[op, 100% progress]"); + } + + // test whether the heap usage emulator achieves the desired target using + // desired calls to the underling core engine. + private static void testEmulationAccuracy(Configuration conf, + FakeHeapUsageEmulatorCore fakeCore, + ResourceCalculatorPlugin monitor, + ResourceUsageMetrics metrics, + TotalHeapUsageEmulatorPlugin heapPlugin, + long expectedTotalHeapUsageInMB, + long expectedTotalNumCalls) + throws Exception { + FakeProgressive fakeProgress = new FakeProgressive(); + fakeCore.resetFake(); + heapPlugin.initialize(conf, metrics, monitor, fakeProgress); + int numLoops = 0; + while (fakeProgress.getProgress() < 1) { + ++numLoops; + float progress = numLoops / 100.0F; + fakeProgress.setProgress(progress); + heapPlugin.emulate(); + } + + // test if the resource plugin shows the expected usage + assertEquals("Cumulative heap usage emulator plugin failed (total usage)!", + expectedTotalHeapUsageInMB, fakeCore.getHeapUsageInMB(), 1L); + // test if the resource plugin shows the expected num calls + assertEquals("Cumulative heap usage emulator plugin failed (num calls)!", + expectedTotalNumCalls, fakeCore.getNumCalls(), 0L); + } + + // tests if the heap usage emulation plugin emulates only at the expected + // progress gaps + private static void testEmulationBoundary(float progress, + FakeHeapUsageEmulatorCore fakeCore, FakeProgressive fakeProgress, + TotalHeapUsageEmulatorPlugin heapPlugin, long expectedTotalHeapUsageInMB, + long expectedTotalNumCalls, String info) throws Exception { + fakeProgress.setProgress(progress); + heapPlugin.emulate(); + // test heap usage + assertEquals("Emulation interval test for heap usage failed " + info + "!", + expectedTotalHeapUsageInMB, fakeCore.getHeapUsageInMB(), 0L); + // test num calls + assertEquals("Emulation interval test for heap usage failed " + info + "!", + expectedTotalNumCalls, fakeCore.getNumCalls(), 0L); + } + + /** + * Test the specified task java heap options. + */ + @SuppressWarnings("deprecation") + private void testJavaHeapOptions(String mapOptions, + String reduceOptions, String taskOptions, String defaultMapOptions, + String defaultReduceOptions, String defaultTaskOptions, + String expectedMapOptions, String expectedReduceOptions, + String expectedTaskOptions) throws Exception { + Configuration simulatedConf = new Configuration(); + // reset the configuration parameters + simulatedConf.unset(MRJobConfig.MAP_JAVA_OPTS); + simulatedConf.unset(MRJobConfig.REDUCE_JAVA_OPTS); + simulatedConf.unset(JobConf.MAPRED_TASK_JAVA_OPTS); + + // set the default map task options + if (defaultMapOptions != null) { + simulatedConf.set(MRJobConfig.MAP_JAVA_OPTS, defaultMapOptions); + } + // set the default reduce task options + if (defaultReduceOptions != null) { + simulatedConf.set(MRJobConfig.REDUCE_JAVA_OPTS, defaultReduceOptions); + } + // set the default task options + if (defaultTaskOptions != null) { + simulatedConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, defaultTaskOptions); + } + + Configuration originalConf = new Configuration(); + // reset the configuration parameters + originalConf.unset(MRJobConfig.MAP_JAVA_OPTS); + originalConf.unset(MRJobConfig.REDUCE_JAVA_OPTS); + originalConf.unset(JobConf.MAPRED_TASK_JAVA_OPTS); + + // set the map task options + if (mapOptions != null) { + originalConf.set(MRJobConfig.MAP_JAVA_OPTS, mapOptions); + } + // set the reduce task options + if (reduceOptions != null) { + originalConf.set(MRJobConfig.REDUCE_JAVA_OPTS, reduceOptions); + } + // set the task options + if (taskOptions != null) { + originalConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, taskOptions); + } + + // configure the task jvm's heap options + GridmixJob.configureTaskJVMOptions(originalConf, simulatedConf); + + assertEquals("Map heap options mismatch!", expectedMapOptions, + simulatedConf.get(MRJobConfig.MAP_JAVA_OPTS)); + assertEquals("Reduce heap options mismatch!", expectedReduceOptions, + simulatedConf.get(MRJobConfig.REDUCE_JAVA_OPTS)); + assertEquals("Task heap options mismatch!", expectedTaskOptions, + simulatedConf.get(JobConf.MAPRED_TASK_JAVA_OPTS)); + } + + /** + * Test task-level java heap options configuration in {@link GridmixJob}. + */ + @Test + public void testJavaHeapOptions() throws Exception { + // test missing opts + testJavaHeapOptions(null, null, null, null, null, null, null, null, + null); + + // test original heap opts and missing default opts + testJavaHeapOptions("-Xms10m", "-Xms20m", "-Xms30m", null, null, null, + null, null, null); + + // test missing opts with default opts + testJavaHeapOptions(null, null, null, "-Xms10m", "-Xms20m", "-Xms30m", + "-Xms10m", "-Xms20m", "-Xms30m"); + + // test empty option + testJavaHeapOptions("", "", "", null, null, null, null, null, null); + + // test empty default option and no original heap options + testJavaHeapOptions(null, null, null, "", "", "", "", "", ""); + + // test empty opts and default opts + testJavaHeapOptions("", "", "", "-Xmx10m -Xms1m", "-Xmx50m -Xms2m", + "-Xms2m -Xmx100m", "-Xmx10m -Xms1m", "-Xmx50m -Xms2m", + "-Xms2m -Xmx100m"); + + // test custom heap opts with no default opts + testJavaHeapOptions("-Xmx10m", "-Xmx20m", "-Xmx30m", null, null, null, + "-Xmx10m", "-Xmx20m", "-Xmx30m"); + + // test heap opts with default opts (multiple value) + testJavaHeapOptions("-Xms5m -Xmx200m", "-Xms15m -Xmx300m", + "-Xms25m -Xmx50m", "-XXabc", "-XXxyz", "-XXdef", + "-XXabc -Xmx200m", "-XXxyz -Xmx300m", "-XXdef -Xmx50m"); + + // test heap opts with default opts (duplication of -Xmx) + testJavaHeapOptions("-Xms5m -Xmx200m", "-Xms15m -Xmx300m", + "-Xms25m -Xmx50m", "-XXabc -Xmx500m", "-XXxyz -Xmx600m", + "-XXdef -Xmx700m", "-XXabc -Xmx200m", "-XXxyz -Xmx300m", + "-XXdef -Xmx50m"); + + // test heap opts with default opts (single value) + testJavaHeapOptions("-Xmx10m", "-Xmx20m", "-Xmx50m", "-Xms2m", + "-Xms3m", "-Xms5m", "-Xms2m -Xmx10m", "-Xms3m -Xmx20m", + "-Xms5m -Xmx50m"); + + // test heap opts with default opts (duplication of -Xmx) + testJavaHeapOptions("-Xmx10m", "-Xmx20m", "-Xmx50m", "-Xmx2m", + "-Xmx3m", "-Xmx5m", "-Xmx10m", "-Xmx20m", "-Xmx50m"); + } + + /** + * Test disabled task heap options configuration in {@link GridmixJob}. + */ + @Test + @SuppressWarnings("deprecation") + public void testJavaHeapOptionsDisabled() throws Exception { + Configuration gridmixConf = new Configuration(); + gridmixConf.setBoolean(GridmixJob.GRIDMIX_TASK_JVM_OPTIONS_ENABLE, false); + + // set the default values of simulated job + gridmixConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx1m"); + gridmixConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx2m"); + gridmixConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx3m"); + + // set the default map and reduce task options for original job + final JobConf originalConf = new JobConf(); + originalConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx10m"); + originalConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx20m"); + originalConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx30m"); + + // define a mock job + MockJob story = new MockJob(originalConf) { + public JobConf getJobConf() { + return originalConf; + } + }; + + GridmixJob job = new DummyGridmixJob(gridmixConf, story); + Job simulatedJob = job.getJob(); + Configuration simulatedConf = simulatedJob.getConfiguration(); + + assertEquals("Map heap options works when disabled!", "-Xmx1m", + simulatedConf.get(MRJobConfig.MAP_JAVA_OPTS)); + assertEquals("Reduce heap options works when disabled!", "-Xmx2m", + simulatedConf.get(MRJobConfig.REDUCE_JAVA_OPTS)); + assertEquals("Task heap options works when disabled!", "-Xmx3m", + simulatedConf.get(JobConf.MAPRED_TASK_JAVA_OPTS)); + } +} diff --git a/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java b/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java index 204271b0c71..5523d731b50 100644 --- a/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java +++ b/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java @@ -41,7 +41,7 @@ public class TestHighRamJob { /** * A dummy {@link GridmixJob} that opens up the simulated job for testing. */ - private static class DummyGridmixJob extends GridmixJob { + protected static class DummyGridmixJob extends GridmixJob { public DummyGridmixJob(Configuration conf, JobStory desc) throws IOException { super(conf, System.currentTimeMillis(), desc, new Path("test"), diff --git a/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java b/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java index 8dd38820984..d7c653acd30 100644 --- a/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java +++ b/mapreduce/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java @@ -427,7 +427,7 @@ public class TestResourceUsageEmulators { } // Creates a ResourceUsageMetrics object from the target usage - private static ResourceUsageMetrics createMetrics(long target) { + static ResourceUsageMetrics createMetrics(long target) { ResourceUsageMetrics metrics = new ResourceUsageMetrics(); metrics.setCumulativeCpuUsage(target); metrics.setVirtualMemoryUsage(target); @@ -487,7 +487,7 @@ public class TestResourceUsageEmulators { targetCpuUsage, targetCpuUsage / unitCpuUsage); // test with custom value for emulation interval of 20% - conf.setFloat(CumulativeCpuUsageEmulatorPlugin.CPU_EMULATION_FREQUENCY, + conf.setFloat(CumulativeCpuUsageEmulatorPlugin.CPU_EMULATION_PROGRESS_INTERVAL, 0.2F); testEmulationAccuracy(conf, fakeCore, monitor, metrics, cpuPlugin, targetCpuUsage, targetCpuUsage / unitCpuUsage); @@ -497,7 +497,7 @@ public class TestResourceUsageEmulators { fakeProgress = new FakeProgressive(); // initialize fakeCore.reset(); fakeCore.setUnitUsage(1); - conf.setFloat(CumulativeCpuUsageEmulatorPlugin.CPU_EMULATION_FREQUENCY, + conf.setFloat(CumulativeCpuUsageEmulatorPlugin.CPU_EMULATION_PROGRESS_INTERVAL, 0.25F); cpuPlugin.initialize(conf, metrics, monitor, fakeProgress); // take a snapshot after the initialization @@ -534,7 +534,7 @@ public class TestResourceUsageEmulators { fakeProgress = new FakeProgressive(); // initialize fakeCore.reset(); fakeCore.setUnitUsage(unitCpuUsage); - conf.setFloat(CumulativeCpuUsageEmulatorPlugin.CPU_EMULATION_FREQUENCY, + conf.setFloat(CumulativeCpuUsageEmulatorPlugin.CPU_EMULATION_PROGRESS_INTERVAL, 0.40F); cpuPlugin.initialize(conf, metrics, monitor, fakeProgress); // take a snapshot after the initialization diff --git a/mapreduce/src/docs/src/documentation/content/xdocs/gridmix.xml b/mapreduce/src/docs/src/documentation/content/xdocs/gridmix.xml index c196147281a..8be94c03df4 100644 --- a/mapreduce/src/docs/src/documentation/content/xdocs/gridmix.xml +++ b/mapreduce/src/docs/src/documentation/content/xdocs/gridmix.xml @@ -198,6 +198,15 @@ hadoop jar <gridmix-jar> org.apache.hadoop.mapred.gridmix.Gridmix \ The maximum size of the input files. The default limit is 100 TiB. + + + gridmix.task.jvm-options.enable + + Enables Gridmix to configure the simulated task's max heap + options using the values obtained from the original task (i.e via + trace). + +
@@ -703,10 +712,26 @@ hadoop jar <gridmix-jar> org.apache.hadoop.mapred.gridmix.Gridmix \ CPU usage emulator is designed in such a way that it only emulates at specific progress boundaries of the task. This interval can be configured using - gridmix.emulators.resource-usage.cpu.frequency. The - default value for this parameter is 0.1 i.e + gridmix.emulators.resource-usage.cpu.emulation-interval. + The default value for this parameter is 0.1 i.e 10%. +
  • Total heap usage emulator: + GridMix uses the total heap usage value published by Rumen + and makes sure that the total heap usage of the simulated + task is close to the value published by Rumen. GridMix can be + configured to emulate cumulative heap usage by adding + org.apache.hadoop.mapred.gridmix.emulators.resourceusage + .TotalHeapUsageEmulatorPlugin to the list of emulator + plugins configured for the + gridmix.emulators.resource-usage.plugins parameter. + Heap usage emulator is designed in such a way that + it only emulates at specific progress boundaries of the task. This + interval can be configured using + gridmix.emulators.resource-usage.heap.emulation-interval + . The default value for this parameter is 0.1 + i.e 10% progress interval. +
  • Note that GridMix will emulate resource usages only for jobs of type LOADJOB.