From 90194ca1cbd695d48c3705121c2ac9a8554578a2 Mon Sep 17 00:00:00 2001
From: Karthik Kambatla
Date: Thu, 20 Nov 2014 15:36:57 -0800
Subject: [PATCH] MAPREDUCE-6169. MergeQueue should release reference to the current item from key and value at the end of the iteration to save memory. (Zhihai Xu via kasha)

---
 hadoop-mapreduce-project/CHANGES.txt                     |  4 ++++
 .../main/java/org/apache/hadoop/mapred/Merger.java       | 11 ++++++++++-
 .../hadoop/mapreduce/task/reduce/TestMerger.java         |  2 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index a58d6024ab2..7e8a8824d3b 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -237,6 +237,10 @@ Release 2.7.0 - UNRELEASED
 
   OPTIMIZATIONS
 
+    MAPREDUCE-6169. MergeQueue should release reference to the current item
+    from key and value at the end of the iteration to save memory.
+    (Zhihai Xu via kasha)
+
   BUG FIXES
 
     MAPREDUCE-5918. LineRecordReader can return the same decompressor to
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java
index b44e7423052..fffa92a0214 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java
@@ -528,9 +528,17 @@ public class Merger {
       }
     }
 
+    private void resetKeyValue() {
+      key = null;
+      value.reset(new byte[] {}, 0);
+      diskIFileValue.reset(new byte[] {}, 0);
+    }
+
     public boolean next() throws IOException {
-      if (size() == 0)
+      if (size() == 0) {
+        resetKeyValue();
         return false;
+      }
 
       if (minSegment != null) {
         //minSegment is non-null for all invocations of next except the first
@@ -539,6 +547,7 @@ public class Merger {
         adjustPriorityQueue(minSegment);
         if (size() == 0) {
           minSegment = null;
+          resetKeyValue();
           return false;
         }
       }
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java
index 651dd387553..6e3bedf06ab 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java
@@ -294,6 +294,8 @@ public class TestMerger {
     // Now there should be no more input
     Assert.assertFalse(mergeQueue.next());
     Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
+    Assert.assertTrue(mergeQueue.getKey() == null);
+    Assert.assertEquals(0, mergeQueue.getValue().getData().length);
   }
 
   private Progressable getReporter() {