diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 7c1cea65716..3b8749d379c 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -273,6 +273,9 @@ Release 2.4.1 - UNRELEASED MAPREDUCE-5835. Killing Task might cause the job to go to ERROR state (Ming Ma via jlowe) + MAPREDUCE-5821. Avoid unintentional reallocation of byte arrays in segments + during merge. (Todd Lipcon via cdouglas) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java index b4362ac5096..9493871138d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java @@ -537,6 +537,8 @@ public class Merger { } } minSegment = top(); + long startPos = minSegment.getPosition(); + key = minSegment.getKey(); if (!minSegment.inMemory()) { //When we load the value from an inmemory segment, we reset //the "value" DIB in this class to the inmem segment's byte[]. @@ -547,11 +549,11 @@ public class Merger { //segment, we reset the "value" DIB to the byte[] in that (so //we reuse the disk segment DIB whenever we consider //a disk segment). + minSegment.getValue(diskIFileValue); value.reset(diskIFileValue.getData(), diskIFileValue.getLength()); + } else { + minSegment.getValue(value); } - long startPos = minSegment.getPosition(); - key = minSegment.getKey(); - minSegment.getValue(value); long endPos = minSegment.getPosition(); totalBytesProcessed += endPos - startPos; mergeProgress.set(totalBytesProcessed * progPerByte);