HBASE-15171 Avoid counting duplicate kv and generating lots of small hfiles in PutSortReducer (Yu Li)

2016-01-27 09:42:56 -08:00 · 2016-01-27 09:42:56 -08:00 · 47c4147940
parent 845d00a16b
commit 47c4147940
1 changed file with 5 additions and 3 deletions
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
@@ -22,12 +22,12 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.TreeSet;

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -68,9 +68,11 @@ public class PutSortReducer extends
          for (Cell cell: cells) {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            map.add(kv);
-            curSize += kv.heapSize();
          }
        }
+        for(KeyValue kv: map){
+          curSize +=kv.heapSize();
+        }
      }
      context.setStatus("Read " + map.size() + " entries of " + map.getClass()
          + "(" + StringUtils.humanReadableInt(curSize) + ")");