HBASE-15171 Avoid counting duplicate kv and generating lots of small hfiles in PutSortReducer (Yu Li)

This commit is contained in:
tedyu 2016-01-27 09:42:56 -08:00
parent 845d00a16b
commit 47c4147940
1 changed files with 5 additions and 3 deletions

View File

@ -22,12 +22,12 @@ import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Reducer;
@ -68,9 +68,11 @@ public class PutSortReducer extends
for (Cell cell: cells) {
KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
map.add(kv);
curSize += kv.heapSize();
}
}
for(KeyValue kv: map){
curSize +=kv.heapSize();
}
}
context.setStatus("Read " + map.size() + " entries of " + map.getClass()
+ "(" + StringUtils.humanReadableInt(curSize) + ")");