HBASE-1397 Better distribution in the PerformanceEvaluation MapReduce when rows run to the Billions -- more randomness

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@773317 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2009-05-10 09:04:21 +00:00
parent 72c0393ea7
commit 39f470727c

View File

@@ -26,7 +26,9 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -44,6 +46,8 @@ import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.util.MurmurHash;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -314,16 +318,24 @@ public class PerformanceEvaluation implements HConstants {
fs.mkdirs(subdir);
Path inputFile = new Path(subdir, "input.txt");
PrintStream out = new PrintStream(fs.create(inputFile));
// Make input random.
Map<Integer, String> m = new TreeMap<Integer, String>();
Hash h = MurmurHash.getInstance();
int perClientRows = (this.R / this.N);
try {
for (int i = 0; i < 10; i++) {
for (int j = 0; j < N; j++) {
out.println("startRow=" + ((j * perClientRows) + (i * perClientRows)) +
String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows/10))) +
", perClientRunRows=" + (perClientRows / 10) +
", totalRows=" + this.R +
", clients=" + this.N);
", clients=" + this.N;
int hash = h.hash(Bytes.toBytes(s));
m.put(hash, s);
}
}
for (Map.Entry<Integer, String> e: m.entrySet()) {
out.println(e.getValue());
}
} finally {
out.close();
}