HADOOP-2522 Separate MapFile benchmark from PerformanceEvaluation
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@609422 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent commit: 24958ffd44
this commit: 5893f72c17
|
@ -152,6 +152,8 @@ Trunk (unreleased changes)
|
|||
HADOOP-2458 HStoreFile.writeSplitInfo should just call
|
||||
HStoreFile.Reference.write
|
||||
HADOOP-2471 Add reading/writing MapFile to PerformanceEvaluation suite
|
||||
HADOOP-2522 Separate MapFile benchmark from PerformanceEvaluation
|
||||
(Tom White via Stack)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,268 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.math.random.RandomData;
|
||||
import org.apache.commons.math.random.RandomDataImpl;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.MapFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This class runs performance benchmarks for {@link MapFile}.
|
||||
* </p>
|
||||
*/
|
||||
public class MapFilePerformanceEvaluation {
|
||||
|
||||
private static final int ROW_LENGTH = 1000;
|
||||
private static final int ROW_COUNT = 1000000;
|
||||
|
||||
static final Logger LOG =
|
||||
Logger.getLogger(MapFilePerformanceEvaluation.class.getName());
|
||||
|
||||
static Text format(final int i, final Text text) {
|
||||
String v = Integer.toString(i);
|
||||
text.set("0000000000".substring(v.length()) + v);
|
||||
return text;
|
||||
}
|
||||
|
||||
private void runBenchmarks() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
|
||||
if (fs.exists(mf)) {
|
||||
fs.delete(mf);
|
||||
}
|
||||
|
||||
runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT),
|
||||
ROW_COUNT);
|
||||
runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
|
||||
ROW_COUNT);
|
||||
runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
|
||||
ROW_COUNT);
|
||||
runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
|
||||
ROW_COUNT);
|
||||
|
||||
}
|
||||
|
||||
private void runBenchmark(RowOrientedBenchmark benchmark, int rowCount)
|
||||
throws Exception {
|
||||
LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
|
||||
rowCount + " rows.");
|
||||
long elapsedTime = benchmark.run();
|
||||
LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
|
||||
rowCount + " rows took " + elapsedTime + "ms.");
|
||||
}
|
||||
|
||||
static abstract class RowOrientedBenchmark {
|
||||
|
||||
protected final Configuration conf;
|
||||
protected final FileSystem fs;
|
||||
protected final Path mf;
|
||||
protected final int totalRows;
|
||||
protected Text key;
|
||||
protected Text val;
|
||||
|
||||
public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
|
||||
int totalRows) {
|
||||
this.conf = conf;
|
||||
this.fs = fs;
|
||||
this.mf = mf;
|
||||
this.totalRows = totalRows;
|
||||
this.key = new Text();
|
||||
this.val = new Text();
|
||||
}
|
||||
|
||||
void setUp() throws Exception {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
abstract void doRow(int i) throws Exception;
|
||||
|
||||
protected int getReportingPeriod() {
|
||||
return this.totalRows / 10;
|
||||
}
|
||||
|
||||
void tearDown() throws Exception {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
/**
|
||||
* Run benchmark
|
||||
* @return elapsed time.
|
||||
* @throws Exception
|
||||
*/
|
||||
long run() throws Exception {
|
||||
long elapsedTime;
|
||||
setUp();
|
||||
long startTime = System.currentTimeMillis();
|
||||
try {
|
||||
for (int i = 0; i < totalRows; i++) {
|
||||
if (i > 0 && i % getReportingPeriod() == 0) {
|
||||
LOG.info("Processed " + i + " rows.");
|
||||
}
|
||||
doRow(i);
|
||||
}
|
||||
elapsedTime = System.currentTimeMillis() - startTime;
|
||||
} finally {
|
||||
tearDown();
|
||||
}
|
||||
return elapsedTime;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class SequentialWriteBenchmark extends RowOrientedBenchmark {
|
||||
|
||||
protected MapFile.Writer writer;
|
||||
private Random random = new Random();
|
||||
private byte[] bytes = new byte[ROW_LENGTH];
|
||||
|
||||
public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
|
||||
int totalRows) {
|
||||
super(conf, fs, mf, totalRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
void setUp() throws Exception {
|
||||
writer = new MapFile.Writer(conf, fs, mf.toString(),
|
||||
Text.class, Text.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
void doRow(int i) throws Exception {
|
||||
val.set(generateValue());
|
||||
writer.append(format(i, key), val);
|
||||
}
|
||||
|
||||
private byte[] generateValue() {
|
||||
random.nextBytes(bytes);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getReportingPeriod() {
|
||||
return this.totalRows; // don't report progress
|
||||
}
|
||||
|
||||
@Override
|
||||
void tearDown() throws Exception {
|
||||
writer.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static abstract class ReadBenchmark extends RowOrientedBenchmark {
|
||||
|
||||
protected MapFile.Reader reader;
|
||||
|
||||
public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
|
||||
int totalRows) {
|
||||
super(conf, fs, mf, totalRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
void setUp() throws Exception {
|
||||
reader = new MapFile.Reader(fs, mf.toString(), conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
void tearDown() throws Exception {
|
||||
reader.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class SequentialReadBenchmark extends ReadBenchmark {
|
||||
|
||||
public SequentialReadBenchmark(Configuration conf, FileSystem fs,
|
||||
Path mf, int totalRows) {
|
||||
super(conf, fs, mf, totalRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
void doRow(@SuppressWarnings("unused") int i) throws Exception {
|
||||
reader.next(key, val);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int getReportingPeriod() {
|
||||
return this.totalRows; // don't report progress
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class UniformRandomReadBenchmark extends ReadBenchmark {
|
||||
|
||||
private Random random = new Random();
|
||||
|
||||
public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
|
||||
Path mf, int totalRows) {
|
||||
super(conf, fs, mf, totalRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
void doRow(@SuppressWarnings("unused") int i) throws Exception {
|
||||
reader.get(getRandomRow(), val);
|
||||
}
|
||||
|
||||
private Text getRandomRow() {
|
||||
return format(random.nextInt(totalRows), key);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class GaussianRandomReadBenchmark extends ReadBenchmark {
|
||||
|
||||
private RandomData randomData = new RandomDataImpl();
|
||||
|
||||
public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
|
||||
Path mf, int totalRows) {
|
||||
super(conf, fs, mf, totalRows);
|
||||
}
|
||||
|
||||
@Override
|
||||
void doRow(@SuppressWarnings("unused") int i) throws Exception {
|
||||
reader.get(getGaussianRandomRow(), val);
|
||||
}
|
||||
|
||||
private Text getGaussianRandomRow() {
|
||||
int r = (int) randomData.nextGaussian(totalRows / 2, totalRows / 10);
|
||||
return format(r, key);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void main(String[] args) throws Exception {
|
||||
new MapFilePerformanceEvaluation().runBenchmarks();
|
||||
}
|
||||
|
||||
}
|
|
@ -34,11 +34,9 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.MapFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.io.MapFile.Writer;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
|
@ -86,7 +84,6 @@ public class PerformanceEvaluation implements HConstants {
|
|||
private static final String SEQUENTIAL_READ = "sequentialRead";
|
||||
private static final String SEQUENTIAL_WRITE = "sequentialWrite";
|
||||
private static final String SCAN = "scan";
|
||||
private static final String MAPFILE = "mapfile";
|
||||
|
||||
private static final List<String> COMMANDS =
|
||||
Arrays.asList(new String [] {RANDOM_READ,
|
||||
|
@ -94,8 +91,7 @@ public class PerformanceEvaluation implements HConstants {
|
|||
RANDOM_WRITE,
|
||||
SEQUENTIAL_READ,
|
||||
SEQUENTIAL_WRITE,
|
||||
SCAN,
|
||||
MAPFILE});
|
||||
SCAN});
|
||||
|
||||
volatile HBaseConfiguration conf;
|
||||
private boolean miniCluster = false;
|
||||
|
@ -552,59 +548,6 @@ public class PerformanceEvaluation implements HConstants {
|
|||
LOG.error("Failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void doMapFile() throws IOException {
|
||||
final int ROW_COUNT = 1000000;
|
||||
Random random = new Random();
|
||||
Configuration c = new Configuration();
|
||||
FileSystem fs = FileSystem.get(c);
|
||||
Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
|
||||
if (fs.exists(mf)) {
|
||||
fs.delete(mf);
|
||||
}
|
||||
Writer writer = new MapFile.Writer(c, fs, mf.toString(),
|
||||
Text.class, Text.class);
|
||||
LOG.info("Writing " + ROW_COUNT + " rows to " + mf.toString());
|
||||
long startTime = System.currentTimeMillis();
|
||||
// Add 1M rows.
|
||||
for (int i = 0; i < ROW_COUNT; i++) {
|
||||
writer.append(PerformanceEvaluation.format(i),
|
||||
new Text(PerformanceEvaluation.generateValue(random)));
|
||||
}
|
||||
writer.close();
|
||||
LOG.info("Writing " + ROW_COUNT + " records took " +
|
||||
(System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
|
||||
"and values is done inline and has been seen to consume " +
|
||||
"significant time: e.g. ~30% of cpu time");
|
||||
// Do random reads.
|
||||
LOG.info("Reading " + ROW_COUNT + " random rows");
|
||||
MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
|
||||
startTime = System.currentTimeMillis();
|
||||
for (int i = 0; i < ROW_COUNT; i++) {
|
||||
if (i > 0 && i % (ROW_COUNT / 10) == 0) {
|
||||
LOG.info("Read " + i);
|
||||
}
|
||||
reader.get(PerformanceEvaluation.getRandomRow(random, ROW_COUNT),
|
||||
new Text());
|
||||
}
|
||||
reader.close();
|
||||
LOG.info("Reading " + ROW_COUNT + " random records took " +
|
||||
(System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
|
||||
"random key is done in line and takes a significant amount of cpu " +
|
||||
"time: e.g 10-15%");
|
||||
// Do random reads.
|
||||
LOG.info("Reading " + ROW_COUNT + " rows sequentially");
|
||||
reader = new MapFile.Reader(fs, mf.toString(), c);
|
||||
startTime = System.currentTimeMillis();
|
||||
Text key = new Text();
|
||||
Text val = new Text();
|
||||
for (int i = 0; reader.next(key, val); i++) {
|
||||
continue;
|
||||
}
|
||||
reader.close();
|
||||
LOG.info("Reading " + ROW_COUNT + " records serially took " +
|
||||
(System.currentTimeMillis() - startTime) + "ms");
|
||||
}
|
||||
|
||||
private void runTest(final String cmd) throws IOException {
|
||||
if (cmd.equals(RANDOM_READ_MEM)) {
|
||||
|
@ -619,9 +562,7 @@ public class PerformanceEvaluation implements HConstants {
|
|||
}
|
||||
|
||||
try {
|
||||
if (cmd.equals(MAPFILE)) {
|
||||
doMapFile();
|
||||
} else if (N == 1) {
|
||||
if (N == 1) {
|
||||
// If there is only one client and one HRegionServer, we assume nothing
|
||||
// has been set up at all.
|
||||
runNIsOne(cmd);
|
||||
|
@ -661,7 +602,6 @@ public class PerformanceEvaluation implements HConstants {
|
|||
System.err.println(" sequentialRead Run sequential read test");
|
||||
System.err.println(" sequentialWrite Run sequential write test");
|
||||
System.err.println(" scan Run scan test");
|
||||
System.err.println(" mapfile Do read, write tests against mapfile");
|
||||
System.err.println();
|
||||
System.err.println("Args:");
|
||||
System.err.println(" nclients Integer. Required. Total number of " +
|
||||
|
|
Loading…
Reference in New Issue