HADOOP-2522 Separate MapFile benchmark from PerformanceEvaluation

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@609422 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2008-01-06 21:58:16 +00:00
parent 24958ffd44
commit 5893f72c17
3 changed files with 272 additions and 62 deletions

View File

@@ -152,6 +152,8 @@ Trunk (unreleased changes)
   HADOOP-2458 HStoreFile.writeSplitInfo should just call
               HStoreFile.Reference.write
   HADOOP-2471 Add reading/writing MapFile to PerformanceEvaluation suite
   HADOOP-2522 Separate MapFile benchmark from PerformanceEvaluation
               (Tom White via Stack)

View File

@@ -0,0 +1,268 @@
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.io.IOException;
import java.util.Random;

import org.apache.commons.math.random.RandomData;
import org.apache.commons.math.random.RandomDataImpl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
/**
 * <p>
 * This class runs performance benchmarks for {@link MapFile}.
 * </p>
 */
public class MapFilePerformanceEvaluation {

  private static final int ROW_LENGTH = 1000;
  private static final int ROW_COUNT = 1000000;

  static final Logger LOG =
    Logger.getLogger(MapFilePerformanceEvaluation.class.getName());

  static Text format(final int i, final Text text) {
    String v = Integer.toString(i);
    text.set("0000000000".substring(v.length()) + v);
    return text;
  }
  private void runBenchmarks() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
    if (fs.exists(mf)) {
      fs.delete(mf);
    }
    runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT),
        ROW_COUNT);
    runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
        ROW_COUNT);
    runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
        ROW_COUNT);
    runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
        ROW_COUNT);
  }

  private void runBenchmark(RowOrientedBenchmark benchmark, int rowCount)
      throws Exception {
    LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
        rowCount + " rows.");
    long elapsedTime = benchmark.run();
    LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
        rowCount + " rows took " + elapsedTime + "ms.");
  }
  static abstract class RowOrientedBenchmark {

    protected final Configuration conf;
    protected final FileSystem fs;
    protected final Path mf;
    protected final int totalRows;
    protected Text key;
    protected Text val;

    public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
        int totalRows) {
      this.conf = conf;
      this.fs = fs;
      this.mf = mf;
      this.totalRows = totalRows;
      this.key = new Text();
      this.val = new Text();
    }

    void setUp() throws Exception {
      // do nothing
    }

    abstract void doRow(int i) throws Exception;

    protected int getReportingPeriod() {
      return this.totalRows / 10;
    }

    void tearDown() throws Exception {
      // do nothing
    }

    /**
     * Run benchmark
     * @return elapsed time.
     * @throws Exception
     */
    long run() throws Exception {
      long elapsedTime;
      setUp();
      long startTime = System.currentTimeMillis();
      try {
        for (int i = 0; i < totalRows; i++) {
          if (i > 0 && i % getReportingPeriod() == 0) {
            LOG.info("Processed " + i + " rows.");
          }
          doRow(i);
        }
        elapsedTime = System.currentTimeMillis() - startTime;
      } finally {
        tearDown();
      }
      return elapsedTime;
    }
  }
  static class SequentialWriteBenchmark extends RowOrientedBenchmark {

    protected MapFile.Writer writer;
    private Random random = new Random();
    private byte[] bytes = new byte[ROW_LENGTH];

    public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
        int totalRows) {
      super(conf, fs, mf, totalRows);
    }

    @Override
    void setUp() throws Exception {
      writer = new MapFile.Writer(conf, fs, mf.toString(),
          Text.class, Text.class);
    }

    @Override
    void doRow(int i) throws Exception {
      val.set(generateValue());
      writer.append(format(i, key), val);
    }

    private byte[] generateValue() {
      random.nextBytes(bytes);
      return bytes;
    }

    @Override
    protected int getReportingPeriod() {
      return this.totalRows; // don't report progress
    }

    @Override
    void tearDown() throws Exception {
      writer.close();
    }
  }
  static abstract class ReadBenchmark extends RowOrientedBenchmark {

    protected MapFile.Reader reader;

    public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
        int totalRows) {
      super(conf, fs, mf, totalRows);
    }

    @Override
    void setUp() throws Exception {
      reader = new MapFile.Reader(fs, mf.toString(), conf);
    }

    @Override
    void tearDown() throws Exception {
      reader.close();
    }
  }

  static class SequentialReadBenchmark extends ReadBenchmark {

    public SequentialReadBenchmark(Configuration conf, FileSystem fs,
        Path mf, int totalRows) {
      super(conf, fs, mf, totalRows);
    }

    @Override
    void doRow(@SuppressWarnings("unused") int i) throws Exception {
      reader.next(key, val);
    }

    @Override
    protected int getReportingPeriod() {
      return this.totalRows; // don't report progress
    }
  }
  static class UniformRandomReadBenchmark extends ReadBenchmark {

    private Random random = new Random();

    public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
        Path mf, int totalRows) {
      super(conf, fs, mf, totalRows);
    }

    @Override
    void doRow(@SuppressWarnings("unused") int i) throws Exception {
      reader.get(getRandomRow(), val);
    }

    private Text getRandomRow() {
      return format(random.nextInt(totalRows), key);
    }
  }

  static class GaussianRandomReadBenchmark extends ReadBenchmark {

    private RandomData randomData = new RandomDataImpl();

    public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
        Path mf, int totalRows) {
      super(conf, fs, mf, totalRows);
    }

    @Override
    void doRow(@SuppressWarnings("unused") int i) throws Exception {
      reader.get(getGaussianRandomRow(), val);
    }

    private Text getGaussianRandomRow() {
      int r = (int) randomData.nextGaussian(totalRows / 2, totalRows / 10);
      return format(r, key);
    }
  }
  /**
   * @param args
   * @throws IOException
   */
  public static void main(String[] args) throws Exception {
    new MapFilePerformanceEvaluation().runBenchmarks();
  }
}
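
New workloads can be added by subclassing ReadBenchmark (or RowOrientedBenchmark) as another inner class and registering it in runBenchmarks(). A minimal sketch follows, assuming it is nested inside MapFilePerformanceEvaluation; the ReverseOrderReadBenchmark name and its reverse-key workload are illustrative only and not part of this commit:

  // Hypothetical example only: a read benchmark that probes keys from the
  // highest row down to the lowest, reusing the reader, key and val members
  // set up by ReadBenchmark.
  static class ReverseOrderReadBenchmark extends ReadBenchmark {

    public ReverseOrderReadBenchmark(Configuration conf, FileSystem fs,
        Path mf, int totalRows) {
      super(conf, fs, mf, totalRows);
    }

    @Override
    void doRow(int i) throws Exception {
      // Row i of the pass maps to key (totalRows - 1 - i).
      reader.get(format(totalRows - 1 - i, key), val);
    }
  }

It would be wired in with a call such as runBenchmark(new ReverseOrderReadBenchmark(conf, fs, mf, ROW_COUNT), ROW_COUNT) alongside the existing calls in runBenchmarks().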

View File

@@ -34,11 +34,9 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.MapFile.Writer;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
@@ -86,7 +84,6 @@ public class PerformanceEvaluation implements HConstants {
  private static final String SEQUENTIAL_READ = "sequentialRead";
  private static final String SEQUENTIAL_WRITE = "sequentialWrite";
  private static final String SCAN = "scan";
  private static final String MAPFILE = "mapfile";

  private static final List<String> COMMANDS =
    Arrays.asList(new String [] {RANDOM_READ,
@@ -94,8 +91,7 @@ public class PerformanceEvaluation implements HConstants {
      RANDOM_WRITE,
      SEQUENTIAL_READ,
      SEQUENTIAL_WRITE,
      SCAN,
      MAPFILE});
      SCAN});

  volatile HBaseConfiguration conf;
  private boolean miniCluster = false;
@@ -552,59 +548,6 @@ public class PerformanceEvaluation implements HConstants {
      LOG.error("Failed", e);
    }
  }

  private void doMapFile() throws IOException {
    final int ROW_COUNT = 1000000;
    Random random = new Random();
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);
    Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
    if (fs.exists(mf)) {
      fs.delete(mf);
    }
    Writer writer = new MapFile.Writer(c, fs, mf.toString(),
      Text.class, Text.class);
    LOG.info("Writing " + ROW_COUNT + " rows to " + mf.toString());
    long startTime = System.currentTimeMillis();
    // Add 1M rows.
    for (int i = 0; i < ROW_COUNT; i++) {
      writer.append(PerformanceEvaluation.format(i),
        new Text(PerformanceEvaluation.generateValue(random)));
    }
    writer.close();
    LOG.info("Writing " + ROW_COUNT + " records took " +
      (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
      "and values is done inline and has been seen to consume " +
      "significant time: e.g. ~30% of cpu time)");
    // Do random reads.
    LOG.info("Reading " + ROW_COUNT + " random rows");
    MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
    startTime = System.currentTimeMillis();
    for (int i = 0; i < ROW_COUNT; i++) {
      if (i > 0 && i % (ROW_COUNT / 10) == 0) {
        LOG.info("Read " + i);
      }
      reader.get(PerformanceEvaluation.getRandomRow(random, ROW_COUNT),
        new Text());
    }
    reader.close();
    LOG.info("Reading " + ROW_COUNT + " random records took " +
      (System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
      "random key is done inline and takes a significant amount of cpu " +
      "time: e.g. 10-15%)");
    // Do sequential reads.
    LOG.info("Reading " + ROW_COUNT + " rows sequentially");
    reader = new MapFile.Reader(fs, mf.toString(), c);
    startTime = System.currentTimeMillis();
    Text key = new Text();
    Text val = new Text();
    for (int i = 0; reader.next(key, val); i++) {
      continue;
    }
    reader.close();
    LOG.info("Reading " + ROW_COUNT + " records serially took " +
      (System.currentTimeMillis() - startTime) + "ms");
  }

  private void runTest(final String cmd) throws IOException {
    if (cmd.equals(RANDOM_READ_MEM)) {
@@ -619,9 +562,7 @@ public class PerformanceEvaluation implements HConstants {
    }
    try {
      if (cmd.equals(MAPFILE)) {
        doMapFile();
      } else if (N == 1) {
      if (N == 1) {
        // If there is only one client and one HRegionServer, we assume nothing
        // has been set up at all.
        runNIsOne(cmd);
@@ -661,7 +602,6 @@ public class PerformanceEvaluation implements HConstants {
    System.err.println(" sequentialRead Run sequential read test");
    System.err.println(" sequentialWrite Run sequential write test");
    System.err.println(" scan Run scan test");
    System.err.println(" mapfile Do read, write tests against mapfile");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" nclients Integer. Required. Total number of " +