HBASE-7383 create integration test for HBASE-5416 (improving scan performance for certain filters) (Sergey)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1433224 13f79535-47bb-0310-9956-ffa450edef68
Zhihong Yu 2013-01-14 23:57:48 +00:00
parent 9575f0f0a0
commit 96c32807dd
12 changed files with 659 additions and 176 deletions
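
For orientation, the feature this new test exercises (HBASE-5416, lazy/on-demand loading of non-essential column families) is driven entirely from the client-side Scan. Below is a minimal, hedged sketch of the kind of scan the test issues; the table handle, family and column names are placeholders, but SingleColumnValueFilter.setFilterIfMissing and Scan.setLoadColumnFamiliesOnDemand are the calls that appear in the diff further down.

import java.io.IOException;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class LazyCfScanSketch {
  /** Scans the given table, filtering on essential:filter == 1 and loading the other families lazily. */
  public static long countMatchingRows(HTable table) throws IOException {
    Scan scan = new Scan();
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
        Bytes.toBytes("essential"), Bytes.toBytes("filter"),
        CompareFilter.CompareOp.EQUAL, Bytes.toBytes(1L));
    filter.setFilterIfMissing(true);          // skip rows that lack the filter column entirely
    scan.setFilter(filter);
    scan.setLoadColumnFamiliesOnDemand(true); // non-essential CFs are fetched only for matching rows
    long count = 0;
    ResultScanner scanner = table.getScanner(scan);
    try {
      for (Result r = scanner.next(); r != null; r = scanner.next()) {
        ++count;
      }
    } finally {
      scanner.close();
    }
    return count;
  }
}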

View File: LoadTestDataGenerator.java (new file)

@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.util.test;
import java.util.Set;
/**
* A generator of random data (keys/cfs/columns/values) for load testing.
* Contains LoadTestKVGenerator as a matter of convenience...
*/
public abstract class LoadTestDataGenerator {
protected final LoadTestKVGenerator kvGenerator;
/**
* Initializes the object.
* @param minValueSize minimum size of the value generated by
* {@link #generateValue(byte[], byte[], byte[])}.
* @param maxValueSize maximum size of the value generated by
* {@link #generateValue(byte[], byte[], byte[])}.
*/
public LoadTestDataGenerator(int minValueSize, int maxValueSize) {
this.kvGenerator = new LoadTestKVGenerator(minValueSize, maxValueSize);
}
/**
* Generates a deterministic, unique hashed row key from a number. That way, the user can
* keep track of numbers without handling byte arrays directly, while key distribution is
* still ensured by the hashing.
* @param keyBase Base number for a key, such as a loop counter.
*/
public abstract byte[] getDeterministicUniqueKey(long keyBase);
/**
* Gets column families for the load test table.
* @return The array of byte[]s representing column family names.
*/
public abstract byte[][] getColumnFamilies();
/**
* Generates an applicable set of columns to be used for a particular key and family.
* @param rowKey The row key to generate for.
* @param cf The column family name to generate for.
* @return The array of byte[]s representing column names.
*/
public abstract byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf);
/**
* Generates a value to be used for a particular row/cf/column.
* @param rowKey The row key to generate for.
* @param cf The column family name to generate for.
* @param column The column name to generate for.
* @return The value to use.
*/
public abstract byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column);
/**
* Checks that columns for a rowKey and cf are valid if generated via
* {@link #generateColumnsForCf(byte[], byte[])}
* @param rowKey The row key to verify for.
* @param cf The column family name to verify for.
* @param columnSet The column set (for example, encountered by read).
* @return True iff valid.
*/
public abstract boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet);
/**
* Checks that value for a rowKey/cf/column is valid if generated via
* {@link #generateValue(byte[], byte[], byte[])}
* @param rowKey The row key to verify for.
* @param cf The column family name to verify for.
* @param column The column name to verify for.
* @param value The value (for example, encountered by read).
* @return True iff valid.
*/
public abstract boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value);
}
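
The abstract generator above is consumed by the load-test writer threads changed later in this commit. Roughly, a writer builds each row from it as sketched here; this is a simplified, hypothetical loop (no batching or error handling), the real one lives in MultiThreadedWriter further down.

import java.io.IOException;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class GeneratorWriteSketch {
  /** Writes keys [startKey, endKey) using the generator; one multi-column Put per row. */
  public static void writeRange(LoadTestDataGenerator dataGen, HTable table,
      long startKey, long endKey) throws IOException {
    for (long keyBase = startKey; keyBase < endKey; ++keyBase) {
      byte[] rowKey = dataGen.getDeterministicUniqueKey(keyBase);
      Put put = new Put(rowKey);
      for (byte[] cf : dataGen.getColumnFamilies()) {
        for (byte[] column : dataGen.generateColumnsForCf(rowKey, cf)) {
          put.add(cf, column, dataGen.generateValue(rowKey, cf, column));
        }
      }
      table.put(put);
    }
  }
}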

View File: LoadTestKVGenerator.java

@@ -16,6 +1,7 @@
  */
 package org.apache.hadoop.hbase.util.test;
+import java.util.Map;
 import java.util.Random;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -27,8 +28,6 @@ import org.apache.hadoop.hbase.util.MD5Hash;
  * hash. Values are generated by selecting value size in the configured range
  * and generating a pseudo-random sequence of bytes seeded by key, column
  * qualifier, and value size.
- * <p>
- * Not thread-safe, so a separate instance is needed for every writer thread/
  */
 public class LoadTestKVGenerator {
@@ -49,13 +48,13 @@ public class LoadTestKVGenerator {
   /**
    * Verifies that the given byte array is the same as what would be generated
-   * for the given row key and qualifier. We are assuming that the value size
-   * is correct, and only verify the actual bytes. However, if the min/max
-   * value sizes are set sufficiently high, an accidental match should be
+   * for the given seed strings (row/cf/column/...). We are assuming that the
+   * value size is correct, and only verify the actual bytes. However, if the
+   * min/max value sizes are set sufficiently high, an accidental match should be
    * extremely improbable.
    */
-  public static boolean verify(String rowKey, String qual, byte[] value) {
-    byte[] expectedData = getValueForRowColumn(rowKey, qual, value.length);
+  public static boolean verify(byte[] value, byte[]... seedStrings) {
+    byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
     return Bytes.equals(expectedData, value);
   }
@@ -74,27 +73,31 @@ public class LoadTestKVGenerator {
   /**
    * Generates a value for the given key index and column qualifier. Size is
    * selected randomly in the configured range. The generated value depends
-   * only on the combination of the key, qualifier, and the selected value
-   * size. This allows to verify the actual value bytes when reading, as done
-   * in {@link #verify(String, String, byte[])}.
+   * only on the combination of the strings passed (key/cf/column/...) and the selected
+   * value size. This allows to verify the actual value bytes when reading, as done
+   * in {#verify(byte[], byte[]...)}
+   * This method is as thread-safe as Random class. It appears that the worst bug ever
+   * found with the latter is that multiple threads will get some duplicate values, which
+   * we don't care about.
    */
-  public byte[] generateRandomSizeValue(long key, String qual) {
-    String rowKey = md5PrefixedKey(key);
+  public byte[] generateRandomSizeValue(byte[]... seedStrings) {
     int dataSize = minValueSize;
-    if(minValueSize != maxValueSize){
+    if(minValueSize != maxValueSize) {
       dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
     }
-    return getValueForRowColumn(rowKey, qual, dataSize);
+    return getValueForRowColumn(dataSize, seedStrings);
   }
   /**
    * Generates random bytes of the given size for the given row and column
    * qualifier. The random seed is fully determined by these parameters.
    */
-  private static byte[] getValueForRowColumn(String rowKey, String qual,
-      int dataSize) {
-    Random seededRandom = new Random(rowKey.hashCode() + qual.hashCode() +
-        dataSize);
+  private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
+    long seed = dataSize;
+    for (byte[] str : seedStrings) {
+      seed += Bytes.toString(str).hashCode();
+    }
+    Random seededRandom = new Random(seed);
     byte[] randomBytes = new byte[dataSize];
     seededRandom.nextBytes(randomBytes);
     return randomBytes;
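
With the signature change above, generation and verification are keyed by an arbitrary list of byte[] seeds rather than a (rowKey, qualifier) pair of strings. A small illustrative round trip, with arbitrary sizes and names, could look like this (the unit test in the next file does essentially the same thing):

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;

public class KVGeneratorRoundTrip {
  public static void main(String[] args) {
    LoadTestKVGenerator gen = new LoadTestKVGenerator(10, 20);
    byte[] row = LoadTestKVGenerator.md5PrefixedKey(42).getBytes();
    byte[] cf = Bytes.toBytes("cf");
    byte[] col = Bytes.toBytes("col");
    byte[] value = gen.generateRandomSizeValue(row, cf, col);
    boolean ok = LoadTestKVGenerator.verify(value, row, cf, col);        // expected: true
    boolean mismatch = LoadTestKVGenerator.verify(value, row, cf,
        Bytes.toBytes("other"));                                         // almost surely false
    System.out.println(ok + " / " + mismatch);
  }
}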

View File: TestLoadTestKVGenerator.java

@@ -41,8 +41,8 @@ public class TestLoadTestKVGenerator {
   @Test
   public void testValueLength() {
     for (int i = 0; i < 1000; ++i) {
-      byte[] v = gen.generateRandomSizeValue(i,
-          String.valueOf(rand.nextInt()));
+      byte[] v = gen.generateRandomSizeValue(Integer.toString(i).getBytes(),
+          String.valueOf(rand.nextInt()).getBytes());
       assertTrue(MIN_LEN <= v.length);
       assertTrue(v.length <= MAX_LEN);
     }
@@ -52,12 +52,12 @@ public class TestLoadTestKVGenerator {
   public void testVerification() {
     for (int i = 0; i < 1000; ++i) {
       for (int qualIndex = 0; qualIndex < 20; ++qualIndex) {
-        String qual = String.valueOf(qualIndex);
-        byte[] v = gen.generateRandomSizeValue(i, qual);
-        String rowKey = LoadTestKVGenerator.md5PrefixedKey(i);
-        assertTrue(LoadTestKVGenerator.verify(rowKey, qual, v));
+        byte[] qual = String.valueOf(qualIndex).getBytes();
+        byte[] rowKey = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();
+        byte[] v = gen.generateRandomSizeValue(rowKey, qual);
+        assertTrue(LoadTestKVGenerator.verify(v, rowKey, qual));
         v[0]++;
-        assertFalse(LoadTestKVGenerator.verify(rowKey, qual, v));
+        assertFalse(LoadTestKVGenerator.verify(v, rowKey, qual));
       }
     }
   }

View File: IntegrationTestLazyCfLoading.java (new file)

@@ -0,0 +1,286 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.security.InvalidParameterException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
* Integration test that verifies lazy CF loading during scans by doing repeated scans
* with this feature while multiple threads are continuously writing values; and
* verifying the result.
*/
@Category(IntegrationTests.class)
public class IntegrationTestLazyCfLoading {
private static final String TABLE_NAME = IntegrationTestLazyCfLoading.class.getSimpleName();
private static final String TIMEOUT_KEY = "hbase.%s.timeout";
/** A soft test timeout; duration of the test, as such, depends on number of keys to put. */
private static final int DEFAULT_TIMEOUT_MINUTES = 10;
private static final int NUM_SERVERS = 1;
/** Set regions per server low to ensure splits happen during test */
private static final int REGIONS_PER_SERVER = 3;
private static final int KEYS_TO_WRITE_PER_SERVER = 20000;
private static final int WRITER_THREADS = 10;
private static final int WAIT_BETWEEN_SCANS_MS = 1000;
private static final Log LOG = LogFactory.getLog(IntegrationTestLazyCfLoading.class);
private IntegrationTestingUtility util = new IntegrationTestingUtility();
private final DataGenerator dataGen = new DataGenerator();
/** Custom LoadTestDataGenerator. Uses key generation and verification from
* LoadTestKVGenerator. Creates 3 column families: one with an integer column to
* filter on; a 2nd one with the same integer column (for test-specific verification)
* plus a byte[] value column used for general verification; and a 3rd one with just
* the value column.
*/
private static class DataGenerator extends LoadTestDataGenerator {
private static final int MIN_DATA_SIZE = 4096;
private static final int MAX_DATA_SIZE = 65536;
public static final byte[] ESSENTIAL_CF = Bytes.toBytes("essential");
public static final byte[] JOINED_CF1 = Bytes.toBytes("joined");
public static final byte[] JOINED_CF2 = Bytes.toBytes("joined2");
public static final byte[] FILTER_COLUMN = Bytes.toBytes("filter");
public static final byte[] VALUE_COLUMN = Bytes.toBytes("val");
public static final long ACCEPTED_VALUE = 1L;
private static final Map<byte[], byte[][]> columnMap = new TreeMap<byte[], byte[][]>(
Bytes.BYTES_COMPARATOR);
private final AtomicLong expectedNumberOfKeys = new AtomicLong(0);
private final AtomicLong totalNumberOfKeys = new AtomicLong(0);
public DataGenerator() {
super(MIN_DATA_SIZE, MAX_DATA_SIZE);
columnMap.put(ESSENTIAL_CF, new byte[][] { FILTER_COLUMN });
columnMap.put(JOINED_CF1, new byte[][] { FILTER_COLUMN, VALUE_COLUMN });
columnMap.put(JOINED_CF2, new byte[][] { VALUE_COLUMN });
}
public long getExpectedNumberOfKeys() {
return expectedNumberOfKeys.get();
}
public long getTotalNumberOfKeys() {
return totalNumberOfKeys.get();
}
@Override
public byte[] getDeterministicUniqueKey(long keyBase) {
return LoadTestKVGenerator.md5PrefixedKey(keyBase).getBytes();
}
@Override
public byte[][] getColumnFamilies() {
return columnMap.keySet().toArray(new byte[columnMap.size()][]);
}
@Override
public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
return columnMap.get(cf);
}
@Override
public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
// Random deterministic way to make some values "on" and others "off" for filters.
long value = Long.parseLong(Bytes.toString(rowKey, 0, 4), 16) & ACCEPTED_VALUE;
if (Bytes.BYTES_COMPARATOR.compare(cf, ESSENTIAL_CF) == 0) {
totalNumberOfKeys.incrementAndGet();
if (value == ACCEPTED_VALUE) {
expectedNumberOfKeys.incrementAndGet();
}
}
return Bytes.toBytes(value);
} else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
}
String error = "Unknown column " + Bytes.toString(column);
assert false : error;
throw new InvalidParameterException(error);
}
@Override
public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
// Relies on the filter from getScanFilter being used.
return Bytes.toLong(value) == ACCEPTED_VALUE;
} else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
return LoadTestKVGenerator.verify(value, rowKey, cf, column);
}
return false; // some bogus value from read, we don't expect any such thing.
}
@Override
public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
return columnMap.get(cf).length == columnSet.size();
}
public Filter getScanFilter() {
SingleColumnValueFilter scf = new SingleColumnValueFilter(ESSENTIAL_CF, FILTER_COLUMN,
CompareFilter.CompareOp.EQUAL, Bytes.toBytes(ACCEPTED_VALUE));
scf.setFilterIfMissing(true);
return scf;
}
};
@Before
public void setUp() throws Exception {
LOG.info("Initializing cluster with " + NUM_SERVERS + " servers");
util.initializeCluster(NUM_SERVERS);
LOG.info("Done initializing cluster");
createTable();
}
private void createTable() throws Exception {
deleteTable();
LOG.info("Creating table");
HTableDescriptor htd = new HTableDescriptor(Bytes.toBytes(TABLE_NAME));
for (byte[] cf : dataGen.getColumnFamilies()) {
htd.addFamily(new HColumnDescriptor(cf));
}
int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
util.getHBaseAdmin().createTable(htd, splits);
LOG.info("Created table");
}
private void deleteTable() throws Exception {
if (util.getHBaseAdmin().tableExists(TABLE_NAME)) {
LOG.info("Deleting table");
if (!util.getHBaseAdmin().isTableDisabled(TABLE_NAME)) {
util.getHBaseAdmin().disableTable(TABLE_NAME);
}
util.getHBaseAdmin().deleteTable(TABLE_NAME);
LOG.info("Deleted table");
}
}
@After
public void tearDown() throws Exception {
deleteTable();
LOG.info("Restoring the cluster");
util.restoreCluster();
LOG.info("Done restoring the cluster");
}
@Test
public void testReadersAndWriters() throws Exception {
Configuration conf = util.getConfiguration();
String timeoutKey = String.format(TIMEOUT_KEY, this.getClass().getSimpleName());
long maxRuntime = conf.getLong(timeoutKey, DEFAULT_TIMEOUT_MINUTES);
long serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
long keysToWrite = serverCount * KEYS_TO_WRITE_PER_SERVER;
HTable table = new HTable(conf, Bytes.toBytes(TABLE_NAME));
// Create multi-threaded writer and start it. We write multiple columns/CFs and verify
// their integrity, therefore multi-put is necessary.
MultiThreadedWriter writer =
new MultiThreadedWriter(dataGen, conf, Bytes.toBytes(TABLE_NAME));
writer.setMultiPut(true);
LOG.info("Starting writer; the number of keys to write is " + keysToWrite);
writer.start(1, keysToWrite, WRITER_THREADS);
// Now, do scans.
long now = EnvironmentEdgeManager.currentTimeMillis();
long timeLimit = now + (maxRuntime * 60000);
boolean isWriterDone = false;
while (now < timeLimit && !isWriterDone) {
LOG.info("Starting the scan; wrote approximately "
+ dataGen.getTotalNumberOfKeys() + " keys");
isWriterDone = writer.isDone();
if (isWriterDone) {
LOG.info("Scanning full result, writer is done");
}
Scan scan = new Scan();
for (byte[] cf : dataGen.getColumnFamilies()) {
scan.addFamily(cf);
}
scan.setFilter(dataGen.getScanFilter());
scan.setLoadColumnFamiliesOnDemand(true);
// The number of keys we can expect from scan - lower bound (before scan).
// Not a strict lower bound - writer knows nothing about filters, so we report
// this from generator. Writer might have generated the value but not put it yet.
long onesGennedBeforeScan = dataGen.getExpectedNumberOfKeys();
long startTs = EnvironmentEdgeManager.currentTimeMillis();
ResultScanner results = table.getScanner(scan);
long resultCount = 0;
Result result = null;
// Verify and count the results.
while ((result = results.next()) != null) {
boolean isOk = writer.verifyResultAgainstDataGenerator(result, true, true);
Assert.assertTrue("Failed to verify [" + Bytes.toString(result.getRow())+ "]", isOk);
++resultCount;
}
long timeTaken = EnvironmentEdgeManager.currentTimeMillis() - startTs;
// Verify the result count.
long onesGennedAfterScan = dataGen.getExpectedNumberOfKeys();
Assert.assertTrue("Read " + resultCount + " keys when at most " + onesGennedAfterScan
+ " were generated ", onesGennedAfterScan >= resultCount);
if (isWriterDone) {
Assert.assertTrue("Read " + resultCount + " keys; the writer is done and "
+ onesGennedAfterScan + " keys were generated", onesGennedAfterScan == resultCount);
} else if (onesGennedBeforeScan * 0.9 > resultCount) {
LOG.warn("Read way too few keys (" + resultCount + "/" + onesGennedBeforeScan
+ ") - there might be a problem, or the writer might just be slow");
}
LOG.info("Scan took " + timeTaken + "ms");
if (!isWriterDone) {
Thread.sleep(WAIT_BETWEEN_SCANS_MS);
now = EnvironmentEdgeManager.currentTimeMillis();
}
}
Assert.assertEquals("There are write failures", 0, writer.getNumWriteFailures());
Assert.assertTrue("Writer is not done", isWriterDone);
// Assert.fail("Boom!");
}
}
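
The DataGenerator above makes the accepted row set deterministic: a row passes the filter iff the low bit of its first four hex digits (taken from the MD5-prefixed key) equals ACCEPTED_VALUE. A small standalone sketch of that predicate, assuming the same constant of 1, is below; roughly half of the generated keys end up accepted, which is what the scan-count assertions rely on.

import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;

public class AcceptedRowSketch {
  /** Mirrors IntegrationTestLazyCfLoading.DataGenerator: a key is accepted iff the
   *  low bit of its first four hex digits equals ACCEPTED_VALUE (assumed to be 1). */
  public static boolean isAccepted(long keyBase) {
    String rowKey = LoadTestKVGenerator.md5PrefixedKey(keyBase);
    long value = Long.parseLong(rowKey.substring(0, 4), 16) & 1L;
    return value == 1L;
  }

  public static void main(String[] args) {
    long accepted = 0;
    for (long k = 1; k <= 20000; ++k) {
      if (isAccepted(k)) {
        ++accepted;
      }
    }
    System.out.println(accepted + " of 20000 keys accepted");
  }
}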

View File: TestEncodedSeekers.java

@@ -129,13 +129,12 @@ public class TestEncodedSeekers {
   private void doPuts(HRegion region) throws IOException{
     LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE);
     for (int i = 0; i < NUM_ROWS; ++i) {
-      byte[] key = MultiThreadedWriter.longToByteArrayKey(i);
+      byte[] key = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();
       for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
         Put put = new Put(key);
-        String colAsStr = String.valueOf(j);
-        byte[] col = Bytes.toBytes(colAsStr);
-        byte[] value = dataGenerator.generateRandomSizeValue(i, colAsStr);
-        put.add(CF_BYTES, Bytes.toBytes(colAsStr), value);
+        byte[] col = Bytes.toBytes(String.valueOf(j));
+        byte[] value = dataGenerator.generateRandomSizeValue(key, col);
+        put.add(CF_BYTES, col, value);
         if(VERBOSE){
           KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value);
           System.err.println(Strings.padFront(i+"", ' ', 4)+" "+kvPut);
@@ -151,7 +150,7 @@ public class TestEncodedSeekers {
   private void doGets(HRegion region) throws IOException{
     for (int i = 0; i < NUM_ROWS; ++i) {
-      final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i);
+      final byte[] rowKey = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();
       for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
         final String qualStr = String.valueOf(j);
         if (VERBOSE) {
@@ -163,8 +162,8 @@ public class TestEncodedSeekers {
         get.addColumn(CF_BYTES, qualBytes);
         Result result = region.get(get, null);
         assertEquals(1, result.size());
-        assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr,
-            result.getValue(CF_BYTES, qualBytes)));
+        byte[] value = result.getValue(CF_BYTES, qualBytes);
+        assertTrue(LoadTestKVGenerator.verify(value, rowKey, qualBytes));
       }
     }
   }

View File: LoadTestTool.java

@@ -32,6 +32,8 @@ import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 /**
  * A command-line utility that reads, writes, and verifies data. Unlike
@@ -119,7 +121,7 @@ public class LoadTestTool extends AbstractHBaseTool {
   // Writer options
   private int numWriterThreads = DEFAULT_NUM_THREADS;
-  private long minColsPerKey, maxColsPerKey;
+  private int minColsPerKey, maxColsPerKey;
   private int minColDataSize, maxColDataSize;
   private boolean isMultiPut;
@@ -260,7 +262,7 @@ public class LoadTestTool extends AbstractHBaseTool {
     int colIndex = 0;
     minColsPerKey = 1;
-    maxColsPerKey = 2 * Long.parseLong(writeOpts[colIndex++]);
+    maxColsPerKey = 2 * Integer.parseInt(writeOpts[colIndex++]);
     int avgColDataSize =
         parseInt(writeOpts[colIndex++], 1, Integer.MAX_VALUE);
     minColDataSize = avgColDataSize / 2;
@@ -342,16 +344,16 @@ public class LoadTestTool extends AbstractHBaseTool {
       initTestTable();
     }
+    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
+        minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, COLUMN_FAMILY);
     if (isWrite) {
-      writerThreads = new MultiThreadedWriter(conf, tableName, COLUMN_FAMILY);
+      writerThreads = new MultiThreadedWriter(dataGen, conf, tableName);
       writerThreads.setMultiPut(isMultiPut);
-      writerThreads.setColumnsPerKey(minColsPerKey, maxColsPerKey);
-      writerThreads.setDataSize(minColDataSize, maxColDataSize);
     }
     if (isRead) {
-      readerThreads = new MultiThreadedReader(conf, tableName, COLUMN_FAMILY,
-          verifyPercent);
+      readerThreads = new MultiThreadedReader(dataGen, conf, tableName, verifyPercent);
       readerThreads.setMaxErrors(maxReadErrors);
       readerThreads.setKeyWindow(keyWindow);
     }

View File: MultiThreadedAction.java

@@ -18,12 +18,19 @@ package org.apache.hadoop.hbase.util;
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Random;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
+import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
 import org.apache.hadoop.util.StringUtils;
 /**
@@ -34,7 +41,6 @@ public abstract class MultiThreadedAction {
   private static final Log LOG = LogFactory.getLog(MultiThreadedAction.class);
   protected final byte[] tableName;
-  protected final byte[] columnFamily;
   protected final Configuration conf;
   protected int numThreads = 1;
@@ -51,8 +57,69 @@ public abstract class MultiThreadedAction {
   protected AtomicLong totalOpTimeMs = new AtomicLong();
   protected boolean verbose = false;
-  protected int minDataSize = 256;
-  protected int maxDataSize = 1024;
+  protected LoadTestDataGenerator dataGenerator = null;
/**
* Default implementation of LoadTestDataGenerator that uses LoadTestKVGenerator, fixed
* set of column families, and random number of columns in range. The table for it can
* be created manually or, for example, via
* {@link HBaseTestingUtility#createPreSplitLoadTestTable(
* org.apache.hadoop.hbase.Configuration, byte[], byte[], Algorithm, DataBlockEncoding)}
*/
public static class DefaultDataGenerator extends LoadTestDataGenerator {
private byte[][] columnFamilies = null;
private int minColumnsPerKey;
private int maxColumnsPerKey;
private final Random random = new Random();
public DefaultDataGenerator(int minValueSize, int maxValueSize,
int minColumnsPerKey, int maxColumnsPerKey, byte[]... columnFamilies) {
super(minValueSize, maxValueSize);
this.columnFamilies = columnFamilies;
this.minColumnsPerKey = minColumnsPerKey;
this.maxColumnsPerKey = maxColumnsPerKey;
}
public DefaultDataGenerator(byte[]... columnFamilies) {
// Default values for tests that didn't care to provide theirs.
this(256, 1024, 1, 10, columnFamilies);
}
@Override
public byte[] getDeterministicUniqueKey(long keyBase) {
return LoadTestKVGenerator.md5PrefixedKey(keyBase).getBytes();
}
@Override
public byte[][] getColumnFamilies() {
return columnFamilies;
}
@Override
public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
int numColumns = minColumnsPerKey + random.nextInt(maxColumnsPerKey - minColumnsPerKey + 1);
byte[][] columns = new byte[numColumns][];
for (int i = 0; i < numColumns; ++i) {
columns[i] = Integer.toString(i).getBytes();
}
return columns;
}
@Override
public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
}
@Override
public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
return LoadTestKVGenerator.verify(value, rowKey, cf, column);
}
@Override
public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
return (columnSet.size() >= minColumnsPerKey) && (columnSet.size() <= maxColumnsPerKey);
}
}
/** "R" or "W" */ /** "R" or "W" */
private String actionLetter; private String actionLetter;
@ -62,11 +129,11 @@ public abstract class MultiThreadedAction {
public static final int REPORTING_INTERVAL_MS = 5000; public static final int REPORTING_INTERVAL_MS = 5000;
public MultiThreadedAction(Configuration conf, byte[] tableName, public MultiThreadedAction(LoadTestDataGenerator dataGen, Configuration conf, byte[] tableName,
byte[] columnFamily, String actionLetter) { String actionLetter) {
this.conf = conf; this.conf = conf;
this.dataGenerator = dataGen;
this.tableName = tableName; this.tableName = tableName;
this.columnFamily = columnFamily;
this.actionLetter = actionLetter; this.actionLetter = actionLetter;
} }
@ -165,17 +232,16 @@ public abstract class MultiThreadedAction {
} }
} }
public void setDataSize(int minDataSize, int maxDataSize) {
this.minDataSize = minDataSize;
this.maxDataSize = maxDataSize;
}
public void waitForFinish() { public void waitForFinish() {
while (numThreadsWorking.get() != 0) { while (numThreadsWorking.get() != 0) {
Threads.sleepWithoutInterrupt(1000); Threads.sleepWithoutInterrupt(1000);
} }
} }
public boolean isDone() {
return (numThreadsWorking.get() == 0);
}
protected void startThreads(Collection<? extends Thread> threads) { protected void startThreads(Collection<? extends Thread> threads) {
numThreadsWorking.addAndGet(threads.size()); numThreadsWorking.addAndGet(threads.size());
for (Thread thread : threads) { for (Thread thread : threads) {
@ -202,4 +268,77 @@ public abstract class MultiThreadedAction {
sb.append(v); sb.append(v);
} }
/**
* See {@link #verifyResultAgainstDataGenerator(Result, boolean, boolean)}.
* Does not verify cf/column integrity.
*/
public boolean verifyResultAgainstDataGenerator(Result result, boolean verifyValues) {
return verifyResultAgainstDataGenerator(result, verifyValues, false);
}
/**
* Verifies the result from get or scan using the dataGenerator (that was presumably
* also used to generate said result).
* @param verifyValues verify that values in the result make sense for row/cf/column combination
* @param verifyCfAndColumnIntegrity verify that cf/column set in the result is complete. Note
* that to use this multiPut should be used, or verification
* has to happen after writes, otherwise there can be races.
* @return
*/
public boolean verifyResultAgainstDataGenerator(Result result, boolean verifyValues,
boolean verifyCfAndColumnIntegrity) {
String rowKeyStr = Bytes.toString(result.getRow());
// See if we have any data at all.
if (result.isEmpty()) {
LOG.error("No data returned for key = [" + rowKeyStr + "]");
return false;
}
if (!verifyValues && !verifyCfAndColumnIntegrity) {
return true; // as long as we have something, we are good.
}
// See if we have all the CFs.
byte[][] expectedCfs = dataGenerator.getColumnFamilies();
if (verifyCfAndColumnIntegrity && (expectedCfs.length != result.getMap().size())) {
LOG.error("Bad family count for [" + rowKeyStr + "]: " + result.getMap().size());
return false;
}
// Verify each column family from get in the result.
for (byte[] cf : result.getMap().keySet()) {
String cfStr = Bytes.toString(cf);
Map<byte[], byte[]> columnValues = result.getFamilyMap(cf);
if (columnValues == null) {
LOG.error("No data for family [" + cfStr + "] for [" + rowKeyStr + "]");
return false;
}
// See if we have correct columns.
if (verifyCfAndColumnIntegrity
&& !dataGenerator.verify(result.getRow(), cf, columnValues.keySet())) {
String colsStr = "";
for (byte[] col : columnValues.keySet()) {
if (colsStr.length() > 0) {
colsStr += ", ";
}
colsStr += "[" + Bytes.toString(col) + "]";
}
LOG.error("Bad columns for family [" + cfStr + "] for [" + rowKeyStr + "]: " + colsStr);
return false;
}
// See if values check out.
if (verifyValues) {
for (Map.Entry<byte[], byte[]> kv : columnValues.entrySet()) {
if (!dataGenerator.verify(result.getRow(), cf, kv.getKey(), kv.getValue())) {
LOG.error("Error checking data for key [" + rowKeyStr + "], column family ["
+ cfStr + "], column [" + Bytes.toString(kv.getKey()) + "]; value of length " +
+ kv.getValue().length);
return false;
}
}
}
}
return true;
}
 }
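
The DefaultDataGenerator and verifyResultAgainstDataGenerator introduced here are what the load tools in the remaining files are switched over to. The wiring, assembled loosely from the LoadTestTool and TestMiniClusterLoadSequential hunks (table and family names are placeholders, not values from the commit), is roughly:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MultiThreadedAction;
import org.apache.hadoop.hbase.util.MultiThreadedReader;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class LoadToolWiringSketch {
  public static void runLoad(Configuration conf) throws Exception {
    byte[] table = Bytes.toBytes("load_test_tbl");
    // 256..1024-byte values, 1..10 columns per key, a single column family.
    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
        256, 1024, 1, 10, Bytes.toBytes("test_cf"));

    MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, table);
    writer.setMultiPut(true);          // needed if cf/column integrity will be verified later
    writer.start(0, 100000, 10);       // keys [0, 100000), 10 writer threads
    writer.waitForFinish();

    MultiThreadedReader reader = new MultiThreadedReader(dataGen, conf, table, 100);
    reader.start(0, 100000, 10);       // re-read and verify everything via the same generator
    reader.waitForFinish();
  }
}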

View File: MultiThreadedReader.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
@@ -31,6 +32,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
 /** Creates multiple threads that read and verify previously written data */
@@ -72,9 +74,9 @@ public class MultiThreadedReader extends MultiThreadedAction
   private int maxErrors = DEFAULT_MAX_ERRORS;
   private int keyWindow = DEFAULT_KEY_WINDOW;
-  public MultiThreadedReader(Configuration conf, byte[] tableName,
-      byte[] columnFamily, double verifyPercent) {
-    super(conf, tableName, columnFamily, "R");
+  public MultiThreadedReader(LoadTestDataGenerator dataGen, Configuration conf,
+      byte[] tableName, double verifyPercent) {
+    super(dataGen, conf, tableName, "R");
     this.verifyPercent = verifyPercent;
   }
@@ -223,14 +225,22 @@ public class MultiThreadedReader extends MultiThreadedAction
   }
   private Get readKey(long keyToRead) {
-    Get get = new Get(
-        LoadTestKVGenerator.md5PrefixedKey(keyToRead).getBytes());
-    get.addFamily(columnFamily);
+    Get get = new Get(dataGenerator.getDeterministicUniqueKey(keyToRead));
+    String cfsString = "";
+    byte[][] columnFamilies = dataGenerator.getColumnFamilies();
+    for (byte[] cf : columnFamilies) {
+      get.addFamily(cf);
+      if (verbose) {
+        if (cfsString.length() > 0) {
+          cfsString += ", ";
+        }
+        cfsString += "[" + Bytes.toStringBinary(cf) + "]";
+      }
+    }
     try {
       if (verbose) {
-        LOG.info("[" + readerId + "] " + "Querying key " + keyToRead
-            + ", cf " + Bytes.toStringBinary(columnFamily));
+        LOG.info("[" + readerId + "] " + "Querying key " + keyToRead + ", cfs " + cfsString);
       }
       queryKey(get, random.nextInt(100) < verifyPercent);
     } catch (IOException e) {
@@ -250,47 +260,38 @@ public class MultiThreadedReader extends MultiThreadedAction
     Result result = table.get(get);
     totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
     numKeys.addAndGet(1);
-    // if we got no data report error
-    if (result.isEmpty()) {
+    if (!result.isEmpty()) {
+      if (verify) {
+        numKeysVerified.incrementAndGet();
+      }
+    } else {
       HRegionLocation hloc = table.getRegionLocation(
           Bytes.toBytes(rowKey));
       LOG.info("Key = " + rowKey + ", RegionServer: "
          + hloc.getHostname());
-      numReadErrors.addAndGet(1);
-      LOG.error("No data returned, tried to get actions for key = "
-          + rowKey + (writer == null ? "" : ", keys inserted by writer: " +
-              writer.numKeys.get() + ")"));
-      if (numReadErrors.get() > maxErrors) {
-        LOG.error("Aborting readers -- found more than " + maxErrors
-            + " errors\n");
-        aborted = true;
-      }
     }
-    if (result.getFamilyMap(columnFamily) != null) {
-      // increment number of columns read
-      numCols.addAndGet(result.getFamilyMap(columnFamily).size());
-      if (verify) {
-        // verify the result
-        List<KeyValue> keyValues = result.list();
-        for (KeyValue kv : keyValues) {
-          String qual = new String(kv.getQualifier());
-          // if something does not look right report it
-          if (!LoadTestKVGenerator.verify(rowKey, qual, kv.getValue())) {
-            numReadErrors.addAndGet(1);
-            LOG.error("Error checking data for key = " + rowKey
-                + ", actionId = " + qual);
-          }
-        }
-        numKeysVerified.addAndGet(1);
+    boolean isOk = verifyResultAgainstDataGenerator(result, verify);
+    long numErrorsAfterThis = 0;
+    if (isOk) {
+      long cols = 0;
+      // Count the columns for reporting purposes.
+      for (byte[] cf : result.getMap().keySet()) {
+        cols += result.getFamilyMap(cf).size();
       }
+      numCols.addAndGet(cols);
+    } else {
+      if (writer != null) {
+        LOG.error("At the time of failure, writer inserted " + writer.numKeys.get() + " keys");
+      }
+      numErrorsAfterThis = numReadErrors.incrementAndGet();
+    }
+    if (numErrorsAfterThis > maxErrors) {
+      LOG.error("Aborting readers -- found more than " + maxErrors + " errors");
+      aborted = true;
+    }
     }
   }
 }
 public long getNumReadFailures() {
View File: MultiThreadedWriter.java

@@ -18,9 +18,9 @@ package org.apache.hadoop.hbase.util;
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.PriorityQueue;
 import java.util.Queue;
-import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
@@ -33,14 +33,13 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 /** Creates multiple threads that write key/values into the */
 public class MultiThreadedWriter extends MultiThreadedAction {
   private static final Log LOG = LogFactory.getLog(MultiThreadedWriter.class);
-  private long minColumnsPerKey = 1;
-  private long maxColumnsPerKey = 10;
   private Set<HBaseWriterThread> writers = new HashSet<HBaseWriterThread>();
   private boolean isMultiPut = false;
@@ -51,8 +50,7 @@ public class MultiThreadedWriter extends MultiThreadedAction {
    * {@link #insertedUpToKey}, the maximum key in the contiguous range of keys
    * being inserted. This queue is supposed to stay small.
    */
-  private BlockingQueue<Long> insertedKeys =
-      new ArrayBlockingQueue<Long>(10000);
+  private BlockingQueue<Long> insertedKeys = new ArrayBlockingQueue<Long>(10000);
   /**
    * This is the current key to be inserted by any thread. Each thread does an
@@ -78,9 +76,9 @@ public class MultiThreadedWriter extends MultiThreadedAction {
   /** Enable this if used in conjunction with a concurrent reader. */
   private boolean trackInsertedKeys;
-  public MultiThreadedWriter(Configuration conf, byte[] tableName,
-      byte[] columnFamily) {
-    super(conf, tableName, columnFamily, "W");
+  public MultiThreadedWriter(LoadTestDataGenerator dataGen, Configuration conf,
+      byte[] tableName) {
+    super(dataGen, conf, tableName, "W");
   }
   /** Use multi-puts vs. separate puts for every column in a row */
@@ -88,11 +86,6 @@ public class MultiThreadedWriter extends MultiThreadedAction {
     this.isMultiPut = isMultiPut;
   }
-  public void setColumnsPerKey(long minColumnsPerKey, long maxColumnsPerKey) {
-    this.minColumnsPerKey = minColumnsPerKey;
-    this.maxColumnsPerKey = maxColumnsPerKey;
-  }
   @Override
   public void start(long startKey, long endKey, int numThreads)
       throws IOException {
@@ -118,17 +111,9 @@ public class MultiThreadedWriter extends MultiThreadedAction {
     startThreads(writers);
   }
-  public static byte[] longToByteArrayKey(long rowKey) {
-    return LoadTestKVGenerator.md5PrefixedKey(rowKey).getBytes();
-  }
   private class HBaseWriterThread extends Thread {
     private final HTable table;
-    private final Random random = new Random();
-    private final LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(
-        minDataSize, maxDataSize);
     public HBaseWriterThread(int writerId) throws IOException {
       setName(getClass().getSimpleName() + "_" + writerId);
       table = new HTable(conf, tableName);
@@ -136,20 +121,36 @@ public class MultiThreadedWriter extends MultiThreadedAction {
     public void run() {
       try {
-        long rowKey;
-        while ((rowKey = nextKeyToInsert.getAndIncrement()) < endKey) {
-          long numColumns = minColumnsPerKey + Math.abs(random.nextLong())
-              % (maxColumnsPerKey - minColumnsPerKey);
+        long rowKeyBase;
+        byte[][] columnFamilies = dataGenerator.getColumnFamilies();
+        while ((rowKeyBase = nextKeyToInsert.getAndIncrement()) < endKey) {
+          byte[] rowKey = dataGenerator.getDeterministicUniqueKey(rowKeyBase);
+          Put put = new Put(rowKey);
           numKeys.addAndGet(1);
-          if (isMultiPut) {
-            multiPutInsertKey(rowKey, 0, numColumns);
-          } else {
-            for (long col = 0; col < numColumns; ++col) {
-              insert(rowKey, col);
+          int columnCount = 0;
+          for (byte[] cf : columnFamilies) {
+            String s;
+            byte[][] columns = dataGenerator.generateColumnsForCf(rowKey, cf);
+            for (byte[] column : columns) {
+              byte[] value = dataGenerator.generateValue(rowKey, cf, column);
+              put.add(cf, column, value);
+              ++columnCount;
+              if (!isMultiPut) {
+                insert(put, rowKeyBase);
+                numCols.addAndGet(1);
+                put = new Put(rowKey);
+              }
             }
           }
+          if (isMultiPut) {
+            if (verbose) {
+              LOG.debug("Preparing put for key = [" + rowKey + "], " + columnCount + " columns");
+            }
+            insert(put, rowKeyBase);
+            numCols.addAndGet(columnCount);
+          }
           if (trackInsertedKeys) {
-            insertedKeys.add(rowKey);
+            insertedKeys.add(rowKeyBase);
           }
         }
       } finally {
@@ -162,52 +163,14 @@ public class MultiThreadedWriter extends MultiThreadedAction {
       }
     }
-    public void insert(long rowKey, long col) {
-      Put put = new Put(longToByteArrayKey(rowKey));
-      String colAsStr = String.valueOf(col);
-      put.add(columnFamily, Bytes.toBytes(colAsStr),
-          dataGenerator.generateRandomSizeValue(rowKey, colAsStr));
+    public void insert(Put put, long keyBase) {
       try {
        long start = System.currentTimeMillis();
        table.put(put);
-        numCols.addAndGet(1);
        totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
      } catch (IOException e) {
-        failedKeySet.add(rowKey);
-        LOG.error("Failed to insert: " + rowKey);
+        failedKeySet.add(keyBase);
+        LOG.error("Failed to insert: " + keyBase);
        e.printStackTrace();
      }
    }
-    public void multiPutInsertKey(long rowKey, long startCol, long endCol) {
-      if (verbose) {
-        LOG.debug("Preparing put for key = " + rowKey + ", cols = ["
-            + startCol + ", " + endCol + ")");
-      }
-      if (startCol >= endCol) {
-        return;
-      }
-      Put put = new Put(LoadTestKVGenerator.md5PrefixedKey(
-          rowKey).getBytes());
-      byte[] columnQualifier;
-      byte[] value;
-      for (long i = startCol; i < endCol; ++i) {
-        String qualStr = String.valueOf(i);
-        columnQualifier = qualStr.getBytes();
-        value = dataGenerator.generateRandomSizeValue(rowKey, qualStr);
-        put.add(columnFamily, columnQualifier, value);
-      }
-      try {
-        long start = System.currentTimeMillis();
-        table.put(put);
-        numCols.addAndGet(endCol - startCol);
-        totalOpTimeMs.addAndGet(
-            System.currentTimeMillis() - start);
-      } catch (IOException e) {
-        failedKeySet.add(rowKey);
-        e.printStackTrace();
-      }
-    }
@@ -302,8 +265,7 @@ public class MultiThreadedWriter extends MultiThreadedAction {
    * key, which requires a blocking queue and a consumer thread.
    * @param enable whether to enable tracking the last inserted key
    */
-  void setTrackInsertedKeys(boolean enable) {
+  public void setTrackInsertedKeys(boolean enable) {
     trackInsertedKeys = enable;
   }
 }

View File: RestartMetaTest.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 /**
  * A command-line tool that spins up a local process-based cluster, loads
@@ -59,8 +60,8 @@ public class RestartMetaTest extends AbstractHBaseTool {
   private void loadData() throws IOException {
     long startKey = 0;
     long endKey = 100000;
-    long minColsPerKey = 5;
-    long maxColsPerKey = 15;
+    int minColsPerKey = 5;
+    int maxColsPerKey = 15;
     int minColDataSize = 256;
     int maxColDataSize = 256 * 3;
     int numThreads = 10;
@@ -74,11 +75,10 @@ public class RestartMetaTest extends AbstractHBaseTool {
     System.out.printf("Client Threads: %d\n", numThreads);
     // start the writers
-    MultiThreadedWriter writer = new MultiThreadedWriter(conf, TABLE_NAME,
-        LoadTestTool.COLUMN_FAMILY);
+    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
+        minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, LoadTestTool.COLUMN_FAMILY);
+    MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
     writer.setMultiPut(true);
-    writer.setColumnsPerKey(minColsPerKey, maxColsPerKey);
-    writer.setDataSize(minColDataSize, maxColDataSize);
     writer.start(startKey, endKey, numThreads);
     System.out.printf("Started loading data...");
     writer.waitForFinish();

View File: TestMiniClusterLoadSequential.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -139,9 +140,10 @@ public class TestMiniClusterLoadSequential {
     TEST_UTIL.waitUntilAllRegionsAssigned(numRegions);
-    writerThreads = new MultiThreadedWriter(conf, TABLE, CF);
+    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(CF);
+    writerThreads = new MultiThreadedWriter(dataGen, conf, TABLE);
     writerThreads.setMultiPut(isMultiPut);
-    readerThreads = new MultiThreadedReader(conf, TABLE, CF, 100);
+    readerThreads = new MultiThreadedReader(dataGen, conf, TABLE, 100);
   }
   protected int numKeys() {