HBASE-607 MultiRegionTable.makeMultiRegionTable is not deterministic enough for regression tests

M MultiRegionTable

  Make deterministic by creating the regions directly rather than relying
  on the asynchronous nature of cache flushes, compactions and splits. The
  regions are small, but the point of this class is to generate a
  table with multiple regions so we can test map / reduce, region
  onlining / offlining, etc.

  Removed PUNCTUATION from row keys. Not sure why it was there in the
  first place, other than perhaps to verify that a row key can have
  punctuation in it provided it is not the first character. This will
  become moot when row keys change from Text to byte[] anyway.

  Incorporate repeated code
{code}
    region.close();
    region.getLog().closeAndDelete();
{code}
  into private method closeRegionAndDeleteLog

M TestSplit

  extends HBaseClusterTestCase instead of MultiRegionTable. It didn't
  use the output of MultiRegionTable, so all that work was just wasted
  by this test.

M TestTableIndex, TestTableMapReduce

  The only two tests that currently use MultiRegionTable. Minor
  modifications needed because MultiRegionTable now handles starting
  and stopping of the mini-DFS cluster. With the new MultiRegionTable
  class, if these tests fail now it will be because something they are
  testing has regressed and not because MultiRegionTable failed.



git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@652587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2008-05-01 17:37:24 +00:00
parent c70e4fa984
commit ab778e22e4
5 changed files with 83 additions and 490 deletions

View File

@ -28,6 +28,8 @@ Hbase Change Log
HBASE-608 HRegionServer::getThisIP() checks hadoop config var for dns interface name
(Jim R. Wilson via Stack)
HBASE-609 Master doesn't see regionserver edits because of clock skew
HBASE-607 MultiRegionTable.makeMultiRegionTable is not deterministic enough
for regression tests
IMPROVEMENTS
HBASE-559 MR example job to count table rows

View File

@ -20,349 +20,101 @@
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.util.ConcurrentModificationException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scanner;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
/**
* Utility class to build a table of multiple regions.
*/
public class MultiRegionTable extends HBaseClusterTestCase {
static final Log LOG = LogFactory.getLog(MultiRegionTable.class.getName());
// Region boundary keys. preHBaseClusterSetup creates one region per entry,
// using KEYS[i] as the start key and KEYS[(i + 1) % KEYS.length] as the end
// key, so the last region's end key wraps around to the leading null.
// NOTE(review): null presumably stands for the open-ended first start key /
// last end key -- confirm against createNewHRegion.
private static final Text[] KEYS = {
null,
new Text("bbb"),
new Text("ccc"),
new Text("ddd"),
new Text("eee"),
new Text("fff"),
new Text("ggg"),
new Text("hhh"),
new Text("iii"),
new Text("jjj"),
new Text("kkk"),
new Text("lll"),
new Text("mmm"),
new Text("nnn"),
new Text("ooo"),
new Text("ppp"),
new Text("qqq"),
new Text("rrr"),
new Text("sss"),
new Text("ttt"),
new Text("uuu"),
new Text("vvv"),
new Text("www"),
new Text("xxx"),
new Text("yyy")
};
protected final String columnName;
protected HTableDescriptor desc;
/**
* Default constructor
* @param columnName the column to populate.
*/
public MultiRegionTable() {
public MultiRegionTable(final String columnName) {
super();
this.columnName = columnName;
// These are needed for the new and improved Map/Reduce framework
System.setProperty("hadoop.log.dir", conf.get("hadoop.log.dir"));
conf.set("mapred.output.dir", conf.get("hadoop.tmp.dir"));
}
/**
* Make a multi-region table. Presumption is that table already exists and
* that there is only one regionserver. Makes it multi-region by filling with
* data and provoking splits. Asserts parent region is cleaned up after its
* daughter splits release all references.
* @param conf
* @param cluster
* @param fs
* @param tableName
* @param columnName
* @throws IOException
* Run after dfs is ready but before hbase cluster is started up.
*/
@SuppressWarnings("null")
public static void makeMultiRegionTable(HBaseConfiguration conf,
MiniHBaseCluster cluster, FileSystem fs, String tableName,
String columnName) throws IOException {
final int retries = 10;
final long waitTime = 20L * 1000L;
// This size should make it so we always split using the addContent
// below. After adding all data, the first region is 1.3M. Should
// set max filesize to be <= 1M.
assertTrue(conf.getLong("hbase.hregion.max.filesize",
HConstants.DEFAULT_MAX_FILE_SIZE) <= 1024 * 1024);
assertNotNull(fs);
Path d = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
// Get connection on the meta table and get count of rows.
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
int count = count(meta, tableName);
HTable t = new HTable(conf, new Text(tableName));
// Get the parent region here now.
HRegionInfo parent =
t.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
LOG.info("Parent region " + parent.toString());
Path parentDir = HRegion.getRegionDir(new Path(d, tableName),
parent.getEncodedName());
assertTrue(fs.exists(parentDir));
// Now add content.
addContent(new HTableIncommon(t), columnName);
LOG.info("Finished content loading");
// All is running in the one JVM so I should be able to get the single
// region instance and bring on a split. Presumption is that there is only
// one regionserver. If not, the split may already have happened by the
// time we got here. If so, then the region found when we go searching
// with EMPTY_START_ROW will be one of the unsplittable daughters.
HRegionInfo hri = null;
HRegion r = null;
HRegionServer server = cluster.getRegionThreads().get(0).getRegionServer();
for (int i = 0; i < 30; i++) {
@Override
protected void preHBaseClusterSetup() throws Exception {
try {
hri = t.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
} catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e);
e.printStackTrace();
continue;
}
LOG.info("Region location: " + hri);
r = server.getOnlineRegions().get(hri.getRegionName());
if (r != null) {
break;
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
LOG.warn("Waiting on region to come online", e);
}
}
assertNotNull(r);
// Create a bunch of regions
// Flush the cache
server.getFlushRequester().request(r);
// Now, wait until split makes it into the meta table.
int oldCount = count;
for (int i = 0; i < retries; i++) {
count = count(meta, tableName);
if (count > oldCount) {
break;
}
try {
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
}
if (count <= oldCount) {
throw new IOException("Failed waiting on splits to show up");
HRegion[] regions = new HRegion[KEYS.length];
for (int i = 0; i < regions.length; i++) {
int j = (i + 1) % regions.length;
regions[i] = createARegion(KEYS[i], KEYS[j]);
}
// Get info on the parent from the meta table. Pass in 'hri'. It's the
// region we have been dealing with up to this point. It's the parent of the
// region split.
RowResult data = getSplitParentInfo(meta, parent);
if (data == null) {
// We changed stuff so daughters get cleaned up much faster now. Can
// run so fast, parent has been deleted by time we get to here.
} else {
parent = Writables.getHRegionInfo(
data.get(HConstants.COL_REGIONINFO));
LOG.info("Found parent region: " + parent);
assertTrue(parent.isOffline());
assertTrue(parent.isSplit());
HRegionInfo splitA =
Writables.getHRegionInfo(data.get(HConstants.COL_SPLITA));
HRegionInfo splitB =
Writables.getHRegionInfo(data.get(HConstants.COL_SPLITB));
assertTrue("parentDir should exist", fs.exists(parentDir));
LOG.info("Split happened. Parent is " + parent.getRegionName());
// Now create the root and meta regions and insert the data regions
// created above into the meta
// Recalibrate will cause us to wait on new regions' deployment
recalibrate(t, new Text(columnName), retries, waitTime);
HRegion root = HRegion.createHRegion(HRegionInfo.rootRegionInfo,
testDir, this.conf);
HRegion meta = HRegion.createHRegion(HRegionInfo.firstMetaRegionInfo,
testDir, this.conf);
HRegion.addRegionToMETA(root, meta);
if (splitA == null) {
LOG.info("splitA was already null. Assuming it was previously compacted.");
} else {
LOG.info("Daughter splitA: " + splitA.getRegionName());
// Compact a region at a time so we can test case where one region has
// no references but the other still has some
compact(cluster, splitA);
for(int i = 0; i < regions.length; i++) {
HRegion.addRegionToMETA(meta, regions[i]);
}
// Wait till the parent only has reference to remaining split, one that
// still has references.
while (true) {
data = getSplitParentInfo(meta, parent);
if (data != null && data.size() == 3) {
LOG.info("Waiting for splitA to release reference to parent");
try {
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
continue;
}
break;
}
if (data != null) {
LOG.info("Parent split info returned " + data.keySet().toString());
closeRegionAndDeleteLog(root);
closeRegionAndDeleteLog(meta);
} catch (Exception e) {
StaticTestEnvironment.shutdownDfs(dfsCluster);
throw e;
}
}
if (splitB == null) {
LOG.info("splitB was already null. Assuming it was previously compacted.");
} else {
LOG.info("Daughter splitB: " + splitB.getRegionName());
// Call second split.
compact(cluster, splitB);
}
// Now wait until parent disappears.
LOG.info("Waiting on parent " + parent.getRegionName() + " to disappear");
for (int i = 0; i < retries; i++) {
if (getSplitParentInfo(meta, parent) == null) {
break;
/*
 * Build one region of the table described by <code>desc</code>, load it
 * with content for this.columnName, then close it and delete its log.
 * @param startKey first row key of the new region
 * @param endKey end row key of the new region
 * @return the region that was created; it has already been closed
 * @throws IOException
 */
private HRegion createARegion(Text startKey, Text endKey) throws IOException {
  final HRegion created = createNewHRegion(desc, startKey, endKey);
  // Populate while the region is still open.
  addContent(created, this.columnName);
  // The caller gets the region back only after it has been shut down.
  closeRegionAndDeleteLog(created);
  return created;
}
try {
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
}
assertNull(getSplitParentInfo(meta, parent));
}
// Assert cleaned up.
for (int i = 0; i < retries; i++) {
if (!fs.exists(parentDir)) {
LOG.info("Parent directory was deleted. tries=" + i);
break;
}
LOG.info("Waiting for parent directory to be deleted. tries=" + i);
try {
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
}
assertFalse(fs.exists(parentDir));
}
/*
 * Count the rows of the passed table whose region info names
 * <code>tableName</code> as its table. Callers pass the meta table.
 * @param t table to scan
 * @param tableName name of the table whose regions are counted
 * @return number of scanned rows whose region belongs to
 * <code>tableName</code>
 * @throws IOException
 */
private static int count(final HTable t, final String tableName)
throws IOException {
  final Scanner scanner = t.getScanner(
    new Text[] {HConstants.COLUMN_FAMILY}, HConstants.EMPTY_START_ROW,
    System.currentTimeMillis(), null);
  int matches = 0;
  try {
    for (RowResult row : scanner) {
      final HRegionInfo info =
        Writables.getHRegionInfo(row.get(HConstants.COL_REGIONINFO));
      final String owner = info.getTableDesc().getName().toString();
      if (owner.equals(tableName)) {
        matches++;
      }
    }
  } finally {
    if (scanner != null) {
      scanner.close();
    }
  }
  return matches;
}
/*
 * Scan the passed table for the row describing <code>parent</code>. A row
 * matches when its region info has the same region name and the same
 * region id as <code>parent</code>. Rows without region info are skipped.
 * @param t table to scan
 * @param parent region to look for
 * @return the matching row, or null if the scan did not find one
 * @throws IOException
 */
private static RowResult getSplitParentInfo(final HTable t,
final HRegionInfo parent)
throws IOException {
  final Scanner scanner = t.getScanner(HConstants.COLUMN_FAMILY_ARRAY,
    HConstants.EMPTY_START_ROW, System.currentTimeMillis(), null);
  RowResult found = null;
  try {
    for (RowResult candidate : scanner) {
      final HRegionInfo info =
        Writables.getHRegionInfo(candidate.get(HConstants.COL_REGIONINFO));
      // Make sure we get the parent: both name and id have to agree.
      if (info != null
          && info.getRegionName().equals(parent.getRegionName())
          && info.getRegionId() == parent.getRegionId()) {
        found = candidate;
        break;
      }
    }
  } finally {
    scanner.close();
  }
  return found;
}
/*
 * Recalibrate the passed HTable. Run after a change in region geography.
 * Opens a scanner on the table and asks it for one row. While that fails
 * with NotServingRegionException, sleep and try again, up to
 * <code>retries</code> attempts. If every attempt fails, the method returns
 * without reporting an error.
 * @param t table to scan
 * @param column column to open the scanner on
 * @param retries maximum number of scan attempts
 * @param waitTime milliseconds to sleep after a failed attempt
 * @throws IOException
 */
private static void recalibrate(final HTable t, final Text column,
final int retries, final long waitTime) throws IOException {
  final Text[] scanColumns = new Text[] {column};
  for (int attempt = 0; attempt < retries; attempt++) {
    try {
      final Scanner scanner =
        t.getScanner(scanColumns, HConstants.EMPTY_START_ROW);
      try {
        scanner.next();
      } finally {
        scanner.close();
      }
      // The scan went through, so there is nothing left to wait for.
      return;
    } catch (NotServingRegionException x) {
      System.out.println("it's alright");
      try {
        Thread.sleep(waitTime);
      } catch (InterruptedException e) {
        // continue
      }
    }
  }
}
/*
* Compact the passed in region <code>r</code>.
* @param cluster
* @param r
* @throws IOException
*/
protected static void compact(final MiniHBaseCluster cluster,
final HRegionInfo r) throws IOException {
if (r == null) {
LOG.debug("Passed region is null");
return;
}
LOG.info("Starting compaction");
for (LocalHBaseCluster.RegionServerThread thread:
cluster.getRegionThreads()) {
Map<Text, HRegion> regions = thread.getRegionServer().getOnlineRegions();
// Retry if ConcurrentModification... alternative of sync'ing is not
// worth it for sake of unit test.
for (int i = 0; i < 10; i++) {
try {
for (HRegion online: regions.values()) {
if (online.getRegionName().equals(r.getRegionName())) {
online.compactStores();
}
}
break;
} catch (ConcurrentModificationException e) {
LOG.warn("Retrying because ..." + e.toString() + " -- one or " +
"two should be fine");
continue;
}
}
}
/*
 * Close the passed region, then close and delete the log obtained from
 * region.getLog(). Statements run in this order; if region.close() throws,
 * the log is left untouched.
 * @param region region to shut down
 * @throws IOException
 */
private void closeRegionAndDeleteLog(HRegion region) throws IOException {
region.close();
region.getLog().closeAndDelete();
}
}

View File

@ -23,7 +23,6 @@ import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
@ -34,7 +33,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.regionserver.HRegion;
@ -75,45 +73,16 @@ public class TestTableIndex extends MultiRegionTable {
TEXT_OUTPUT_COLUMN
};
private HTableDescriptor desc;
private JobConf jobConf = null;
/** default constructor */
public TestTableIndex() {
// Enable DEBUG-level MR logging.
Logger.getLogger("org.apache.hadoop.mapred").setLevel(Level.DEBUG);
// Make sure the cache gets flushed so we trigger a compaction(s) and
// hence splits.
conf.setInt("hbase.hregion.memcache.flush.size", 1024 * 1024);
// This size should make it so we always split using the addContent
// below. After adding all data, the first region is 1.3M
conf.setLong("hbase.hregion.max.filesize", 1024 * 1024);
// Always compact if there is more than one store file.
conf.setInt("hbase.hstore.compactionThreshold", 2);
super(INPUT_COLUMN);
desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
}
/** {@inheritDoc} */
@Override
protected void postHBaseClusterSetup() throws Exception {
// Create a table.
HBaseAdmin admin = new HBaseAdmin(conf);
admin.createTable(desc);
// Populate a table into multiple regions
makeMultiRegionTable(conf, cluster, dfsCluster.getFileSystem(), TABLE_NAME,
INPUT_COLUMN);
// Verify table indeed has multiple regions
HTable table = new HTable(conf, new Text(TABLE_NAME));
Text[] startKeys = table.getStartKeys();
assertTrue(startKeys.length > 1);
// Enable DEBUG-level MR logging.
Logger.getLogger("org.apache.hadoop.mapred").setLevel(Level.DEBUG);
}
/** {@inheritDoc} */

View File

@ -21,26 +21,20 @@ package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scanner;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MultiRegionTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
@ -58,7 +52,6 @@ public class TestTableMapReduce extends MultiRegionTable {
private static final Log LOG =
LogFactory.getLog(TestTableMapReduce.class.getName());
static final String SINGLE_REGION_TABLE_NAME = "srtest";
static final String MULTI_REGION_TABLE_NAME = "mrtest";
static final String INPUT_COLUMN = "contents:";
static final Text TEXT_INPUT_COLUMN = new Text(INPUT_COLUMN);
@ -70,46 +63,12 @@ public class TestTableMapReduce extends MultiRegionTable {
TEXT_OUTPUT_COLUMN
};
// Cell values written to the single-region test table: row i receives
// values[i] in the input column (see localTestSingleRegionTable).
private static byte[][] values = null;
static {
try {
values = new byte[][] {
"0123".getBytes(HConstants.UTF8_ENCODING),
"abcd".getBytes(HConstants.UTF8_ENCODING),
"wxyz".getBytes(HConstants.UTF8_ENCODING),
"6789".getBytes(HConstants.UTF8_ENCODING)
};
} catch (UnsupportedEncodingException e) {
// The encoding name in HConstants.UTF8_ENCODING was rejected.
// NOTE(review): fail() is presumably JUnit's, inherited through the test
// base class; called here it would abort class initialization -- confirm.
fail();
}
}
/** constructor */
public TestTableMapReduce() {
super();
// Make sure the cache gets flushed so we trigger a compaction(s) and
// hence splits.
conf.setInt("hbase.hregion.memcache.flush.size", 1024 * 1024);
// Always compact if there is more than one store file.
conf.setInt("hbase.hstore.compactionThreshold", 2);
// This size should make it so we always split using the addContent
// below. After adding all data, the first region is 1.3M
conf.setLong("hbase.hregion.max.filesize", 1024 * 1024);
// Make lease timeout longer, lease checks less frequent
conf.setInt("hbase.master.lease.period", 10 * 1000);
conf.setInt("hbase.master.lease.thread.wakefrequency", 5 * 1000);
// Set client pause to the original default
conf.setInt("hbase.client.pause", 10 * 1000);
}
public void teardown() {
super(INPUT_COLUMN);
desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
}
/**
@ -118,8 +77,6 @@ public class TestTableMapReduce extends MultiRegionTable {
public static class ProcessContentsMapper extends TableMap<Text, BatchUpdate> {
/**
* Pass the key, and reversed value to reduce
*
* @see org.apache.hadoop.hbase.mapred.TableMap#map(org.apache.hadoop.hbase.HStoreKey, org.apache.hadoop.io.MapWritable, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
*/
@SuppressWarnings("unchecked")
@Override
@ -156,75 +113,12 @@ public class TestTableMapReduce extends MultiRegionTable {
}
}
/**
* Test a map/reduce against a single-region table.
* Delegates to localTestSingleRegionTable.
* @throws IOException
*/
public void testSingleRegionTable() throws IOException {
localTestSingleRegionTable();
}
/**
* Test a map/reduce against a multi-region table
* @throws IOException
*/
public void testMultiRegionTable() throws IOException {
localTestMultiRegionTable();
}
/*
 * Run the map/reduce test against a table that is created here with the
 * input and output column families and filled with one row per entry of
 * <code>values</code>.
 * @throws IOException
 */
private void localTestSingleRegionTable() throws IOException {
  final HTableDescriptor tableDescriptor =
    new HTableDescriptor(SINGLE_REGION_TABLE_NAME);
  tableDescriptor.addFamily(new HColumnDescriptor(INPUT_COLUMN));
  tableDescriptor.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));

  // Create the table.
  new HBaseAdmin(this.conf).createTable(tableDescriptor);

  // Insert one row per value; row keys are "row_" plus a 5-digit index.
  final HTable singleRegionTable =
    new HTable(conf, new Text(SINGLE_REGION_TABLE_NAME));
  int rowIndex = 0;
  for (byte[] value : values) {
    final Text rowKey =
      new Text("row_" + String.format("%1$05d", rowIndex));
    final BatchUpdate update = new BatchUpdate(rowKey);
    update.put(TEXT_INPUT_COLUMN, value);
    singleRegionTable.commit(update);
    rowIndex++;
  }

  LOG.info("Print table contents before map/reduce for " +
    SINGLE_REGION_TABLE_NAME);
  scanTable(SINGLE_REGION_TABLE_NAME, true);

  runTestOnTable(singleRegionTable);
}
/*
* Test against multiple regions.
* @throws IOException
*/
private void localTestMultiRegionTable() throws IOException {
HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
// Create a table.
HBaseAdmin admin = new HBaseAdmin(this.conf);
admin.createTable(desc);
// Populate a table into multiple regions
makeMultiRegionTable(conf, cluster, dfsCluster.getFileSystem(),
MULTI_REGION_TABLE_NAME, INPUT_COLUMN);
// Verify table indeed has multiple regions
HTable table = new HTable(conf, new Text(MULTI_REGION_TABLE_NAME));
Text[] startKeys = table.getStartKeys();
assertTrue(startKeys.length > 1);
runTestOnTable(table);
runTestOnTable(new HTable(conf, new Text(MULTI_REGION_TABLE_NAME)));
}
@ -259,30 +153,6 @@ public class TestTableMapReduce extends MultiRegionTable {
}
}
/*
 * Scan every row of the named table over <code>columns</code>, optionally
 * logging each row key and each column/value pair. The scan runs to the
 * end even when nothing is printed.
 * @param tableName table to scan
 * @param printValues true to log the rows and cells that are read
 * @throws IOException
 */
private void scanTable(String tableName, boolean printValues)
throws IOException {
  final HTable scanned = new HTable(conf, new Text(tableName));
  final Scanner rows =
    scanned.getScanner(columns, HConstants.EMPTY_START_ROW);
  try {
    for (RowResult row : rows) {
      if (!printValues) {
        continue;
      }
      LOG.info("row: " + row.getRow());
      for (Map.Entry<Text, Cell> cell : row.entrySet()) {
        final String cellText =
          new String(cell.getValue().getValue(), HConstants.UTF8_ENCODING);
        LOG.info(" column: " + cell.getKey() + " value: "
          + cellText);
      }
    }
  } finally {
    rows.close();
  }
}
@SuppressWarnings("null")
private void verify(String tableName) throws IOException {
HTable table = new HTable(conf, new Text(tableName));

View File

@ -28,7 +28,7 @@ import org.apache.hadoop.io.Text;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.MultiRegionTable;
import org.apache.hadoop.hbase.HBaseClusterTestCase;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
@ -38,7 +38,7 @@ import org.apache.hadoop.hbase.io.Cell;
* {@link TestHRegion} does a split but this TestCase adds testing of fast
* split and manufactures odd-ball split scenarios.
*/
public class TestSplit extends MultiRegionTable {
public class TestSplit extends HBaseClusterTestCase {
@SuppressWarnings("hiding")
static final Log LOG = LogFactory.getLog(TestSplit.class.getName());