HBASE-8505 References to split daughters should not be deleted separately from parent META entry

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1483481 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Enis Soztutar 2013-05-16 18:00:15 +00:00
parent 2c3d36cc10
commit 1a131b9e3c
6 changed files with 303 additions and 89 deletions

View File

@ -19,6 +19,13 @@
package org.apache.hadoop.hbase.client;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -29,13 +36,6 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
/**
* Scanner class that contains the <code>.META.</code> table scanning logic.
* Provided visitors will be called for each row.
@ -287,10 +287,7 @@ public class MetaScanner {
}
/**
* A MetaScannerVisitor that provides a consistent view of the table's
* META entries during concurrent splits (see HBASE-5986 for details). This class
* does not guarantee ordered traversal of meta entries, and can block until the
* META entries for daughters are available during splits.
* A MetaScannerVisitor that skips offline regions and split parents
*/
public static abstract class DefaultMetaScannerVisitor
extends MetaScannerVisitorBase {

View File

@ -18,8 +18,8 @@
package org.apache.hadoop.hbase.util;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkPositionIndex;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndex;
import java.io.DataInput;
import java.io.DataOutput;
@ -123,15 +123,45 @@ public class Bytes {
public ByteArrayComparator() {
// Nothing to initialise here; only the superclass no-argument constructor runs.
super();
}
/**
 * Compares two whole byte arrays.
 * @param left first array
 * @param right second array
 * @return the value produced by {@code compareTo(left, right)}
 */
@Override
public int compare(byte [] left, byte [] right) {
return compareTo(left, right);
}
/**
 * Compares a range of {@code b1} against a range of {@code b2} by delegating to
 * {@code LexicographicalComparerHolder.BEST_COMPARER}.
 * @param b1 first array
 * @param s1 start offset into {@code b1}
 * @param l1 number of bytes of {@code b1} to compare
 * @param b2 second array
 * @param s2 start offset into {@code b2}
 * @param l2 number of bytes of {@code b2} to compare
 * @return the comparer's result for the two ranges
 */
@Override
public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) {
return LexicographicalComparerHolder.BEST_COMPARER.
compareTo(b1, s1, l1, b2, s2, l2);
}
}
/**
 * A {@link ByteArrayComparator} for region end keys: a zero-length key sorts after
 * every non-empty key rather than before it, and two zero-length keys compare as equal.
 */
// TODO: HBase uses byte[0] both as the start key of the first region and as the end key
// of the last region. When comparing row keys or start keys the empty array should be the
// smallest value; when comparing region end keys it has to be the largest. This class
// only covers the end-key case.
public static class RowEndKeyComparator extends ByteArrayComparator {
  @Override
  public int compare(byte[] left, byte[] right) {
    final int leftLength = left.length;
    final int rightLength = right.length;
    return compare(left, 0, leftLength, right, 0, rightLength);
  }

  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    final boolean identicalRange = (b1 == b2) && (s1 == s2) && (l1 == l2);
    if (identicalRange) {
      return 0;
    }
    final boolean firstIsEmpty = (l1 == 0);
    final boolean secondIsEmpty = (l2 == 0);
    if (firstIsEmpty || secondIsEmpty) {
      // Empty first key: l2 is 0 when the second key is empty too (equal) and positive
      // otherwise (first is larger). If only the second key is empty, the first is smaller.
      return firstIsEmpty ? l2 : -1;
    }
    // Neither key is empty: fall back to the ordering of the parent comparator.
    return super.compare(b1, s1, l1, b2, s2, l2);
  }
}
/**
* Pass this to TreeMaps where byte [] are keys.
*/
@ -1660,23 +1690,23 @@ public class Bytes {
}
/**
* Copy the byte array given in parameter and return an instance
* Copy the byte array given in parameter and return an instance
* of a new byte array with the same length and the same content.
* @param bytes the byte array to duplicate
* @return a copy of the given byte array
* @return a copy of the given byte array
*/
public static byte [] copy(byte [] bytes) {
if (bytes == null) return null;
byte [] result = new byte[bytes.length];
System.arraycopy(bytes, 0, result, 0, bytes.length);
System.arraycopy(bytes, 0, result, 0, bytes.length);
return result;
}
/**
* Copy the byte array given in parameter and return an instance
* Copy the byte array given in parameter and return an instance
* of a new byte array with the same length and the same content.
* @param bytes the byte array to copy from
* @return a copy of the given designated byte array
* @return a copy of the given designated byte array
* @param offset
* @param length
*/
@ -1801,7 +1831,7 @@ public class Bytes {
}
return -1;
}
/**
* Returns the start position of the first occurrence of the specified {@code
* target} within {@code array}, or {@code -1} if there is no such occurrence.
@ -1831,7 +1861,7 @@ public class Bytes {
}
return -1;
}
/**
* @param array an array of {@code byte} values, possibly empty
* @param target a primitive {@code byte} value
@ -1840,7 +1870,7 @@ public class Bytes {
public static boolean contains(byte[] array, byte target) {
// indexOf yields -1 when the byte is absent, so any larger value means "found".
return indexOf(array, target) > -1;
}
/**
* @param array an array of {@code byte} values, possibly empty
* @param target an array of {@code byte}
@ -1849,7 +1879,7 @@ public class Bytes {
public static boolean contains(byte[] array, byte[] target) {
// indexOf yields -1 when the sub-array does not occur, so any larger value means "found".
return indexOf(array, target) > -1;
}
/**
* Fill given array with zeros.
* @param b array which needs to be filled with zeros

View File

@ -493,25 +493,6 @@ public class MetaEditor {
}
}
/**
 * Removes the two daughter-reference columns (the {@code SPLITA} and {@code SPLITB}
 * qualifiers of the catalog family) from the row of an offlined split parent, then logs
 * what was removed.
 * @param catalogTracker handed to {@code deleteFromMetaTable} to issue the delete
 * @param parent split parent whose row loses its daughter references
 * @throws org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException declared by
 * this method; presumably raised by the meta write -- confirm against deleteFromMetaTable
 * @throws IOException if the delete against the meta table fails
 */
public static void deleteDaughtersReferencesInParent(CatalogTracker catalogTracker,
    final HRegionInfo parent)
throws NotAllMetaRegionsOnlineException, IOException {
  final Delete daughterRefs = new Delete(parent.getRegionName());
  daughterRefs.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
  daughterRefs.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
  deleteFromMetaTable(catalogTracker, daughterRefs);

  // Only report once the delete has gone through.
  final String qualifierA = Bytes.toStringBinary(HConstants.SPLITA_QUALIFIER);
  final String qualifierB = Bytes.toStringBinary(HConstants.SPLITB_QUALIFIER);
  final String parentName = parent.getRegionNameAsString();
  final StringBuilder message = new StringBuilder();
  message.append("Deleted daughters references, qualifier=").append(qualifierA);
  message.append(" and qualifier=").append(qualifierB);
  message.append(", from parent ").append(parentName);
  LOG.info(message.toString());
}
/**
* Deletes merge qualifiers for the specified merged region.
* @param catalogTracker

View File

@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.util.Bytes;
@ -136,9 +138,10 @@ public class CatalogJanitor extends Chore {
new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
final Map<HRegionInfo, Result> mergedRegions = new TreeMap<HRegionInfo, Result>();
// This visitor collects split parents and counts rows in the .META. table
MetaReader.Visitor visitor = new MetaReader.Visitor() {
MetaScannerVisitor visitor = new MetaScanner.MetaScannerVisitorBase() {
@Override
public boolean visit(Result r) throws IOException {
public boolean processRow(Result r) throws IOException {
if (r == null || r.isEmpty()) return true;
count.incrementAndGet();
HRegionInfo info = HRegionInfo.getHRegionInfo(r);
@ -157,12 +160,9 @@ public class CatalogJanitor extends Chore {
}
};
byte[] startRow = (!isTableSpecified) ? HConstants.EMPTY_START_ROW
: HRegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW,
HConstants.ZEROES, false);
// Run full scan of .META. catalog table passing in our custom visitor with
// the start row
MetaReader.fullScan(this.server.getCatalogTracker(), visitor, startRow);
MetaScanner.metaScan(server.getConfiguration(), visitor, tableName);
return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(
count.get(), mergedRegions, splitParents);
@ -281,6 +281,7 @@ public class CatalogJanitor extends Chore {
* daughters.
*/
static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
@Override
public int compare(HRegionInfo left, HRegionInfo right) {
// This comparator differs from the one HRegionInfo in that it sorts
@ -295,19 +296,9 @@ public class CatalogJanitor extends Chore {
result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
if (result != 0) return result;
// Compare end keys.
result = Bytes.compareTo(left.getEndKey(), right.getEndKey());
if (result != 0) {
if (left.getStartKey().length != 0
&& left.getEndKey().length == 0) {
return -1; // left is last region
}
if (right.getStartKey().length != 0
&& right.getEndKey().length == 0) {
return 1; // right is the last region
}
return -result; // Flip the result so parent comes first.
}
return result;
result = rowEndKeyComparator.compare(left.getEndKey(), right.getEndKey());
return -result; // Flip the result so parent comes first.
}
}
@ -338,8 +329,6 @@ public class CatalogJanitor extends Chore {
if (hasNoReferences(a) && hasNoReferences(b)) {
LOG.debug("Deleting region " + parent.getRegionNameAsString() +
" because daughter splits no longer hold references");
// wipe out daughter references from parent region in meta
removeDaughtersFromParent(parent);
// This latter regionOffline should not be necessary but is done for now
// until we let go of regionserver to master heartbeats. See HBASE-3368.
@ -367,16 +356,6 @@ public class CatalogJanitor extends Chore {
return !p.getFirst() || !p.getSecond();
}
/**
 * Removes the mention of daughters from the parent's row by delegating to
 * {@code MetaEditor.deleteDaughtersReferencesInParent} with this server's catalog tracker.
 * @param parent split parent whose daughter references are to be removed
 * @throws IOException propagated from the MetaEditor call
 */
private void removeDaughtersFromParent(final HRegionInfo parent)
throws IOException {
MetaEditor.deleteDaughtersReferencesInParent(this.server.getCatalogTracker(), parent);
}
/**
* Checks if a daughter region -- either splitA or splitB -- still holds
* references to parent.

View File

@ -18,39 +18,55 @@
*/
package org.apache.hadoop.hbase.client;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.math.BigDecimal;
import java.util.List;
import java.util.NavigableMap;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.apache.hadoop.hbase.util.StoppableImplementation;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.StringUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import static org.mockito.Mockito.*;
@Category(MediumTests.class)
public class TestMetaScanner {
final Log LOG = LogFactory.getLog(getClass());
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
@BeforeClass
public static void setUpBeforeClass() throws Exception {
public void setUp() throws Exception {
TEST_UTIL.startMiniCluster(1);
}
/**
* @throws java.lang.Exception
*/
@AfterClass
public static void tearDownAfterClass() throws Exception {
@After
public void tearDown() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
@Test
public void testMetaScanner() throws Exception {
LOG.info("Starting testMetaScanner");
setUp();
final byte[] TABLENAME = Bytes.toBytes("testMetaScanner");
final byte[] FAMILY = Bytes.toBytes("family");
TEST_UTIL.createTable(TABLENAME, FAMILY);
@ -63,29 +79,29 @@ public class TestMetaScanner {
Bytes.toBytes("region_b")});
// Make sure all the regions are deployed
TEST_UTIL.countRows(table);
MetaScanner.MetaScannerVisitor visitor =
MetaScanner.MetaScannerVisitor visitor =
mock(MetaScanner.MetaScannerVisitor.class);
doReturn(true).when(visitor).processRow((Result)anyObject());
// Scanning the entire table should give us three rows
MetaScanner.metaScan(conf, visitor, TABLENAME);
verify(visitor, times(3)).processRow((Result)anyObject());
// Scanning the table with a specified empty start row should also
// give us three META rows
reset(visitor);
doReturn(true).when(visitor).processRow((Result)anyObject());
MetaScanner.metaScan(conf, visitor, TABLENAME, HConstants.EMPTY_BYTE_ARRAY, 1000);
verify(visitor, times(3)).processRow((Result)anyObject());
// Scanning the table starting in the middle should give us two rows:
// region_a and region_b
reset(visitor);
doReturn(true).when(visitor).processRow((Result)anyObject());
MetaScanner.metaScan(conf, visitor, TABLENAME, Bytes.toBytes("region_ac"), 1000);
verify(visitor, times(2)).processRow((Result)anyObject());
// Scanning with a limit of 1 should only give us one row
reset(visitor);
doReturn(true).when(visitor).processRow((Result)anyObject());
@ -94,5 +110,134 @@ public class TestMetaScanner {
table.close();
}
/**
 * Runs a splitter thread and a meta-scanning verifier thread side by side for a fixed
 * time and fails if the verifier ever sees a table whose regions do not form one
 * contiguous chain of start/end keys.
 * @throws Throwable the first failure recorded by either worker thread, or any failure
 * of the setup code itself
 */
@Test
public void testConcurrentMetaScannerAndCatalogJanitor() throws Throwable {
/* TEST PLAN: start with only one region in a table. Have a splitter
 * thread and metascanner threads that continuously scan the meta table for regions.
 * CatalogJanitor from master will run frequently to clean things up
 */
// Shorten the janitor interval before the cluster is started by setUp().
// NOTE(review): the value is presumably in milliseconds -- confirm.
TEST_UTIL.getConfiguration().setLong("hbase.catalogjanitor.interval", 500);
setUp();
final long runtime = 30 * 1000; //30 sec
LOG.info("Starting testConcurrentMetaScannerAndCatalogJanitor");
final byte[] TABLENAME = Bytes.toBytes("testConcurrentMetaScannerAndCatalogJanitor");
final byte[] FAMILY = Bytes.toBytes("family");
TEST_UTIL.createTable(TABLENAME, FAMILY);
// Mocked tracker whose only stubbed call hands out the test cluster's real connection.
final CatalogTracker catalogTracker = mock(CatalogTracker.class);
when(catalogTracker.getConnection()).thenReturn(TEST_UTIL.getHBaseAdmin().getConnection());
// Worker that keeps splitting a randomly chosen region of the test table by writing the
// split through MetaEditor.splitRegion, until it is stopped.
class RegionMetaSplitter extends StoppableImplementation implements Runnable {
Random random = new Random();
// First failure seen by this worker; read by the test thread after join().
Throwable ex = null;
@Override
public void run() {
while (!isStopped()) {
try {
List<HRegionInfo> regions = MetaScanner.listAllRegions(
TEST_UTIL.getConfiguration(), false);
//select a random region
HRegionInfo parent = regions.get(random.nextInt(regions.size()));
// Regions of other tables are skipped.
// NOTE(review): this path (and the startKey == endKey path below) loops again
// without sleeping.
if (parent == null || !Bytes.equals(TABLENAME, parent.getTableName())) {
continue;
}
// Keys are read as longs; an empty start key counts as 0 and an empty end key as
// Long.MAX_VALUE.
long startKey = 0, endKey = Long.MAX_VALUE;
byte[] start = parent.getStartKey();
byte[] end = parent.getEndKey();
if (!Bytes.equals(HConstants.EMPTY_START_ROW, parent.getStartKey())) {
startKey = Bytes.toLong(parent.getStartKey());
}
if (!Bytes.equals(HConstants.EMPTY_END_ROW, parent.getEndKey())) {
endKey = Bytes.toLong(parent.getEndKey());
}
if (startKey == endKey) {
continue;
}
// Midpoint computed with BigDecimal because startKey + endKey can exceed the long range.
long midKey = BigDecimal.valueOf(startKey).add(BigDecimal.valueOf(endKey))
.divideToIntegralValue(BigDecimal.valueOf(2)).longValue();
// Daughters reuse the parent's own boundary arrays and meet at midKey.
HRegionInfo splita = new HRegionInfo(TABLENAME,
start,
Bytes.toBytes(midKey));
HRegionInfo splitb = new HRegionInfo(TABLENAME,
Bytes.toBytes(midKey),
end);
MetaEditor.splitRegion(catalogTracker, parent, splita, splitb, new ServerName("fooserver", 1, 0));
Threads.sleep(random.nextInt(200));
} catch (Throwable e) {
ex = e;
// NOTE(review): Assert.fail throws from inside this worker thread, which ends run();
// the test thread only learns of the failure through rethrowExceptionIfAny().
Assert.fail(StringUtils.stringifyException(e));
}
}
}
// Rethrows the recorded failure, if any, on the calling thread.
void rethrowExceptionIfAny() throws Throwable {
if (ex != null) { throw ex; }
}
}
// Worker that repeatedly lists the table's regions from meta and checks that they form
// one contiguous chain: each start key equals the previous end key, beginning and
// ending with the empty key.
class MetaScannerVerifier extends StoppableImplementation implements Runnable {
Random random = new Random();
// First failure seen by this worker; read by the test thread after join().
Throwable ex = null;
@Override
public void run() {
while(!isStopped()) {
try {
NavigableMap<HRegionInfo, ServerName> regions =
MetaScanner.allTableRegions(TEST_UTIL.getConfiguration(), TABLENAME, false);
LOG.info("-------");
byte[] lastEndKey = HConstants.EMPTY_START_ROW;
for (HRegionInfo hri: regions.navigableKeySet()) {
// startKey/endKey are decoded for the log line only; the assertions compare raw bytes.
long startKey = 0, endKey = Long.MAX_VALUE;
if (!Bytes.equals(HConstants.EMPTY_START_ROW, hri.getStartKey())) {
startKey = Bytes.toLong(hri.getStartKey());
}
if (!Bytes.equals(HConstants.EMPTY_END_ROW, hri.getEndKey())) {
endKey = Bytes.toLong(hri.getEndKey());
}
LOG.info("start:" + startKey + " end:" + endKey + " hri:" + hri);
Assert.assertTrue(Bytes.equals(lastEndKey, hri.getStartKey()));
lastEndKey = hri.getEndKey();
}
// The last region visited must end with the empty end key.
Assert.assertTrue(Bytes.equals(lastEndKey, HConstants.EMPTY_END_ROW));
LOG.info("-------");
Threads.sleep(10 + random.nextInt(50));
} catch (Throwable e) {
ex = e;
// NOTE(review): as in the splitter, this throws inside the worker thread and ends run().
Assert.fail(StringUtils.stringifyException(e));
}
}
}
// Rethrows the recorded failure, if any, on the calling thread.
void rethrowExceptionIfAny() throws Throwable {
if (ex != null) { throw ex; }
}
}
RegionMetaSplitter regionMetaSplitter = new RegionMetaSplitter();
MetaScannerVerifier metaScannerVerifier = new MetaScannerVerifier();
Thread regionMetaSplitterThread = new Thread(regionMetaSplitter);
Thread metaScannerVerifierThread = new Thread(metaScannerVerifier);
regionMetaSplitterThread.start();
metaScannerVerifierThread.start();
// Let both workers run for the configured time, then stop them and wait for them to exit.
Threads.sleep(runtime);
regionMetaSplitter.stop("test finished");
metaScannerVerifier.stop("test finished");
regionMetaSplitterThread.join();
metaScannerVerifierThread.join();
// Surface worker failures on the test thread so that the test actually fails.
regionMetaSplitter.rethrowExceptionIfAny();
metaScannerVerifier.rethrowExceptionIfAny();
}
}

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server;
@ -586,6 +587,87 @@ public class TestCatalogJanitor {
* Test that we correctly archive all the storefiles when a region is deleted
* @throws Exception
*/
// NOTE(review): the Javadoc immediately above this method ("Test that we correctly archive
// all the storefiles when a region is deleted") describes testArchiveOldRegion, which now
// follows this method; it should be moved back next to that test.
/**
 * Checks the ordering produced by {@code SplitParentFirstComparator} over three
 * generations of splits: every region compares equal to itself, a parent compares lower
 * than each of its daughters, and regions with a lower start key compare lower.
 */
@Test
public void testSplitParentFirstComparator() {
SplitParentFirstComparator comp = new SplitParentFirstComparator();
final HTableDescriptor htd = createHTableDescriptor();
/* Region splits:
 *
 * rootRegion --- firstRegion --- firstRegiona
 * | |- firstRegionb
 * |
 * |- lastRegion --- lastRegiona --- lastRegionaa
 * | |- lastRegionab
 * |- lastRegionb
 *
 * rootRegion : [] - []
 * firstRegion : [] - bbb
 * lastRegion : bbb - []
 * firstRegiona : [] - aaa
 * firstRegionb : aaa - bbb
 * lastRegiona : bbb - ddd
 * lastRegionb : ddd - []
 * lastRegionaa : bbb - ccc
 * lastRegionab : ccc - ddd
 */
// root region
// NOTE(review): the fourth constructor argument is presumably the "split" flag -- confirm
// against HRegionInfo.
HRegionInfo rootRegion = new HRegionInfo(htd.getName(), HConstants.EMPTY_START_ROW,
HConstants.EMPTY_END_ROW, true);
HRegionInfo firstRegion = new HRegionInfo(htd.getName(), HConstants.EMPTY_START_ROW,
Bytes.toBytes("bbb"), true);
HRegionInfo lastRegion = new HRegionInfo(htd.getName(), Bytes.toBytes("bbb"),
HConstants.EMPTY_END_ROW, true);
// Each region equals itself; the root precedes both daughters; first precedes last.
assertTrue(comp.compare(rootRegion, rootRegion) == 0);
assertTrue(comp.compare(firstRegion, firstRegion) == 0);
assertTrue(comp.compare(lastRegion, lastRegion) == 0);
assertTrue(comp.compare(rootRegion, firstRegion) < 0);
assertTrue(comp.compare(rootRegion, lastRegion) < 0);
assertTrue(comp.compare(firstRegion, lastRegion) < 0);
//first region split into a, b
HRegionInfo firstRegiona = new HRegionInfo(htd.getName(), HConstants.EMPTY_START_ROW,
Bytes.toBytes("aaa"), true);
HRegionInfo firstRegionb = new HRegionInfo(htd.getName(), Bytes.toBytes("aaa"),
Bytes.toBytes("bbb"), true);
//last region split into a, b
HRegionInfo lastRegiona = new HRegionInfo(htd.getName(), Bytes.toBytes("bbb"),
Bytes.toBytes("ddd"), true);
HRegionInfo lastRegionb = new HRegionInfo(htd.getName(), Bytes.toBytes("ddd"),
HConstants.EMPTY_END_ROW, true);
// Second generation under firstRegion: ancestors come before daughters, a before b.
assertTrue(comp.compare(firstRegiona, firstRegiona) == 0);
assertTrue(comp.compare(firstRegionb, firstRegionb) == 0);
assertTrue(comp.compare(rootRegion, firstRegiona) < 0);
assertTrue(comp.compare(rootRegion, firstRegionb) < 0);
assertTrue(comp.compare(firstRegion, firstRegiona) < 0);
assertTrue(comp.compare(firstRegion, firstRegionb) < 0);
assertTrue(comp.compare(firstRegiona, firstRegionb) < 0);
// Second generation under lastRegion, including lastRegionb whose end key is empty.
assertTrue(comp.compare(lastRegiona, lastRegiona) == 0);
assertTrue(comp.compare(lastRegionb, lastRegionb) == 0);
assertTrue(comp.compare(rootRegion, lastRegiona) < 0);
assertTrue(comp.compare(rootRegion, lastRegionb) < 0);
assertTrue(comp.compare(lastRegion, lastRegiona) < 0);
assertTrue(comp.compare(lastRegion, lastRegionb) < 0);
assertTrue(comp.compare(lastRegiona, lastRegionb) < 0);
// Across the two subtrees, everything under firstRegion precedes everything under lastRegion.
assertTrue(comp.compare(firstRegiona, lastRegiona) < 0);
assertTrue(comp.compare(firstRegiona, lastRegionb) < 0);
assertTrue(comp.compare(firstRegionb, lastRegiona) < 0);
assertTrue(comp.compare(firstRegionb, lastRegionb) < 0);
// Third generation: lastRegiona split into aa and ab.
HRegionInfo lastRegionaa = new HRegionInfo(htd.getName(), Bytes.toBytes("bbb"),
Bytes.toBytes("ccc"), false);
HRegionInfo lastRegionab = new HRegionInfo(htd.getName(), Bytes.toBytes("ccc"),
Bytes.toBytes("ddd"), false);
assertTrue(comp.compare(lastRegiona, lastRegionaa) < 0);
assertTrue(comp.compare(lastRegiona, lastRegionab) < 0);
assertTrue(comp.compare(lastRegionaa, lastRegionab) < 0);
}
@Test
public void testArchiveOldRegion() throws Exception {
String table = "table";