HBASE-9204 An Offline SplitParent Region can be assigned breaking split references

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1514747 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jxiang 2013-08-16 15:20:56 +00:00
parent 01f291e7b5
commit b058cdf710
5 changed files with 146 additions and 112 deletions

View File

@ -655,17 +655,28 @@ public class AssignmentManager extends ZooKeeperListener {
LOG.warn("Processed region " + prettyPrintedRegionName + " in state : " + et +
" on a dead regionserver: " + sn + " doing nothing");
} else {
// Splitting region should be online. We could have skipped it during
// user region rebuilding since we may consider the split is completed.
// Put it in SPLITTING state to avoid complications.
regionStates.regionOnline(regionInfo, sn);
regionStates.updateRegionState(rt, RegionState.State.SPLITTING);
LOG.info("Processed " + prettyPrintedRegionName + " in state : " + et);
}
break;
case RS_ZK_REGION_SPLIT:
if (!serverManager.isServerOnline(sn)) {
forceOffline(regionInfo, rt);
// The region is already in SPLIT state, do nothing
LOG.warn("Processed " + prettyPrintedRegionName
+ " in state : " + et + " on a dead regionserver: " + sn
+ " doing nothing");
} else {
LOG.info("Processed " + prettyPrintedRegionName + " in state : " + et +
" nothing to do.");
// We don't do anything. The regionserver is supposed to update the znode
// Splitting region should be online. We could have skipped it during
// user region rebuilding since we may consider the split is completed.
// Put it in SPLITTING state to avoid complications.
regionStates.regionOnline(regionInfo, sn);
regionStates.updateRegionState(rt, RegionState.State.SPLITTING);
LOG.info("Processed " + prettyPrintedRegionName + " in state : " + et);
// Move the region to splitting state. The regionserver is supposed to update the znode
// multiple times so if it's still up we will receive an update soon.
}
break;
@ -684,13 +695,18 @@ public class AssignmentManager extends ZooKeeperListener {
break;
case RS_ZK_REGION_MERGED:
if (!serverManager.isServerOnline(sn)) {
// ServerShutdownHandler would handle this region
// Do nothing, merging regions are already removed from meta,
// so they are not in region states map any more.
// The new region will be assigned by the ServerShutdownHandler
LOG.warn("Processed " + prettyPrintedRegionName
+ " in state : " + et + " on a dead regionserver: " + sn
+ " doing nothing");
+ " in state : " + et + " on a dead regionserver: " + sn
+ " doing nothing");
} else {
// Merging regions are already removed from meta. It doesn't hurt to
// do nothing here, no need to set them to merging state here. We are fine
// to put the new region to online state during user region rebuilding.
LOG.info("Processed " + prettyPrintedRegionName + " in state : " +
et + " nothing to do.");
et + " nothing to do.");
// We don't do anything. The regionserver is supposed to update the znode
// multiple times so if it's still up we will receive an update soon.
}
@ -1786,34 +1802,34 @@ public class AssignmentManager extends ZooKeeperListener {
if (state == null) {
LOG.warn("Assigning a region not in region states: " + region);
state = regionStates.createRegionState(region);
} else {
switch (state.getState()) {
case OPEN:
case OPENING:
case PENDING_OPEN:
if (!forceNewPlan) {
LOG.debug("Attempting to assign region " +
region + " but it is already in transition: " + state);
return null;
}
case CLOSING:
case PENDING_CLOSE:
case FAILED_CLOSE:
case FAILED_OPEN:
unassign(region, state, -1, null, false, null);
state = regionStates.getRegionState(region);
if (state.isOffline()) break;
case CLOSED:
LOG.debug("Forcing OFFLINE; was=" + state);
state = regionStates.updateRegionState(
region, RegionState.State.OFFLINE);
case OFFLINE:
break;
default:
LOG.error("Trying to assign region " + region
+ ", which is in state " + state);
}
switch (state.getState()) {
case OPEN:
case OPENING:
case PENDING_OPEN:
if (!forceNewPlan) {
LOG.debug("Attempting to assign region " +
region + " but it is already in transition: " + state);
return null;
}
case CLOSING:
case PENDING_CLOSE:
case FAILED_CLOSE:
case FAILED_OPEN:
unassign(region, state, -1, null, false, null);
state = regionStates.getRegionState(region);
if (state.isOffline()) break;
case CLOSED:
LOG.debug("Forcing OFFLINE; was=" + state);
state = regionStates.updateRegionState(
region, RegionState.State.OFFLINE);
case OFFLINE:
break;
default:
LOG.error("Trying to assign region " + region
+ ", which is in state " + state);
return null;
}
return state;
}
@ -2640,6 +2656,13 @@ public class AssignmentManager extends ZooKeeperListener {
ServerName regionLocation = region.getSecond();
if (regionInfo == null) continue;
regionStates.createRegionState(regionInfo);
if (regionStates.isRegionInState(regionInfo, State.SPLIT)) {
// Split is considered to be completed. If the split znode still
// exists, the region will be put back to SPLITTING state later
LOG.debug("Region " + regionInfo.getRegionNameAsString()
+ " split is completed. Hence need not add to regions list");
continue;
}
TableName tableName = regionInfo.getTableName();
if (regionLocation == null) {
// regionLocation could be null if createTable didn't finish properly.
@ -2672,19 +2695,6 @@ public class AssignmentManager extends ZooKeeperListener {
setEnabledTable(tableName);
}
} else {
// If region is in offline and split state check the ZKNode
if (regionInfo.isOffline() && regionInfo.isSplit()) {
String node = ZKAssign.getNodeName(this.watcher, regionInfo
.getEncodedName());
Stat stat = new Stat();
byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat);
// If znode does not exist, don't consider this region
if (data == null) {
LOG.debug("Region " + regionInfo.getRegionNameAsString()
+ " split is completed. Hence need not add to regions list");
continue;
}
}
// Region is being served and on an active server
// add only if region not in disabled or enabling table
if (!disabledOrEnablingTables.contains(tableName)) {

View File

@ -29,12 +29,12 @@ import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.util.Bytes;
@ -173,8 +173,9 @@ public class RegionStates {
}
/**
* Add a list of regions to RegionStates. The initial state is OFFLINE.
* If any region is already in RegionStates, that region will be skipped.
* Add a list of regions to RegionStates. If a region is split
* and offline, its state will be SPLIT. Otherwise, its state will
* be OFFLINE. Regions already in RegionStates will be skipped.
*/
public synchronized void createRegionStates(
final List<HRegionInfo> hris) {
@ -184,18 +185,20 @@ public class RegionStates {
}
/**
* Add a region to RegionStates. The initial state is OFFLINE.
* If it is already in RegionStates, this call has no effect,
* and the original state is returned.
* Add a region to RegionStates. If the region is split
* and offline, its state will be SPLIT. Otherwise, its state will
* be OFFLINE. If it is already in RegionStates, this call has
* no effect, and the original state is returned.
*/
public synchronized RegionState createRegionState(final HRegionInfo hri) {
State newState = (hri.isOffline() && hri.isSplit()) ? State.SPLIT : State.OFFLINE;
String regionName = hri.getEncodedName();
RegionState regionState = regionStates.get(regionName);
if (regionState != null) {
LOG.warn("Tried to create a state of a region already in RegionStates, " +
"used existing state: " + regionState + ", ignored new state: state=OFFLINE, server=null");
LOG.warn("Tried to create a state for a region already in RegionStates, "
+ "used existing: " + regionState + ", ignored new: " + newState);
} else {
regionState = new RegionState(hri, State.OFFLINE);
regionState = new RegionState(hri, newState);
regionStates.put(regionName, regionState);
}
return regionState;
@ -579,8 +582,8 @@ public class RegionStates {
}
return hri;
} catch (IOException e) {
server.abort("Aborting because error occoured while reading " +
Bytes.toStringBinary(regionName) + " from .META.", e);
server.abort("Aborting because error occoured while reading "
+ Bytes.toStringBinary(regionName) + " from .META.", e);
return null;
}
}

View File

@ -22,7 +22,6 @@ package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
@ -35,18 +34,11 @@ import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.regionserver.HRegion;
/**
@ -157,21 +149,4 @@ public abstract class ModifyRegionUtils {
});
return regionOpenAndInitThreadPool;
}
/**
 * Triggers immediate assignment of the given regions in round-robin fashion.
 * <p>
 * The regions are first registered with the assignment manager's region states
 * (via {@code createRegionStates}) and then passed to {@code assign}.
 *
 * @param assignmentManager the assignment manager used to register and assign the regions
 * @param regions the regions to assign
 * @throws InterruptedIOException if the calling thread is interrupted during assignment;
 *           the original {@link InterruptedException} is attached as the cause and the
 *           thread's interrupt status is restored before throwing
 * @throws IOException propagated from the underlying calls, if any
 */
public static void assignRegions(final AssignmentManager assignmentManager,
    final List<HRegionInfo> regions) throws IOException {
  try {
    assignmentManager.getRegionStates().createRegionStates(regions);
    assignmentManager.assign(regions);
  } catch (InterruptedException e) {
    LOG.error("Caught " + e + " during round-robin assignment");
    // Restore the interrupt status so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    // InterruptedIOException has no (message, cause) constructor; attach the cause explicitly
    // so the original stack trace is not lost.
    InterruptedIOException interrupted = new InterruptedIOException(e.getMessage());
    interrupted.initCause(e);
    throw interrupted;
  }
}
}

View File

@ -26,8 +26,8 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
@ -35,10 +35,14 @@ import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@ -48,7 +52,6 @@ import org.junit.experimental.categories.Category;
public class TestCreateTableHandler {
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static final Log LOG = LogFactory.getLog(TestCreateTableHandler.class);
private static final byte[] TABLENAME = Bytes.toBytes("TestCreateTableHandler");
private static final byte[] FAMILYNAME = Bytes.toBytes("fam");
private static boolean throwException = false;
@ -65,10 +68,11 @@ public class TestCreateTableHandler {
}
@Test (timeout=300000)
public void testCreateTableHandlerIfCalledTwoTimesAndFirstOneIsUnderProgress() throws Exception {
public void testCreateTableCalledTwiceAndFirstOneInProgress() throws Exception {
final byte[] tableName = Bytes.toBytes("testCreateTableCalledTwiceAndFirstOneInProgress");
final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
final HMaster m = cluster.getMaster();
final HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(TABLENAME));
final HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
desc.addFamily(new HColumnDescriptor(FAMILYNAME));
final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getTableName(), null,
null) };
@ -83,13 +87,48 @@ public class TestCreateTableHandler {
handler1.prepare();
handler1.process();
for (int i = 0; i < 100; i++) {
if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) {
if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) {
Thread.sleep(200);
}
}
assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(TABLENAME));
assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName));
}
/**
 * Runs a CreateTableHandler over two daughter regions plus a parent region that is
 * constructed with its split flag set and then marked offline. Verifies that the table
 * ends up enabled and that the master's RegionStates reports the parent in SPLIT state.
 */
@Test (timeout=300000)
public void testCreateTableWithSplitRegion() throws Exception {
  final byte[] tableName = Bytes.toBytes("testCreateTableWithSplitRegion");
  final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  final HMaster m = cluster.getMaster();
  final HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
  desc.addFamily(new HColumnDescriptor(FAMILYNAME));

  // Two daughters covering the key space on either side of the split point, and the
  // parent spanning the whole key space.
  byte[] splitPoint = Bytes.toBytes("split-point");
  long ts = System.currentTimeMillis();
  HRegionInfo d1 = new HRegionInfo(desc.getTableName(), null, splitPoint, false, ts);
  HRegionInfo d2 = new HRegionInfo(desc.getTableName(), splitPoint, null, false, ts + 1);
  HRegionInfo parent = new HRegionInfo(desc.getTableName(), null, null, true, ts + 2);
  parent.setOffline(true);

  Path tempdir = m.getMasterFileSystem().getTempDir();
  FileSystem fs = m.getMasterFileSystem().getFileSystem();
  Path tempTableDir = FSUtils.getTableDir(tempdir, desc.getTableName());
  fs.delete(tempTableDir, true); // Clean up temp table dir if exists

  final HRegionInfo[] hRegionInfos = new HRegionInfo[] {d1, d2, parent};
  CreateTableHandler handler = new CreateTableHandler(m, m.getMasterFileSystem(),
    desc, cluster.getConfiguration(), hRegionInfos, m);
  handler.prepare();
  handler.process();

  // Poll for availability at most 100 times, stopping as soon as the table is available
  // instead of re-querying the admin for every remaining iteration.
  for (int i = 0; i < 100 && !TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName); i++) {
    Thread.sleep(300);
  }
  assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName));

  RegionStates regionStates = m.getAssignmentManager().getRegionStates();
  assertTrue("Parent should be in SPLIT state",
    regionStates.isRegionInState(parent, State.SPLIT));
}
@Test (timeout=60000)
public void testMasterRestartAfterEnablingNodeIsCreated() throws Exception {
byte[] tableName = Bytes.toBytes("testMasterRestartAfterEnablingNodeIsCreated");
@ -106,9 +145,8 @@ public class TestCreateTableHandler {
handler.process();
abortAndStartNewMaster(cluster);
assertTrue(cluster.getLiveMasterThreads().size() == 1);
}
private void abortAndStartNewMaster(final MiniHBaseCluster cluster) throws IOException {
cluster.abortMaster(0);
cluster.waitOnMaster(0);

View File

@ -461,7 +461,6 @@ public class TestSplitTransactionOnCluster {
@Test(timeout = 180000)
public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
Configuration conf = TESTING_UTIL.getConfiguration();
ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
TableName userTableName =
TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
HTableDescriptor htd = new HTableDescriptor(userTableName);
@ -548,7 +547,7 @@ public class TestSplitTransactionOnCluster {
* @throws KeeperException
* @throws DeserializationException
*/
@Test(timeout = 300000)
@Test(timeout = 400000)
public void testMasterRestartWhenSplittingIsPartial()
throws IOException, InterruptedException, NodeExistsException,
KeeperException, DeserializationException, ServiceException {
@ -596,13 +595,30 @@ public class TestSplitTransactionOnCluster {
this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
// Update the hri to be offline and split.
// Update the region to be offline and split, so that HRegionInfo#equals
// returns true in checking rebuilt region states map.
hri.setOffline(true);
hri.setSplit(true);
ServerName regionServerOfRegion = master.getAssignmentManager()
.getRegionStates().getRegionServerOfRegion(hri);
assertTrue(regionServerOfRegion != null);
// Remove the block so that split can move ahead.
SplitRegionHandler.TEST_SKIP = false;
String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
Stat stat = new Stat();
byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
// ZKUtil.create
for (int i=0; data != null && i<60; i++) {
Thread.sleep(1000);
data = ZKUtil.getDataNoWatch(zkw, node, stat);
}
assertNull("Waited too long for ZK node to be removed: "+node, data);
RegionStates regionStates = master.getAssignmentManager().getRegionStates();
assertTrue("Split parent should be in SPLIT state",
regionStates.isRegionInState(hri, State.SPLIT));
regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
assertTrue(regionServerOfRegion == null);
} finally {
// Set this flag back.
SplitRegionHandler.TEST_SKIP = false;
@ -661,7 +677,6 @@ public class TestSplitTransactionOnCluster {
for (int i=0; data != null && i<60; i++) {
Thread.sleep(1000);
data = ZKUtil.getDataNoWatch(zkw, node, stat);
}
assertNull("Waited too long for ZK node to be removed: "+node, data);
@ -669,10 +684,14 @@ public class TestSplitTransactionOnCluster {
this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
// Update the region to be offline and split, so that HRegionInfo#equals
// returns true in checking rebuilt region states map.
hri.setOffline(true);
hri.setSplit(true);
ServerName regionServerOfRegion = master.getAssignmentManager()
.getRegionStates().getRegionServerOfRegion(hri);
RegionStates regionStates = master.getAssignmentManager().getRegionStates();
assertTrue("Split parent should be in SPLIT state",
regionStates.isRegionInState(hri, State.SPLIT));
ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
assertTrue(regionServerOfRegion == null);
} finally {
// Set this flag back.
@ -844,7 +863,7 @@ public class TestSplitTransactionOnCluster {
final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
try {
// Create table then get the single region for our new table.
HTable t = createTableAndWait(tableName, Bytes.toBytes("cf"));
createTableAndWait(tableName, Bytes.toBytes("cf"));
List<HRegion> regions = awaitTableRegions(tableName);
assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
@ -937,7 +956,6 @@ public class TestSplitTransactionOnCluster {
}
private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
HRegion region = null;
for (int i = 0; i < 5; ++i) {
for (HRegion r: regions) {
if (r.isSplittable()) {
@ -986,17 +1004,6 @@ public class TestSplitTransactionOnCluster {
ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
}
/**
 * Deletes the row keyed by the given region name from the META table.
 *
 * @param regionName row key (region name) to delete from META
 * @throws IOException if opening the META table, issuing the delete, or closing the table fails
 */
private void removeDaughterFromMeta(final byte [] regionName) throws IOException {
  HTable metaTable = new HTable(TESTING_UTIL.getConfiguration(), TableName.META_TABLE_NAME);
  try {
    metaTable.delete(new Delete(regionName));
    // Log only after the delete has returned, so the message never reports a deletion
    // that actually failed.
    LOG.info("Deleted " + Bytes.toString(regionName));
  } finally {
    metaTable.close();
  }
}
/**
* Ensure single table region is not on same server as the single .META. table
* region.
@ -1129,10 +1136,11 @@ public class TestSplitTransactionOnCluster {
}
/**
 * IOException subtype carrying no detail message or cause.
 * NOTE(review): judging by the name, this signals a failure to create the splitting
 * znode in these tests - confirm against the code that throws it.
 */
private static class SplittingNodeCreationFailedException extends IOException {
  private static final long serialVersionUID = 1652404976265623004L;

  /** Creates the exception without a detail message. */
  public SplittingNodeCreationFailedException() {
    // Implicitly invokes IOException's no-argument constructor.
  }
}
}