HBASE-9052 Prevent split/merged region from assigning again

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1508247 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jxiang 2013-07-29 23:54:29 +00:00
parent 417593e3aa
commit fd991fe908
8 changed files with 131 additions and 40 deletions

View File

@ -126,6 +126,10 @@ public class RegionState implements org.apache.hadoop.io.Writable {
return state == State.SPLITTING;
}
public boolean isSplit() {
return state == State.SPLIT;
}
public boolean isFailedOpen() {
return state == State.FAILED_OPEN;
}
@ -138,6 +142,10 @@ public class RegionState implements org.apache.hadoop.io.Writable {
return state == State.MERGING;
}
public boolean isMerged() {
return state == State.MERGED;
}
public boolean isOpenOrMergingOnServer(final ServerName sn) {
return isOnServer(sn) && (isOpened() || isMerging());
}

View File

@ -62,6 +62,7 @@ import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
@ -1415,10 +1416,7 @@ public class AssignmentManager extends ZooKeeperListener {
* @param regionInfo
*/
public void regionOffline(final HRegionInfo regionInfo) {
regionStates.regionOffline(regionInfo);
removeClosedRegion(regionInfo);
// remove the region plan as well just in case.
clearRegionPlan(regionInfo);
regionOffline(regionInfo, null);
}
public void offlineDisabledRegion(HRegionInfo regionInfo) {
@ -2355,7 +2353,7 @@ public class AssignmentManager extends ZooKeeperListener {
public boolean waitForAssignment(HRegionInfo regionInfo)
throws InterruptedException {
while (!regionStates.isRegionAssigned(regionInfo)) {
if (regionStates.isRegionFailedToOpen(regionInfo)
if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN)
|| this.server.isStopped()) {
return false;
}
@ -3102,7 +3100,7 @@ public class AssignmentManager extends ZooKeeperListener {
*/
public void handleSplitReport(final ServerName sn, final HRegionInfo parent,
final HRegionInfo a, final HRegionInfo b) {
regionOffline(parent);
regionOffline(parent, State.SPLIT);
regionOnline(a, sn);
regionOnline(b, sn);
@ -3126,8 +3124,8 @@ public class AssignmentManager extends ZooKeeperListener {
*/
public void handleRegionsMergeReport(final ServerName sn,
final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) {
regionOffline(a);
regionOffline(b);
regionOffline(a, State.MERGED);
regionOffline(b, State.MERGED);
regionOnline(merged, sn);
// There's a possibility that the region was merging while a user asked
@ -3238,4 +3236,16 @@ public class AssignmentManager extends ZooKeeperListener {
regionStates.updateRegionState(merging_b, RegionState.State.MERGING);
return true;
}
/**
* A region is offline. The new state should be the specified one,
* if not null. If the specified state is null, the new state is Offline.
* The specified state can be Split/Merged/Offline/null only.
*/
private void regionOffline(final HRegionInfo regionInfo, final State state) {
regionStates.regionOffline(regionInfo, state);
removeClosedRegion(regionInfo);
// remove the region plan as well just in case.
clearRegionPlan(regionInfo);
}
}

View File

@ -42,6 +42,8 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import com.google.common.base.Preconditions;
/**
* Region state accountant. It holds the states of all regions in the memory.
* In normal scenario, it should match the meta table and the true region states.
@ -136,21 +138,13 @@ public class RegionStates {
}
/**
* @return True if specified region failed to open.
* @return True if specified region is in specified state
*/
public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
RegionState regionState = getRegionTransitionState(hri);
State state = regionState != null ? regionState.getState() : null;
return state == State.FAILED_OPEN;
}
/**
* @return True if specified region failed to close.
*/
public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
RegionState regionState = getRegionTransitionState(hri);
State state = regionState != null ? regionState.getState() : null;
return state == State.FAILED_CLOSE;
public synchronized boolean isRegionInState(
final HRegionInfo hri, final State state) {
RegionState regionState = getRegionState(hri);
State s = regionState != null ? regionState.getState() : null;
return s == state;
}
/**
@ -315,20 +309,42 @@ public class RegionStates {
/**
* A region is offline, won't be in transition any more.
*/
public synchronized void regionOffline(final HRegionInfo hri) {
public void regionOffline(final HRegionInfo hri) {
regionOffline(hri, null);
}
/**
* A region is offline, won't be in transition any more.
* Its state should be the specified expected state, which
* can be Split/Merged/Offline/null(=Offline) only.
*/
public synchronized void regionOffline(
final HRegionInfo hri, final State expectedState) {
Preconditions.checkArgument(expectedState == null
|| expectedState == State.OFFLINE || expectedState == State.SPLIT
|| expectedState == State.MERGED, "Offlined region should be in state"
+ " OFFLINE/SPLIT/MERGED instead of " + expectedState);
String regionName = hri.getEncodedName();
RegionState oldState = regionStates.get(regionName);
if (oldState == null) {
LOG.warn("Offline a region not in RegionStates: " + hri.getShortNameToLog());
} else {
} else if (LOG.isDebugEnabled()) {
State state = oldState.getState();
ServerName sn = oldState.getServerName();
if (state != State.OFFLINE || sn != null) {
LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state=" + state +
", expected state=OFFLINE" + ", assigned to server: " + sn + ", expected null");
if (state != State.OFFLINE
&& state != State.SPLITTING && state != State.MERGING) {
LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state="
+ state + ", expected state=OFFLINE/SPLITTING/MERGING");
}
if (sn != null && state == State.OFFLINE) {
LOG.debug("Offline a region " + hri.getShortNameToLog()
+ " with current state=OFFLINE, assigned to server: "
+ sn + ", expected null");
}
}
updateRegionState(hri, State.OFFLINE);
State newState = expectedState;
if (newState == null) newState = State.OFFLINE;
updateRegionState(hri, newState);
regionsInTransition.remove(regionName);
ServerName oldServerName = regionAssignments.remove(hri);

View File

@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;
@ -73,7 +74,7 @@ public class DeleteTableHandler extends TableEventHandler {
for (HRegionInfo region : regions) {
long done = System.currentTimeMillis() + waitTime;
while (System.currentTimeMillis() < done) {
if (states.isRegionFailedToOpen(region)) {
if (states.isRegionInState(region, State.FAILED_OPEN)) {
am.regionOffline(region);
}
if (!states.isRegionInTransition(region)) break;

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.TableLockManager;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.zookeeper.KeeperException;
@ -203,7 +204,9 @@ public class DisableTableHandler extends EventHandler {
RegionStates regionStates = assignmentManager.getRegionStates();
for (HRegionInfo region: regions) {
if (regionStates.isRegionInTransition(region)
&& !regionStates.isRegionFailedToClose(region)) continue;
&& !regionStates.isRegionInState(region, State.FAILED_CLOSE)) {
continue;
}
final HRegionInfo hri = region;
pool.execute(Trace.wrap(new Runnable() {
public void run() {

View File

@ -380,10 +380,12 @@ public class TestAssignmentManager {
RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
am.balance(plan);
RegionStates regionStates = am.getRegionStates();
// Must be failed to close since the server is fake
assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO));
assertTrue(regionStates.isRegionInTransition(REGIONINFO)
&& regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
// Move it back to pending_close
am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
// Now fake the region closing successfully over on the regionserver; the
// regionserver will have set the region in CLOSED state. This will
@ -413,7 +415,7 @@ public class TestAssignmentManager {
ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
assertNotSame(-1, versionid);
// Wait on the handler removing the OPENED znode.
while(am.getRegionStates().isRegionInTransition(REGIONINFO)) Threads.sleep(1);
while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
} finally {
executor.shutdown();
am.shutdown();

View File

@ -26,6 +26,7 @@ import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.List;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
@ -46,10 +47,14 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.exceptions.UnknownRegionException;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@ -98,7 +103,7 @@ public class TestRegionMergeTransactionOnCluster {
public static void afterAllTests() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
@Test
public void testWholesomeMerge() throws Exception {
LOG.info("Starting testWholesomeMerge");
@ -111,11 +116,31 @@ public class TestRegionMergeTransactionOnCluster {
INITIAL_REGION_NUM - 1);
// Merge 2nd and 3th region
mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2,
PairOfSameType<HRegionInfo> mergedRegions =
mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2,
INITIAL_REGION_NUM - 2);
verifyRowCount(table, ROWSIZE);
// Randomly choose one of the two merged regions
HRegionInfo hri = RandomUtils.nextBoolean() ?
mergedRegions.getFirst() : mergedRegions.getSecond();
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
AssignmentManager am = cluster.getMaster().getAssignmentManager();
RegionStates regionStates = am.getRegionStates();
long start = EnvironmentEdgeManager.currentTimeMillis();
while (!regionStates.isRegionInState(hri, State.MERGED)) {
assertFalse("Timed out in waiting one merged region to be in state MERGED",
EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
Thread.sleep(500);
}
// We should not be able to assign it again
am.assign(hri, true, true);
assertFalse("Merged region should not be in transition again",
regionStates.isRegionInTransition(hri)
&& regionStates.isRegionInState(hri, State.MERGED));
table.close();
}
@ -246,20 +271,27 @@ public class TestRegionMergeTransactionOnCluster {
}
}
private void mergeRegionsAndVerifyRegionNum(HMaster master, byte[] tablename,
private PairOfSameType<HRegionInfo> mergeRegionsAndVerifyRegionNum(
HMaster master, byte[] tablename,
int regionAnum, int regionBnum, int expectedRegionNum) throws Exception {
requestMergeRegion(master, tablename, regionAnum, regionBnum);
PairOfSameType<HRegionInfo> mergedRegions =
requestMergeRegion(master, tablename, regionAnum, regionBnum);
waitAndVerifyRegionNum(master, tablename, expectedRegionNum);
return mergedRegions;
}
private void requestMergeRegion(HMaster master, byte[] tablename,
private PairOfSameType<HRegionInfo> requestMergeRegion(
HMaster master, byte[] tablename,
int regionAnum, int regionBnum) throws Exception {
List<Pair<HRegionInfo, ServerName>> tableRegions = MetaReader
.getTableRegionsAndLocations(master.getCatalogTracker(),
Bytes.toString(tablename));
HRegionInfo regionA = tableRegions.get(regionAnum).getFirst();
HRegionInfo regionB = tableRegions.get(regionBnum).getFirst();
TEST_UTIL.getHBaseAdmin().mergeRegions(
tableRegions.get(regionAnum).getFirst().getEncodedNameAsBytes(),
tableRegions.get(regionBnum).getFirst().getEncodedNameAsBytes(), false);
regionA.getEncodedNameAsBytes(),
regionB.getEncodedNameAsBytes(), false);
return new PairOfSameType<HRegionInfo>(regionA, regionB);
}
private void waitAndVerifyRegionNum(HMaster master, byte[] tablename,

View File

@ -61,12 +61,15 @@ import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
import org.apache.hadoop.hbase.exceptions.UnknownRegionException;
import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
@ -811,6 +814,22 @@ public class TestSplitTransactionOnCluster {
FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
assertEquals("Expected nothing but found " + storefilesAfter.toString(),
storefilesAfter.size(), 0);
hri = region.getRegionInfo(); // split parent
AssignmentManager am = cluster.getMaster().getAssignmentManager();
RegionStates regionStates = am.getRegionStates();
long start = EnvironmentEdgeManager.currentTimeMillis();
while (!regionStates.isRegionInState(hri, State.SPLIT)) {
assertFalse("Timed out in waiting split parent to be in state SPLIT",
EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
Thread.sleep(500);
}
// We should not be able to assign it again
am.assign(hri, true, true);
assertFalse("Split region should not be in transition again",
regionStates.isRegionInTransition(hri)
&& regionStates.isRegionInState(hri, State.SPLIT));
} finally {
admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);