HBASE-9052 Prevent split/merged region from assigning again
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1508247 13f79535-47bb-0310-9956-ffa450edef68
parent 417593e3aa
commit fd991fe908
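Note (editorial illustration, not part of the patch): the hunks below record a terminal SPLIT/MERGED state for the parent/source regions of a split or merge, so the master can refuse to assign them again. A minimal sketch of how calling code could use the RegionStates.isRegionInState() check added by this commit; the AssignGuardSketch class and its method are hypothetical.

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.RegionStates;

// Hypothetical helper, for illustration only; not part of this commit.
public class AssignGuardSketch {
  // A region recorded as SPLIT or MERGED is gone for good and must not be
  // handed back to the assignment path.
  public static boolean isAssignable(RegionStates regionStates, HRegionInfo hri) {
    return !regionStates.isRegionInState(hri, State.SPLIT)
        && !regionStates.isRegionInState(hri, State.MERGED);
  }
}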
@@ -126,6 +126,10 @@ public class RegionState implements org.apache.hadoop.io.Writable {
     return state == State.SPLITTING;
   }
 
+  public boolean isSplit() {
+    return state == State.SPLIT;
+  }
+
   public boolean isFailedOpen() {
     return state == State.FAILED_OPEN;
   }
 
@@ -138,6 +142,10 @@ public class RegionState implements org.apache.hadoop.io.Writable {
     return state == State.MERGING;
   }
 
+  public boolean isMerged() {
+    return state == State.MERGED;
+  }
+
   public boolean isOpenOrMergingOnServer(final ServerName sn) {
     return isOnServer(sn) && (isOpened() || isMerging());
   }
@@ -62,6 +62,7 @@ import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
 import org.apache.hadoop.hbase.executor.EventHandler;
 import org.apache.hadoop.hbase.executor.EventType;
 import org.apache.hadoop.hbase.executor.ExecutorService;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
 import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
@@ -1415,10 +1416,7 @@ public class AssignmentManager extends ZooKeeperListener {
    * @param regionInfo
    */
   public void regionOffline(final HRegionInfo regionInfo) {
-    regionStates.regionOffline(regionInfo);
-    removeClosedRegion(regionInfo);
-    // remove the region plan as well just in case.
-    clearRegionPlan(regionInfo);
+    regionOffline(regionInfo, null);
   }
 
   public void offlineDisabledRegion(HRegionInfo regionInfo) {
@@ -2355,7 +2353,7 @@ public class AssignmentManager extends ZooKeeperListener {
   public boolean waitForAssignment(HRegionInfo regionInfo)
       throws InterruptedException {
     while (!regionStates.isRegionAssigned(regionInfo)) {
-      if (regionStates.isRegionFailedToOpen(regionInfo)
+      if (regionStates.isRegionInState(regionInfo, State.FAILED_OPEN)
           || this.server.isStopped()) {
         return false;
       }
@@ -3102,7 +3100,7 @@ public class AssignmentManager extends ZooKeeperListener {
    */
   public void handleSplitReport(final ServerName sn, final HRegionInfo parent,
       final HRegionInfo a, final HRegionInfo b) {
-    regionOffline(parent);
+    regionOffline(parent, State.SPLIT);
     regionOnline(a, sn);
     regionOnline(b, sn);
 
@@ -3126,8 +3124,8 @@ public class AssignmentManager extends ZooKeeperListener {
    */
   public void handleRegionsMergeReport(final ServerName sn,
       final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) {
-    regionOffline(a);
-    regionOffline(b);
+    regionOffline(a, State.MERGED);
+    regionOffline(b, State.MERGED);
     regionOnline(merged, sn);
 
     // There's a possibility that the region was merging while a user asked
@@ -3238,4 +3236,16 @@ public class AssignmentManager extends ZooKeeperListener {
     regionStates.updateRegionState(merging_b, RegionState.State.MERGING);
     return true;
   }
+
+  /**
+   * A region is offline. The new state should be the specified one,
+   * if not null. If the specified state is null, the new state is Offline.
+   * The specified state can be Split/Merged/Offline/null only.
+   */
+  private void regionOffline(final HRegionInfo regionInfo, final State state) {
+    regionStates.regionOffline(regionInfo, state);
+    removeClosedRegion(regionInfo);
+    // remove the region plan as well just in case.
+    clearRegionPlan(regionInfo);
+  }
 }
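Note (editorial illustration, not part of the patch): with the AssignmentManager hunks above, handleSplitReport() and handleRegionsMergeReport() now offline the parent/source regions with an explicit SPLIT/MERGED state instead of plain OFFLINE. A sketch of the observable behavior this enables; the SplitReportSketch class and the caller-supplied arguments are hypothetical.

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.RegionState.State;

// Hypothetical check, for illustration only; not part of this commit.
public class SplitReportSketch {
  // After the master handles a split report, the parent should be recorded
  // as SPLIT (not plain OFFLINE), so it can no longer be assigned again.
  static boolean parentMarkedSplit(AssignmentManager am, ServerName sn,
      HRegionInfo parent, HRegionInfo daughterA, HRegionInfo daughterB) {
    am.handleSplitReport(sn, parent, daughterA, daughterB);
    return am.getRegionStates().isRegionInState(parent, State.SPLIT);
  }
}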
@@ -42,6 +42,8 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
 
+import com.google.common.base.Preconditions;
+
 /**
  * Region state accountant. It holds the states of all regions in the memory.
  * In normal scenario, it should match the meta table and the true region states.
@@ -136,21 +138,13 @@ public class RegionStates {
   }
 
   /**
-   * @return True if specified region failed to open.
+   * @return True if specified region is in specified state
    */
-  public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
-    RegionState regionState = getRegionTransitionState(hri);
-    State state = regionState != null ? regionState.getState() : null;
-    return state == State.FAILED_OPEN;
-  }
-
-  /**
-   * @return True if specified region failed to close.
-   */
-  public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
-    RegionState regionState = getRegionTransitionState(hri);
-    State state = regionState != null ? regionState.getState() : null;
-    return state == State.FAILED_CLOSE;
+  public synchronized boolean isRegionInState(
+      final HRegionInfo hri, final State state) {
+    RegionState regionState = getRegionState(hri);
+    State s = regionState != null ? regionState.getState() : null;
+    return s == state;
   }
 
   /**
@@ -315,20 +309,42 @@ public class RegionStates {
   /**
    * A region is offline, won't be in transition any more.
    */
-  public synchronized void regionOffline(final HRegionInfo hri) {
+  public void regionOffline(final HRegionInfo hri) {
+    regionOffline(hri, null);
+  }
+
+  /**
+   * A region is offline, won't be in transition any more.
+   * Its state should be the specified expected state, which
+   * can be Split/Merged/Offline/null(=Offline) only.
+   */
+  public synchronized void regionOffline(
+      final HRegionInfo hri, final State expectedState) {
+    Preconditions.checkArgument(expectedState == null
+      || expectedState == State.OFFLINE || expectedState == State.SPLIT
+      || expectedState == State.MERGED, "Offlined region should be in state"
+      + " OFFLINE/SPLIT/MERGED instead of " + expectedState);
     String regionName = hri.getEncodedName();
     RegionState oldState = regionStates.get(regionName);
     if (oldState == null) {
       LOG.warn("Offline a region not in RegionStates: " + hri.getShortNameToLog());
-    } else {
+    } else if (LOG.isDebugEnabled()) {
       State state = oldState.getState();
       ServerName sn = oldState.getServerName();
-      if (state != State.OFFLINE || sn != null) {
-        LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state=" + state +
-          ", expected state=OFFLINE" + ", assigned to server: " + sn + ", expected null");
+      if (state != State.OFFLINE
+          && state != State.SPLITTING && state != State.MERGING) {
+        LOG.debug("Offline a region " + hri.getShortNameToLog() + " with current state="
+          + state + ", expected state=OFFLINE/SPLITTING/MERGING");
+      }
+      if (sn != null && state == State.OFFLINE) {
+        LOG.debug("Offline a region " + hri.getShortNameToLog()
+          + " with current state=OFFLINE, assigned to server: "
+          + sn + ", expected null");
       }
     }
-    updateRegionState(hri, State.OFFLINE);
+    State newState = expectedState;
+    if (newState == null) newState = State.OFFLINE;
+    updateRegionState(hri, newState);
     regionsInTransition.remove(regionName);
 
     ServerName oldServerName = regionAssignments.remove(hri);
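Note (editorial illustration, not part of the patch): the new RegionStates.regionOffline(hri, expectedState) above accepts only null, OFFLINE, SPLIT, or MERGED; Guava's Preconditions.checkArgument rejects anything else with an IllegalArgumentException. A sketch of that contract under those assumptions; the RegionOfflineSketch class and the way the handles are obtained are hypothetical.

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.RegionStates;

// Hypothetical usage sketch, for illustration only; not part of this commit.
public class RegionOfflineSketch {
  static void offlineExamples(RegionStates regionStates, HRegionInfo hri) {
    regionStates.regionOffline(hri);               // delegates to regionOffline(hri, null) -> OFFLINE
    regionStates.regionOffline(hri, State.SPLIT);  // parent of a completed split
    regionStates.regionOffline(hri, State.MERGED); // source region of a completed merge
    try {
      regionStates.regionOffline(hri, State.OPEN); // rejected by the Preconditions check
    } catch (IllegalArgumentException expected) {
      // "Offlined region should be in state OFFLINE/SPLIT/MERGED instead of OPEN"
    }
  }
}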
@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.RegionStates;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
@@ -73,7 +74,7 @@ public class DeleteTableHandler extends TableEventHandler {
     for (HRegionInfo region : regions) {
       long done = System.currentTimeMillis() + waitTime;
       while (System.currentTimeMillis() < done) {
-        if (states.isRegionFailedToOpen(region)) {
+        if (states.isRegionInState(region, State.FAILED_OPEN)) {
           am.regionOffline(region);
         }
         if (!states.isRegionInTransition(region)) break;
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.master.TableLockManager;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.zookeeper.KeeperException;
@@ -203,7 +204,9 @@ public class DisableTableHandler extends EventHandler {
     RegionStates regionStates = assignmentManager.getRegionStates();
     for (HRegionInfo region: regions) {
       if (regionStates.isRegionInTransition(region)
-        && !regionStates.isRegionFailedToClose(region)) continue;
+          && !regionStates.isRegionInState(region, State.FAILED_CLOSE)) {
+        continue;
+      }
       final HRegionInfo hri = region;
       pool.execute(Trace.wrap(new Runnable() {
         public void run() {
@@ -380,10 +380,12 @@ public class TestAssignmentManager {
     RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
     am.balance(plan);
 
+    RegionStates regionStates = am.getRegionStates();
     // Must be failed to close since the server is fake
-    assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO));
+    assertTrue(regionStates.isRegionInTransition(REGIONINFO)
+      && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
     // Move it back to pending_close
-    am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
+    regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
 
     // Now fake the region closing successfully over on the regionserver; the
     // regionserver will have set the region in CLOSED state. This will
@@ -413,7 +415,7 @@ public class TestAssignmentManager {
       ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
       assertNotSame(-1, versionid);
       // Wait on the handler removing the OPENED znode.
-      while(am.getRegionStates().isRegionInTransition(REGIONINFO)) Threads.sleep(1);
+      while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
     } finally {
       executor.shutdown();
       am.shutdown();
@@ -26,6 +26,7 @@ import static org.junit.Assert.fail;
 import java.io.IOException;
 import java.util.List;
 
+import org.apache.commons.lang.math.RandomUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileSystem;
@@ -46,10 +47,14 @@ import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.exceptions.MergeRegionException;
 import org.apache.hadoop.hbase.exceptions.UnknownRegionException;
+import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionStates;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -98,7 +103,7 @@ public class TestRegionMergeTransactionOnCluster {
   public static void afterAllTests() throws Exception {
     TEST_UTIL.shutdownMiniCluster();
   }
 
   @Test
   public void testWholesomeMerge() throws Exception {
     LOG.info("Starting testWholesomeMerge");
@@ -111,11 +116,31 @@ public class TestRegionMergeTransactionOnCluster {
         INITIAL_REGION_NUM - 1);
 
     // Merge 2nd and 3th region
-    mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2,
+    PairOfSameType<HRegionInfo> mergedRegions =
+      mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2,
         INITIAL_REGION_NUM - 2);
 
     verifyRowCount(table, ROWSIZE);
 
+    // Randomly choose one of the two merged regions
+    HRegionInfo hri = RandomUtils.nextBoolean() ?
+      mergedRegions.getFirst() : mergedRegions.getSecond();
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    AssignmentManager am = cluster.getMaster().getAssignmentManager();
+    RegionStates regionStates = am.getRegionStates();
+    long start = EnvironmentEdgeManager.currentTimeMillis();
+    while (!regionStates.isRegionInState(hri, State.MERGED)) {
+      assertFalse("Timed out in waiting one merged region to be in state MERGED",
+        EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
+      Thread.sleep(500);
+    }
+
+    // We should not be able to assign it again
+    am.assign(hri, true, true);
+    assertFalse("Merged region should not be in transition again",
+      regionStates.isRegionInTransition(hri)
+      && regionStates.isRegionInState(hri, State.MERGED));
+
     table.close();
   }
 
@@ -246,20 +271,27 @@ public class TestRegionMergeTransactionOnCluster {
       }
     }
 
-  private void mergeRegionsAndVerifyRegionNum(HMaster master, byte[] tablename,
+  private PairOfSameType<HRegionInfo> mergeRegionsAndVerifyRegionNum(
+      HMaster master, byte[] tablename,
       int regionAnum, int regionBnum, int expectedRegionNum) throws Exception {
-    requestMergeRegion(master, tablename, regionAnum, regionBnum);
+    PairOfSameType<HRegionInfo> mergedRegions =
+      requestMergeRegion(master, tablename, regionAnum, regionBnum);
     waitAndVerifyRegionNum(master, tablename, expectedRegionNum);
+    return mergedRegions;
   }
 
-  private void requestMergeRegion(HMaster master, byte[] tablename,
+  private PairOfSameType<HRegionInfo> requestMergeRegion(
+      HMaster master, byte[] tablename,
      int regionAnum, int regionBnum) throws Exception {
    List<Pair<HRegionInfo, ServerName>> tableRegions = MetaReader
        .getTableRegionsAndLocations(master.getCatalogTracker(),
            Bytes.toString(tablename));
+    HRegionInfo regionA = tableRegions.get(regionAnum).getFirst();
+    HRegionInfo regionB = tableRegions.get(regionBnum).getFirst();
    TEST_UTIL.getHBaseAdmin().mergeRegions(
-        tableRegions.get(regionAnum).getFirst().getEncodedNameAsBytes(),
-        tableRegions.get(regionBnum).getFirst().getEncodedNameAsBytes(), false);
+        regionA.getEncodedNameAsBytes(),
+        regionB.getEncodedNameAsBytes(), false);
+    return new PairOfSameType<HRegionInfo>(regionA, regionB);
   }
 
   private void waitAndVerifyRegionNum(HMaster master, byte[] tablename,
@@ -61,12 +61,15 @@ import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
 import org.apache.hadoop.hbase.exceptions.UnknownRegionException;
 import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.executor.EventType;
+import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.RegionStates;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
@@ -811,6 +814,22 @@ public class TestSplitTransactionOnCluster {
           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
       assertEquals("Expected nothing but found " + storefilesAfter.toString(),
         storefilesAfter.size(), 0);
+
+      hri = region.getRegionInfo(); // split parent
+      AssignmentManager am = cluster.getMaster().getAssignmentManager();
+      RegionStates regionStates = am.getRegionStates();
+      long start = EnvironmentEdgeManager.currentTimeMillis();
+      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
+        assertFalse("Timed out in waiting split parent to be in state SPLIT",
+          EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
+        Thread.sleep(500);
+      }
+
+      // We should not be able to assign it again
+      am.assign(hri, true, true);
+      assertFalse("Split region should not be in transition again",
+        regionStates.isRegionInTransition(hri)
+        && regionStates.isRegionInState(hri, State.SPLIT));
     } finally {
       admin.setBalancerRunning(true, false);
       cluster.getMaster().setCatalogJanitorEnabled(true);