HBASE-25130 [branch-1] Master's in-memory serverHoldings map is not cleared during hbck repair (#3402)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
commit 395eb0c8e0 (parent b2f8ec993e)
@@ -1661,6 +1661,19 @@ public class AssignmentManager extends ZooKeeperListener {
     regionOffline(regionInfo, null);
   }
 
+  /**
+   * Marks the region as offline and, when forced, also removes it from the
+   * replicas and the master's in-memory server holding map, so that the
+   * region will not be re-opened on any other region server.
+   * <p>
+   * @param regionInfo the region info.
+   * @param force set to true to force removal from the replicas and the
+   *          master's in-memory server holding map; the only use case is
+   *          hbck for now.
+   */
+  public void regionOffline(final HRegionInfo regionInfo, boolean force) {
+    regionOffline(regionInfo, null, force);
+  }
+
   public void offlineDisabledRegion(HRegionInfo regionInfo) {
     if (useZKForAssignment) {
       // Disabling so should not be reassigned, just delete the CLOSED node
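To illustrate what the new overload is meant to guarantee, here is a hypothetical mini-cluster assertion (a sketch only; HMaster, TEST_UTIL, hri, and the JUnit assert are assumed from the test context at the end of this commit, and none of this code is part of the patch):

    // Hypothetical sketch: force-offlining a region should purge it from the
    // master's in-memory bookkeeping so it cannot be re-opened elsewhere.
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
    AssignmentManager am = master.getAssignmentManager();

    am.regionOffline(hri, true); // the new forced variant added above

    // No server is recorded as holding the region any more.
    assertNull(am.getRegionStates().getRegionServerOfRegion(hri));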
@@ -4551,13 +4564,20 @@ public class AssignmentManager extends ZooKeeperListener {
   public Map<String, AtomicInteger> getFailedOpenTracker() {return failedOpenTracker;}
 
+  private void regionOffline(final HRegionInfo regionInfo, final State state) {
+    regionOffline(regionInfo, state, false);
+  }
+
   /**
    * A region is offline. The new state should be the specified one,
    * if not null. If the specified state is null, the new state is Offline.
    * The specified state can be Split/Merged/Offline/null only.
+   *
+   * If the offline is initiated by an RPC call from the admin, we force-offline
+   * the region.
    */
-  private void regionOffline(final HRegionInfo regionInfo, final State state) {
-    regionStates.regionOffline(regionInfo, state);
+  private void regionOffline(final HRegionInfo regionInfo, final State state,
+      final boolean force) {
+    regionStates.regionOffline(regionInfo, state, force);
     removeClosedRegion(regionInfo);
     // remove the region plan as well just in case.
     clearRegionPlan(regionInfo);
@@ -4566,7 +4586,7 @@ public class AssignmentManager extends ZooKeeperListener {
     // Tell our listeners that a region was closed
     sendRegionClosedNotification(regionInfo);
     // also note that all the replicas of the primary should be closed
-    if (state != null && state.equals(State.SPLIT)) {
+    if (force || (state != null && state.equals(State.SPLIT))) {
       Collection<HRegionInfo> c = new ArrayList<HRegionInfo>(1);
       c.add(regionInfo);
       Map<ServerName, List<HRegionInfo>> map = regionStates.getRegionAssignments(c);
@@ -4575,7 +4595,7 @@ public class AssignmentManager extends ZooKeeperListener {
         replicasToClose.addAll(list);
       }
     }
-    else if (state != null && state.equals(State.MERGED)) {
+    else if (force || (state != null && state.equals(State.MERGED))) {
       Collection<HRegionInfo> c = new ArrayList<HRegionInfo>(1);
       c.add(regionInfo);
       Map<ServerName, List<HRegionInfo>> map = regionStates.getRegionAssignments(c);
@@ -1401,7 +1401,7 @@ public class MasterRpcServices extends RSRpcServices
       master.cpHost.preRegionOffline(hri);
     }
     LOG.info(master.getClientIdAuditPrefix() + " offline " + hri.getRegionNameAsString());
-    master.assignmentManager.regionOffline(hri);
+    master.assignmentManager.regionOffline(hri, true);
     if (master.cpHost != null) {
       master.cpHost.postRegionOffline(hri);
     }
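This RPC handler backs the client-side Admin#offline() call that hbck ultimately drives, so with the change above a forced offline now also clears the master's in-memory state. A minimal client sketch, assuming standard HBase 1.x client APIs (the class name and argument handling are illustrative, not part of this commit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ForceOfflineClient {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
          // Full region name, e.g. "table,startkey,timestamp.encodedname."
          byte[] regionName = Bytes.toBytes(args[0]);
          // With this patch the master passes force=true to regionOffline(),
          // so the region is also purged from the serverHoldings map.
          admin.offline(regionName);
        }
      }
    }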
@@ -669,7 +669,7 @@ public class RegionStates {
    * A region is offline, won't be in transition any more.
    */
   public void regionOffline(final HRegionInfo hri) {
-    regionOffline(hri, null);
+    regionOffline(hri, null, false);
   }
 
   /**
@@ -678,7 +678,7 @@ public class RegionStates {
    * Split/Merged/Offline/null(=Offline)/SplittingNew/MergingNew.
    */
   public void regionOffline(
-      final HRegionInfo hri, final State expectedState) {
+      final HRegionInfo hri, final State expectedState, final boolean force) {
     Preconditions.checkArgument(expectedState == null
         || RegionState.isUnassignable(expectedState),
       "Offlined region should not be " + expectedState);
@@ -713,9 +713,9 @@ public class RegionStates {
     regionsInTransition.remove(encodedName);
     ServerName oldServerName = regionAssignments.remove(hri);
     if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
-      if (newState == State.MERGED || newState == State.SPLIT
+      if (force || (newState == State.MERGED || newState == State.SPLIT
           || hri.isMetaRegion() || tableStateManager.isTableState(hri.getTable(),
-            TableState.State.DISABLED, TableState.State.DISABLING)) {
+            TableState.State.DISABLED, TableState.State.DISABLING))) {
         // Offline the region only if it's merged/split, or the table is disabled/disabling.
         // Otherwise, offline it from this server only when it is online on a different server.
         LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
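To see why the added force check matters, the following is a simplified standalone model (plain Java, not HBase code; all names are illustrative) of the serverHoldings bookkeeping: without force, a region that is neither split/merged nor on a disabled/disabling table would linger in the old server's holdings after an hbck repair.

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class ServerHoldingsModel {
      // server -> regions it is believed to hold (mirrors serverHoldings)
      private final Map<String, Set<String>> serverHoldings = new HashMap<>();
      // region -> server it is assigned to (mirrors regionAssignments)
      private final Map<String, String> regionAssignments = new HashMap<>();

      void regionOnline(String region, String server) {
        regionAssignments.put(region, server);
        serverHoldings.computeIfAbsent(server, s -> new HashSet<>()).add(region);
      }

      void regionOffline(String region, boolean splitMergedOrDisabled, boolean force) {
        String oldServer = regionAssignments.remove(region);
        if (oldServer != null && serverHoldings.containsKey(oldServer)) {
          // Mirrors the patched condition: force short-circuits the
          // split/merged/disabled checks, so an hbck repair can purge a
          // stale holding unconditionally.
          if (force || splitMergedOrDisabled) {
            serverHoldings.get(oldServer).remove(region);
          }
        }
      }
    }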
@@ -882,6 +882,76 @@ public class TestHBaseFsck {
       assertNoErrors(hbck2);
       assertEquals(0, hbck2.getOverlapGroups(table).size());
       assertEquals(ROWKEYS.length, countRows());
+
+      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+      long totalRegions = cluster.countServedRegions();
+
+      // stop a region server and run fsck again
+      cluster.stopRegionServer(server);
+      cluster.waitForRegionServerToStop(server, 60);
+
+      // wait for all regions to come back online.
+      while (cluster.countServedRegions() < totalRegions) {
+        Thread.sleep(100);
+      }
+
+      // check again after stopping a region server.
+      HBaseFsck hbck3 = doFsck(conf, false);
+      assertNoErrors(hbck3);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with regions that overlap other regions.
+   */
+  @Test(timeout=180000)
+  public void testOverlapRegions() throws Exception {
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    TableName table =
+        TableName.valueOf("tableOverlapRegions");
+    HRegionInfo hri;
+    ServerName server;
+    try {
+      setupTable(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Now let's mess it up by adding a region which overlaps with others
+      hri = createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
+      TEST_UTIL.assignRegion(hri);
+      server = regionStates.getRegionServerOfRegion(hri);
+      TEST_UTIL.assertRegionOnServer(hri, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+          ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
+      assertEquals(3, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // fix the overlap regions.
+      doFsck(conf, true);
+
+      // check that the overlap regions are gone and there is no data loss
+      HBaseFsck hbck2 = doFsck(conf, false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      long totalRegions = cluster.countServedRegions();
+
+      // stop a region server and run fsck again
+      cluster.stopRegionServer(server);
+      cluster.waitForRegionServerToStop(server, 60);
+
+      // wait for all regions to come back online.
+      while (cluster.countServedRegions() < totalRegions) {
+        Thread.sleep(100);
+      }
+
+      HBaseFsck hbck3 = doFsck(conf, false);
+      assertNoErrors(hbck3);
     } finally {
       cleanupTable(table);
     }