HBASE-23369 Auto-close 'unknown' Regions reported as OPEN on RegionServers

Master force-closes unknown/incorrect Regions OPEN on RS

M hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
 Added a note and small refactor.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
 Fix an NPE when CJ ran.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
 Minor clean up of log message; make it clearer.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
 Make it so closeRegionSilentlyAndWait can be used w/o timeout.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 If a RegionServer Report notes a Region is OPEN and the Master does not
 know of said Region, close it (We used to crash out the RegionServer)

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
 Minor tweak of toString -- label should be state, not rit (confusing).

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
 Doc.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
 Add region name to exception.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/HBCKServerCrashProcedure.java
 Be more careful about which Regions we queue up for reassign. This
 procedure is run by the operator so could happen at any time. We
 will likely be running this when Master has some accounting of
 cluster members so check its answers for what Regions were on
 server before running.

M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 Doc and we were misrepresenting the case where a Region as not in RIT
 when we got CLOSE -- we were reporting it as though it was already
 trying to CLOSE.
This commit is contained in:
stack 2019-12-03 16:26:17 -08:00
parent 85ecdd10a9
commit d75a7001b1
9 changed files with 66 additions and 25 deletions

View File

@ -320,6 +320,7 @@ public class MetaTableAccessor {
* is stored in the name, so the returned object should only be used for the fields * is stored in the name, so the returned object should only be used for the fields
* in the regionName. * in the regionName.
*/ */
// This should be moved to RegionInfo? TODO.
public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException { public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException {
byte[][] fields = RegionInfo.parseRegionName(regionName); byte[][] fields = RegionInfo.parseRegionName(regionName);
long regionId = Long.parseLong(Bytes.toString(fields[2])); long regionId = Long.parseLong(Bytes.toString(fields[2]));
@ -1249,9 +1250,7 @@ public class MetaTableAccessor {
* Generates and returns a Put containing the region into for the catalog table * Generates and returns a Put containing the region into for the catalog table
*/ */
public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException { public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException {
Put put = new Put(regionInfo.getRegionName(), ts); return addRegionInfo(new Put(regionInfo.getRegionName(), ts), regionInfo);
addRegionInfo(put, regionInfo);
return put;
} }
/** /**

View File

@ -735,7 +735,7 @@ public class CatalogJanitor extends ScheduledChore {
continue; continue;
} }
RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri); RegionState rs = this.services.getAssignmentManager().getRegionStates().getRegionState(ri);
if (rs.isClosedOrAbnormallyClosed()) { if (rs == null || rs.isClosedOrAbnormallyClosed()) {
// If closed against an 'Unknown Server', that is should be fine. // If closed against an 'Unknown Server', that is should be fine.
continue; continue;
} }

View File

@ -1855,7 +1855,7 @@ public class HMaster extends HRegionServer implements MasterServices {
} catch (HBaseIOException hioe) { } catch (HBaseIOException hioe) {
//should ignore failed plans here, avoiding the whole balance plans be aborted //should ignore failed plans here, avoiding the whole balance plans be aborted
//later calls of balance() can fetch up the failed and skipped plans //later calls of balance() can fetch up the failed and skipped plans
LOG.warn("Failed balance plan: {}, just skip it", plan, hioe); LOG.warn("Failed balance plan {}, skipping...", plan, hioe);
} }
//rpCount records balance plans processed, does not care if a plan succeeds //rpCount records balance plans processed, does not care if a plan succeeds
rpCount++; rpCount++;

View File

@ -675,8 +675,9 @@ public class ServerManager {
} }
/** /**
* Contacts a region server and waits up to timeout ms to close the region. This bypasses the * Contacts a region server and waits up to timeout ms
* active hmaster. * to close the region. This bypasses the active hmaster.
* Pass -1 as timeout if you do not want to wait on result.
*/ */
public static void closeRegionSilentlyAndWait(AsyncClusterConnection connection, public static void closeRegionSilentlyAndWait(AsyncClusterConnection connection,
ServerName server, RegionInfo region, long timeout) throws IOException, InterruptedException { ServerName server, RegionInfo region, long timeout) throws IOException, InterruptedException {
@ -687,6 +688,9 @@ public class ServerManager {
} catch (IOException e) { } catch (IOException e) {
LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e); LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
} }
if (timeout < 0) {
return;
}
long expiration = timeout + System.currentTimeMillis(); long expiration = timeout + System.currentTimeMillis();
while (System.currentTimeMillis() < expiration) { while (System.currentTimeMillis() < expiration) {
try { try {

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -36,6 +36,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.PleaseHoldException; import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
@ -629,7 +630,8 @@ public class AssignmentManager {
ServerName targetServer) throws HBaseIOException { ServerName targetServer) throws HBaseIOException {
RegionStateNode regionNode = this.regionStates.getRegionStateNode(regionInfo); RegionStateNode regionNode = this.regionStates.getRegionStateNode(regionInfo);
if (regionNode == null) { if (regionNode == null) {
throw new UnknownRegionException("No RegionState found for " + regionInfo.getEncodedName()); throw new UnknownRegionException("No RegionStateNode found for " +
regionInfo.getEncodedName() + "(Closed/Deleted?)");
} }
TransitRegionStateProcedure proc; TransitRegionStateProcedure proc;
regionNode.lock(); regionNode.lock();
@ -944,7 +946,7 @@ public class AssignmentManager {
if (regionNode == null) { if (regionNode == null) {
// the table/region is gone. maybe a delete, split, merge // the table/region is gone. maybe a delete, split, merge
throw new UnexpectedStateException(String.format( throw new UnexpectedStateException(String.format(
"Server %s was trying to transition region %s to %s. but the region was removed.", "Server %s was trying to transition region %s to %s. but Region is not known.",
serverName, regionInfo, state)); serverName, regionInfo, state));
} }
LOG.trace("Update region transition serverName={} region={} regionState={}", serverName, LOG.trace("Update region transition serverName={} region={} regionState={}", serverName,
@ -966,7 +968,8 @@ public class AssignmentManager {
state.equals(TransitionCode.CLOSED)) { state.equals(TransitionCode.CLOSED)) {
LOG.info("RegionServer {} {}", state, regionNode.getRegionInfo().getEncodedName()); LOG.info("RegionServer {} {}", state, regionNode.getRegionInfo().getEncodedName());
} else { } else {
LOG.warn("No matching procedure found for {} transition to {}", regionNode, state); LOG.warn("No matching procedure found for {} transition on {} to {}",
serverName, regionNode, state);
} }
} }
} finally { } finally {
@ -1092,7 +1095,27 @@ public class AssignmentManager {
checkOnlineRegionsReport(serverNode, regionNames); checkOnlineRegionsReport(serverNode, regionNames);
} }
// just check and output possible inconsistency, without actually doing anything /**
* Close <code>regionName</code> on <code>sn</code> silently and immediately without
* using a Procedure or going via hbase:meta. For case where a RegionServer's hosting
* of a Region is not aligned w/ the Master's accounting of Region state. This is for
* cleaning up an error in accounting.
*/
private void closeRegionSilently(ServerName sn, byte [] regionName) {
try {
RegionInfo ri = MetaTableAccessor.parseRegionInfoFromRegionName(regionName);
// Pass -1 for timeout. Means do not wait.
ServerManager.closeRegionSilentlyAndWait(this.master.getClusterConnection(), sn, ri, -1);
} catch (Exception e) {
LOG.error("Failed trying to close {} on {}", Bytes.toStringBinary(regionName), sn, e);
}
}
/**
* Check that what the RegionServer reports aligns with the Master's image.
* If disagreement, we will tell the RegionServer to expediently close
* a Region we do not think it should have.
*/
private void checkOnlineRegionsReport(ServerStateNode serverNode, Set<byte[]> regionNames) { private void checkOnlineRegionsReport(ServerStateNode serverNode, Set<byte[]> regionNames) {
ServerName serverName = serverNode.getServerName(); ServerName serverName = serverNode.getServerName();
for (byte[] regionName : regionNames) { for (byte[] regionName : regionNames) {
@ -1101,10 +1124,13 @@ public class AssignmentManager {
} }
RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName); RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName);
if (regionNode == null) { if (regionNode == null) {
LOG.warn("No region state node for {}, it should already be on {}", String regionNameAsStr = Bytes.toStringBinary(regionName);
Bytes.toStringBinary(regionName), serverName); LOG.warn("No RegionStateNode for {} but reported as up on {}; closing...",
regionNameAsStr, serverName);
closeRegionSilently(serverNode.getServerName(), regionName);
continue; continue;
} }
final long lag = 1000;
regionNode.lock(); regionNode.lock();
try { try {
long diff = EnvironmentEdgeManager.currentTime() - regionNode.getLastUpdate(); long diff = EnvironmentEdgeManager.currentTime() - regionNode.getLastUpdate();
@ -1112,17 +1138,19 @@ public class AssignmentManager {
// This is possible as a region server has just closed a region but the region server // This is possible as a region server has just closed a region but the region server
// report is generated before the closing, but arrive after the closing. Make sure there // report is generated before the closing, but arrive after the closing. Make sure there
// is some elapsed time so less false alarms. // is some elapsed time so less false alarms.
if (!regionNode.getRegionLocation().equals(serverName) && diff > 1000) { if (!regionNode.getRegionLocation().equals(serverName) && diff > lag) {
LOG.warn("{} reported OPEN on server={} but state has otherwise", regionNode, LOG.warn("Reporting {} server does not match {} (time since last " +
serverName); "update={}ms); closing...",
serverName, regionNode, diff);
closeRegionSilently(serverNode.getServerName(), regionName);
} }
} else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) { } else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) {
// So, we can get report that a region is CLOSED or SPLIT because a heartbeat // So, we can get report that a region is CLOSED or SPLIT because a heartbeat
// came in at about same time as a region transition. Make sure there is some // came in at about same time as a region transition. Make sure there is some
// elapsed time so less false alarms. // elapsed time so less false alarms.
if (diff > 1000) { if (diff > lag) {
LOG.warn("{} reported an unexpected OPEN on {}; time since last update={}ms", LOG.warn("Reporting {} state does not match {} (time since last update={}ms)",
regionNode, serverName, diff); serverName, regionNode, diff);
} }
} }
} finally { } finally {

View File

@ -282,7 +282,7 @@ public class RegionStateNode implements Comparable<RegionStateNode> {
public String toShortString() { public String toShortString() {
// rit= is the current Region-In-Transition State -- see State enum. // rit= is the current Region-In-Transition State -- see State enum.
return String.format("rit=%s, location=%s", getState(), getRegionLocation()); return String.format("state=%s, location=%s", getState(), getRegionLocation());
} }
public String toDescriptiveString() { public String toDescriptiveString() {

View File

@ -96,6 +96,9 @@ public class RegionStates {
public RegionStates() { } public RegionStates() { }
/**
* Called on stop of AssignmentManager.
*/
public void clear() { public void clear() {
regionsMap.clear(); regionsMap.clear();
regionInTransition.clear(); regionInTransition.clear();
@ -728,12 +731,15 @@ public class RegionStates {
return serverMap.computeIfAbsent(serverName, key -> new ServerStateNode(key)); return serverMap.computeIfAbsent(serverName, key -> new ServerStateNode(key));
} }
/**
* Called by SCP at end of successful processing.
*/
public void removeServer(final ServerName serverName) { public void removeServer(final ServerName serverName) {
serverMap.remove(serverName); serverMap.remove(serverName);
} }
/** /**
* @return Pertinent ServerStateNode or NULL if none found. * @return Pertinent ServerStateNode or NULL if none found (Do not make modifications).
*/ */
@VisibleForTesting @VisibleForTesting
public ServerStateNode getServerNode(final ServerName serverName) { public ServerStateNode getServerNode(final ServerName serverName) {

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -248,7 +248,8 @@ public class TransitRegionStateProcedure
if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) { if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
// Throw exception to backoff and retry when failed open too many times // Throw exception to backoff and retry when failed open too many times
throw new HBaseIOException("Failed to open region"); throw new HBaseIOException("Failed confirm OPEN of " + regionNode +
" (remote log may yield more detail on why).");
} else { } else {
// Here we do not throw exception because we want to the region to be online ASAP // Here we do not throw exception because we want to the region to be online ASAP
return Flow.HAS_MORE_STATE; return Flow.HAS_MORE_STATE;

View File

@ -3170,7 +3170,7 @@ public class HRegionServer extends HasThread implements
/** /**
* Close asynchronously a region, can be called from the master or internally by the regionserver * Close asynchronously a region, can be called from the master or internally by the regionserver
* when stopping. If called from the master, the region will update the znode status. * when stopping. If called from the master, the region will update the status.
* *
* <p> * <p>
* If an opening was in progress, this method will cancel it, but will not start a new close. The * If an opening was in progress, this method will cancel it, but will not start a new close. The
@ -3200,6 +3200,7 @@ public class HRegionServer extends HasThread implements
} }
} }
// previous can come back 'null' if not in map.
final Boolean previous = this.regionsInTransitionInRS.putIfAbsent(Bytes.toBytes(encodedName), final Boolean previous = this.regionsInTransitionInRS.putIfAbsent(Bytes.toBytes(encodedName),
Boolean.FALSE); Boolean.FALSE);
@ -3221,6 +3222,8 @@ public class HRegionServer extends HasThread implements
throw new NotServingRegionException("The region " + encodedName + throw new NotServingRegionException("The region " + encodedName +
" was opening but not yet served. Opening is cancelled."); " was opening but not yet served. Opening is cancelled.");
} }
} else if (previous == null) {
LOG.info("Received CLOSE for {}", encodedName);
} else if (Boolean.FALSE.equals(previous)) { } else if (Boolean.FALSE.equals(previous)) {
LOG.info("Received CLOSE for the region: " + encodedName + LOG.info("Received CLOSE for the region: " + encodedName +
", which we are already trying to CLOSE, but not completed yet"); ", which we are already trying to CLOSE, but not completed yet");