diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 72eb3515b13..c58a5917cfb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -944,7 +944,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi " initialization."); } if (LOG.isDebugEnabled()) { - LOG.debug("Region open journal:\n" + status.prettyPrintJournal()); + LOG.debug("Region open journal for {}:\n{}", this.getRegionInfo().getEncodedName(), + status.prettyPrintJournal()); } status.cleanup(); } @@ -1553,7 +1554,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } finally { if (LOG.isDebugEnabled()) { - LOG.debug("Region close journal:\n" + status.prettyPrintJournal()); + LOG.debug("Region close journal for {}:\n{}", this.getRegionInfo().getEncodedName(), + status.prettyPrintJournal()); } status.cleanup(); } @@ -1755,7 +1757,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi Closeables.close(this.metricsRegionWrapper, true); } status.markComplete("Closed"); - LOG.info("Closed " + this); + LOG.info("Closed {}", this); return result; } finally { lock.writeLock().unlock(); @@ -2266,7 +2268,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } finally { if (requestNeedsCancellation) store.cancelRequestedCompaction(compaction); if (status != null) { - LOG.debug("Compaction status journal:\n\t" + status.prettyPrintJournal()); + LOG.debug("Compaction status journal for {}:\n\t{}", this.getRegionInfo().getEncodedName(), + status.prettyPrintJournal()); status.cleanup(); } } @@ -2413,7 +2416,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } finally { lock.readLock().unlock(); - LOG.debug("Flush status journal:\n\t" + status.prettyPrintJournal()); + LOG.debug("Flush status journal for {}:\n\t{}", this.getRegionInfo().getEncodedName(), + status.prettyPrintJournal()); status.cleanup(); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index fc0b69345ca..0de2c1755b7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -53,6 +53,7 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; import javax.management.MalformedObjectNameException; import javax.servlet.http.HttpServlet; import org.apache.commons.lang3.RandomUtils; @@ -1457,6 +1458,9 @@ public class HRegionServer extends HasThread implements " because some regions failed closing"); } break; + } else { + LOG.debug("Waiting on {}", this.regionsInTransitionInRS.keySet().stream(). + map(e -> Bytes.toString(e)).collect(Collectors.joining(", "))); } if (sleep(200)) { interrupted = true; @@ -3187,10 +3191,12 @@ public class HRegionServer extends HasThread implements * * @param encodedName Region to close * @param abort True if we are aborting + * @param destination Where the Region is being moved too... maybe null if unknown. * @return True if closed a region. * @throws NotServingRegionException if the region is not online */ - protected boolean closeRegion(String encodedName, final boolean abort, final ServerName sn) + protected boolean closeRegion(String encodedName, final boolean abort, + final ServerName destination) throws NotServingRegionException { //Check for permissions to close. HRegion actualRegion = this.getRegion(encodedName); @@ -3216,7 +3222,7 @@ public class HRegionServer extends HasThread implements // We're going to try to do a standard close then. LOG.warn("The opening for region " + encodedName + " was done before we could cancel it." + " Doing a standard close now"); - return closeRegion(encodedName, abort, sn); + return closeRegion(encodedName, abort, destination); } // Let's get the region from the online region list again actualRegion = this.getRegion(encodedName); @@ -3247,7 +3253,7 @@ public class HRegionServer extends HasThread implements if (hri.isMetaRegion()) { crh = new CloseMetaHandler(this, this, hri, abort); } else { - crh = new CloseRegionHandler(this, this, hri, abort, sn); + crh = new CloseRegionHandler(this, this, hri, abort, destination); } this.executorService.submit(crh); return true; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java index 76e8f802712..3474bf916be 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; /** @@ -136,6 +135,8 @@ public class AssignRegionHandler extends EventHandler { cleanUpAndReportFailure(e); return; } + // From here on out, this is PONR. We can not revert back. The only way to address an + // exception from here on out is to abort the region server. rs.postOpenDeployTasks(new PostOpenDeployContext(region, openProcId, masterSystemTime)); rs.addRegion(region); LOG.info("Opened {}", regionName); @@ -156,6 +157,9 @@ public class AssignRegionHandler extends EventHandler { protected void handleException(Throwable t) { LOG.warn("Fatal error occurred while opening region {}, aborting...", regionInfo.getRegionNameAsString(), t); + // Clear any reference in getServer().getRegionsInTransitionInRS() otherwise can hold up + // regionserver abort on cluster shutdown. HBASE-23984. + getServer().getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes()); getServer().abort( "Failed to open region " + regionInfo.getRegionNameAsString() + " and can not recover", t); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java index d4ea004cb23..6f430429635 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.regionserver.handler; import java.io.IOException; - import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; @@ -38,9 +37,13 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto /** * Handles closing of a region on a region server. *
- * Now for regular close region request, we will use {@link UnassignRegionHandler} instead. But when - * shutting down the region server, will also close regions and the related methods still use this - * class so we keep it here. + * In normal operation, we use {@link UnassignRegionHandler} closing Regions but when shutting down + * the region server and closing out Regions, we use this handler instead; it does not expect to + * be able to communicate the close back to the Master. + *Expects that the close *has* been registered in the hosting RegionServer before
+ * submitting this Handler; i.e. rss.getRegionsInTransitionInRS().putIfAbsent(
+ * this.regionInfo.getEncodedNameAsBytes(), Boolean.FALSE);
has been called first.
+ * In here when done, we do the deregister.