diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounterFactory.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounterFactory.java
index dcf6626ae4c..c15cfb2cc77 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounterFactory.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounterFactory.java
@@ -28,6 +28,10 @@ import org.apache.yetus.audience.InterfaceAudience;
 public class RetryCounterFactory {
   private final RetryConfig retryConfig;
 
+  public RetryCounterFactory(int sleepIntervalMillis) {
+    this(Integer.MAX_VALUE, sleepIntervalMillis);
+  }
+
   public RetryCounterFactory(int maxAttempts, int sleepIntervalMillis) {
     this(maxAttempts, sleepIntervalMillis, -1);
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index cd1fedff9ec..1a8b8dd8797 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -55,6 +55,7 @@ import javax.servlet.ServletException;
 import javax.servlet.http.HttpServlet;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -196,6 +197,8 @@ import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.IdLock;
 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.RetryCounter;
+import org.apache.hadoop.hbase.util.RetryCounterFactory;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.VersionInfo;
 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
@@ -943,11 +946,13 @@ public class HMaster extends HRegionServer implements MasterServices {
     // Start RegionServerTracker with listing of servers found with existing SCPs -- these should
     // be registered in the deadServers set -- and with the list of servernames out on the
     // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
+    // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
+    // TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
     this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager);
     this.regionServerTracker.start(
       procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
         .map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()),
-      walManager.getLiveServersFromWALDir());
+      walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir());
     // This manager will be started AFTER hbase:meta is confirmed on line.
     // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. They read table
     // state from zookeeper while hbase2 reads it from hbase:meta. Disable if no hbase1 clients.
@@ -978,10 +983,14 @@ public class HMaster extends HRegionServer implements MasterServices {
     status.setStatus("Initializing master coprocessors");
     this.cpHost = new MasterCoprocessorHost(this, this.conf);
 
+    // Checking if meta needs initializing.
status.setStatus("Initializing meta table if this is a new deploy"); InitMetaProcedure initMetaProc = null; - if (assignmentManager.getRegionStates().getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO) - .isOffline()) { + // Print out state of hbase:meta on startup; helps debugging. + RegionState rs = this.assignmentManager.getRegionStates(). + getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO); + LOG.info("hbase:meta {}", rs); + if (rs.isOffline()) { Optional> optProc = procedureExecutor.getProcedures().stream() .filter(p -> p instanceof InitMetaProcedure).findAny(); if (optProc.isPresent()) { @@ -1008,7 +1017,6 @@ public class HMaster extends HRegionServer implements MasterServices { if (initMetaProc != null) { initMetaProc.await(); } - tableStateManager.start(); // Wake up this server to check in sleeper.skipSleepCycle(); @@ -1025,7 +1033,20 @@ public class HMaster extends HRegionServer implements MasterServices { return; } - //Initialize after meta as it scans meta + status.setStatus("Starting assignment manager"); + // FIRST HBASE:META READ!!!! + // The below cannot make progress w/o hbase:meta being online. + // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background + // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta + // if it is down. It may take a while to come online. So, wait here until meta if for sure + // available. Thats what waitUntilMetaOnline does. + if (!waitUntilMetaOnline()) { + return; + } + this.assignmentManager.joinCluster(); + // The below depends on hbase:meta being online. + this.tableStateManager.start(); + // Initialize after meta is up as below scans meta if (favoredNodesManager != null) { SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment = new SnapshotOfRegionAssignmentFromMeta(getConnection()); @@ -1033,10 +1054,6 @@ public class HMaster extends HRegionServer implements MasterServices { favoredNodesManager.initialize(snapshotOfRegionAssignment); } - // Fix up assignment manager status - status.setStatus("Starting assignment manager"); - this.assignmentManager.joinCluster(); - // set cluster status again after user regions are assigned this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor()); @@ -1051,6 +1068,13 @@ public class HMaster extends HRegionServer implements MasterServices { this.catalogJanitorChore = new CatalogJanitor(this); getChoreService().scheduleChore(catalogJanitorChore); + // NAMESPACE READ!!!! + // Here we expect hbase:namespace to be online. See inside initClusterSchemaService. + // TODO: Fix this. Namespace is a pain being a sort-of system table. Fold it in to hbase:meta. + // isNamespace does like isMeta and waits until namespace is onlined before allowing progress. + if (!waitUntilNamespaceOnline()) { + return; + } status.setStatus("Starting cluster schema service"); initClusterSchemaService(); @@ -1126,6 +1150,68 @@ public class HMaster extends HRegionServer implements MasterServices { } } + /** + * Check hbase:meta is up and ready for reading. For use during Master startup only. + * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online + * and we will hold here until operator intervention. 
+   */
+  @VisibleForTesting
+  public boolean waitUntilMetaOnline() throws InterruptedException {
+    return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
+  }
+
+  /**
+   * @return True if region is online and scannable, else false on error or shutdown (otherwise
+   *   we just block in here holding up all forward-progress).
+   */
+  private boolean isRegionOnline(RegionInfo ri) throws InterruptedException {
+    RetryCounter rc = null;
+    while (!isStopped()) {
+      RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri);
+      if (rs.isOpened()) {
+        if (this.getServerManager().isServerOnline(rs.getServerName())) {
+          return true;
+        }
+      }
+      // Region is not OPEN.
+      Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures().
+          stream().filter(p -> p instanceof ServerCrashProcedure).findAny();
+      // TODO: Add a page to refguide on how to do repair. Have this log message point to it.
+      // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and
+      // then how to assign including how to break region lock if one held.
+      LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " +
+          "progress, in holding-pattern until region onlined; operator intervention required. " +
+          "Schedule an assign.", ri.getRegionNameAsString(), rs, optProc.isPresent());
+      // Sleep-and-retry, backing off exponentially from the one-second initial interval.
+      if (rc == null) {
+        rc = new RetryCounterFactory(1000).create();
+      }
+      Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
+    }
+    return false;
+  }
+
+  /**
+   * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table
+   * (TODO: Fix this! NS should not hold-up startup).
+   * @return True if namespace table is up/online.
+   */
+  @VisibleForTesting
+  public boolean waitUntilNamespaceOnline() throws InterruptedException {
+    List<RegionInfo> ris = this.assignmentManager.getRegionStates().
+        getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
+    if (ris.isEmpty()) {
+      // If empty, means we've not assigned the namespace table yet... Just return true so startup
+      // continues and the namespace table gets created.
+      return true;
+    }
+    // Else there are namespace regions up in meta. Ensure they are assigned before we go on.
+    for (RegionInfo ri: ris) {
+      if (!isRegionOnline(ri)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   /**
    * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to
    * automatically remove quotas for a table when that table is deleted.
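[Reviewer sketch, not part of the patch: the new one-argument RetryCounterFactory constructor yields an effectively unbounded counter (Integer.MAX_VALUE attempts), so callers like isRegionOnline() above can poll indefinitely with backoff between checks. The WaitUtil class and waitFor method below are hypothetical names, not HBase API.]

import java.util.function.BooleanSupplier;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.util.Threads;

public final class WaitUtil {
  private WaitUtil() {}

  /**
   * Block until the condition holds, sleeping between checks with a backoff that starts at
   * sleepIntervalMillis. Never gives up: the single-argument factory constructor defaults
   * maxAttempts to Integer.MAX_VALUE.
   */
  public static void waitFor(BooleanSupplier condition, int sleepIntervalMillis) {
    RetryCounter rc = new RetryCounterFactory(sleepIntervalMillis).create();
    while (!condition.getAsBoolean()) {
      Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
    }
  }
}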
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 51551758fb1..b0be5d14967 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -530,9 +530,8 @@ public class MasterRpcServices extends RSRpcServices
       RpcController controller, ReportRSFatalErrorRequest request) throws ServiceException {
     String errorText = request.getErrorMessage();
     ServerName sn = ProtobufUtil.toServerName(request.getServer());
-    String msg = "Region server " + sn
-      + " reported a fatal error:\n" + errorText;
-    LOG.error(msg);
+    String msg = sn + " reported a fatal error:\n" + errorText;
+    LOG.warn(msg);
     master.rsFatals.add(msg);
     return ReportRSFatalErrorResponse.newBuilder().build();
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java
index d716a116197..2b1a81f3578 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java
@@ -144,18 +144,33 @@ public class MasterWalManager {
   }
 
   /**
-   * @return listing of ServerNames found in the filesystem under the WAL directory
-   * that COULD BE 'alive'; excludes those that have a '-splitting' suffix as these are already
-   * being split -- they cannot be 'alive'.
+   * Get ServerNames which are currently splitting; their WAL dirs have a '-splitting' suffix.
+   * @return Set of ServerNames whose WAL dirs carry the '-splitting' suffix
+   * @throws IOException if the WAL directory cannot be listed
+   */
+  public Set<ServerName> getSplittingServersFromWALDir() throws IOException {
+    return getServerNamesFromWALDirPath(
+      p -> p.getName().endsWith(AbstractFSWALProvider.SPLITTING_EXT));
+  }
+
+  /**
+   * Get ServerNames that COULD BE 'alive'; excludes those that have a '-splitting' suffix as
+   * these are already being split -- they cannot be 'alive'.
+   * @return Set of ServerNames that could still be alive
+   * @throws IOException if the WAL directory cannot be listed
    */
   public Set<ServerName> getLiveServersFromWALDir() throws IOException {
-    Path walDirPath = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
-    FileStatus[] walDirForLiveServers = FSUtils.listStatus(fs, walDirPath,
+    return getServerNamesFromWALDirPath(
       p -> !p.getName().endsWith(AbstractFSWALProvider.SPLITTING_EXT));
-    if (walDirForLiveServers == null) {
-      return Collections.emptySet();
-    }
-    return Stream.of(walDirForLiveServers).map(s -> {
+  }
+
+  /**
+   * @return listing of ServerNames found by parsing WAL directory paths in FS.
+   *
+   */
+  public Set<ServerName> getServerNamesFromWALDirPath(final PathFilter filter) throws IOException {
+    FileStatus[] walDirForServerNames = getWALDirPaths(filter);
+    return Stream.of(walDirForServerNames).map(s -> {
       ServerName serverName = AbstractFSWALProvider.getServerNameFromWALDirectoryName(s.getPath());
       if (serverName == null) {
         LOG.warn("Log folder {} doesn't look like its name includes a " +
@@ -167,6 +182,15 @@ public class MasterWalManager {
     }).filter(s -> s != null).collect(Collectors.toSet());
   }
 
+  /**
+   * @return List of all RegionServer WAL dirs; i.e. this.rootDir/HConstants.HREGION_LOGDIR_NAME.
+   */
+  public FileStatus[] getWALDirPaths(final PathFilter filter) throws IOException {
+    Path walDirPath = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
+    FileStatus[] walDirForServerNames = FSUtils.listStatus(fs, walDirPath, filter);
+    return walDirForServerNames == null ? new FileStatus[0] : walDirForServerNames;
+  }
+
   /**
    * Inspect the log directory to find dead servers which need recovery work
    * @return A set of ServerNames which aren't running but still have WAL files left in file system
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
index f472e7267c6..b2aedf09bef 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
@@ -115,11 +115,19 @@ public class RegionServerTracker extends ZKListener {
    * protection to prevent concurrency issues with server expiration operation.
    * @param deadServersFromPE the region servers which already have SCP associated.
    * @param liveServersFromWALDir the live region servers from wal directory.
+   * @param splittingServersFromWALDir Servers whose WALs are being actively 'split'.
    */
-  public void start(Set<ServerName> deadServersFromPE, Set<ServerName> liveServersFromWALDir)
+  public void start(Set<ServerName> deadServersFromPE, Set<ServerName> liveServersFromWALDir,
+      Set<ServerName> splittingServersFromWALDir)
       throws KeeperException, IOException {
     LOG.info("Starting RegionServerTracker; {} have existing ServerCrashProcedures, {} " +
-      "possibly 'live' servers.", deadServersFromPE.size(), liveServersFromWALDir.size());
+      "possibly 'live' servers, and {} 'splitting'.", deadServersFromPE.size(),
+      liveServersFromWALDir.size(), splittingServersFromWALDir.size());
+    // deadServersFromPE is made from a list of outstanding ServerCrashProcedures.
+    // splittingServersFromWALDir are being actively split -- the directory in the FS ends in
+    // '-SPLITTING'. Each splitting server should have a corresponding SCP. Log if not.
+    splittingServersFromWALDir.stream().filter(s -> !deadServersFromPE.contains(s)).
+        forEach(s -> LOG.error("{} has no matching ServerCrashProcedure", s));
     watcher.registerListener(this);
     synchronized (this) {
       List<ServerName> servers =
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
index 0b4e35bf19a..aefeebe0989 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
@@ -383,12 +383,16 @@ public class TableNamespaceManager implements Stoppable {
       return;
     }
     try {
-      this.zkNamespaceManager.stop();
+      if (this.zkNamespaceManager != null) {
+        this.zkNamespaceManager.stop();
+      }
     } catch (IOException ioe) {
       LOG.warn("Failed NamespaceManager close", ioe);
     }
     try {
-      this.nsTable.close();
+      if (this.nsTable != null) {
+        this.nsTable.close();
+      }
     } catch (IOException ioe) {
       LOG.warn("Failed Namespace Table close", ioe);
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 28df28a2b6f..d30dcc98791 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1210,8 +1210,9 @@ public class AssignmentManager implements ServerListener {
     long startTime = System.nanoTime();
     LOG.debug("Joining cluster...");
 
-    // Scan hbase:meta to build list of existing regions, servers, and assignment
-    // hbase:meta is online when we get to here and TableStateManager has been started.
+    // Scan hbase:meta to build list of existing regions, servers, and assignment.
+    // hbase:meta is online now or will be. Inside loadMeta, we keep trying. Can't make progress
+    // w/o meta.
     loadMeta();
 
     while (master.getServerManager().countOfRegionServers() < 1) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
index 71d1fc99edb..f6e46401ef0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
@@ -261,7 +261,7 @@ public class DisableTableProcedure
     TableStateManager tsm = env.getMasterServices().getTableStateManager();
     TableState ts = tsm.getTableState(tableName);
     if (!ts.isEnabled()) {
-      LOG.info("Not ENABLED skipping {}", this);
+      LOG.info("Not ENABLED, state={}, skipping disable; {}", ts.getState(), this);
       setFailure("master-disable-table", new TableNotEnabledException(ts.toString()));
       canTableBeDisabled = false;
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
index 4e6211e2f14..144b0737b94 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
@@ -331,7 +331,7 @@ public class EnableTableProcedure
     TableStateManager tsm = env.getMasterServices().getTableStateManager();
     TableState ts = tsm.getTableState(tableName);
     if(!ts.isDisabled()){
-      LOG.info("Not DISABLED tableState=" + ts + "; skipping enable");
+      LOG.info("Not DISABLED tableState={}; skipping enable; {}", ts.getState(), this);
       setFailure("master-enable-table", new TableNotDisabledException(ts.toString()));
       canTableBeEnabled = false;
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 4ef3c2d9968..e28b29624c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -711,8 +711,12 @@ public class HRegionServer extends HasThread implements
       "hbase.regionserver.kerberos.principal", host);
   }
 
-  protected void waitForMasterActive() {
-  }
+
+  /**
+   * Wait for an active Master.
+   * See the override in the HMaster subclass for how it is used.
+   */
+  protected void waitForMasterActive() {}
 
   protected String getProcessName() {
     return REGIONSERVER;
@@ -873,10 +877,6 @@ public class HRegionServer extends HasThread implements
       }
     }
 
-    // In case colocated master, wait here till it's active.
-    // So backup masters won't start as regionservers.
-    // This is to avoid showing backup masters as regionservers
-    // in master web UI, or assigning any region to them.
     waitForMasterActive();
     if (isStopped() || isAborted()) {
       return; // No need for further initialization
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
index 279b9abf1b1..eaed0bc8a8e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.ipc.CallRunner;
 import org.apache.hadoop.hbase.ipc.DelegatingRpcScheduler;
 import org.apache.hadoop.hbase.ipc.PriorityFunction;
 import org.apache.hadoop.hbase.ipc.RpcScheduler;
+import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.regionserver.SimpleRpcSchedulerFactory;
@@ -105,6 +106,21 @@ public class TestMetaTableAccessor {
     UTIL.shutdownMiniCluster();
   }
 
+  @Test
+  public void testIsMetaWhenAllHealthy() throws InterruptedException {
+    HMaster m = UTIL.getMiniHBaseCluster().getMaster();
+    assertTrue(m.waitUntilMetaOnline());
+  }
+
+  @Test
+  public void testIsMetaWhenMetaGoesOffline() throws InterruptedException {
+    HMaster m = UTIL.getMiniHBaseCluster().getMaster();
+    int index = UTIL.getMiniHBaseCluster().getServerWithMeta();
+    HRegionServer rsWithMeta = UTIL.getMiniHBaseCluster().getRegionServer(index);
+    rsWithMeta.abort("TESTING");
+    assertTrue(m.waitUntilMetaOnline());
+  }
+
   /**
    * Does {@link MetaTableAccessor#getRegion(Connection, byte[])} and a write
    * against hbase:meta while its hosted server is restarted to prove our retrying