From f89faf3ac8d75240c724a18c8c3a9d505e06e900 Mon Sep 17 00:00:00 2001 From: Duo Zhang Date: Fri, 13 Nov 2020 14:28:31 +0800 Subject: [PATCH] HBASE-25255 Master fails to initialize when creating rs group table (#2638) Signed-off-by: Guanghao Zhang --- .../apache/hadoop/hbase/master/HMaster.java | 4 ++- .../master/assignment/AssignmentManager.java | 35 ++++++++++++++----- .../procedure/CreateTableProcedure.java | 9 ----- .../MasterProcedureTestingUtility.java | 1 + 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 32b78b0bd93..bbd8c0e4fbb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -964,6 +964,9 @@ public class HMaster extends HRegionServer implements MasterServices { } this.assignmentManager.joinCluster(); // The below depends on hbase:meta being online. + this.assignmentManager.processOfflineRegions(); + // this must be called after the above processOfflineRegions to prevent race + this.assignmentManager.wakeMetaLoadedEvent(); // for migrating from a version without HBASE-25099, and also for honoring the configuration // first. 
@@ -997,7 +1000,6 @@ public class HMaster extends HRegionServer implements MasterServices { } } } - this.assignmentManager.processOfflineRegions(); // Initialize after meta is up as below scans meta if (getFavoredNodesManager() != null && !maintenanceMode) { SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 49f1eb1fb56..355dfde013a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -505,8 +505,16 @@ public class AssignmentManager { return metaLoadEvent.suspendIfNotReady(proc); } + /** + * This method will be called in master initialization method after calling + * {@link #processOfflineRegions()}, as in processOfflineRegions we will generate assign + * procedures for offline regions, which may conflict with creating a table. + * <p/>
+ * This is a bit dirty, should be reconsidered after we decide whether to keep the + * {@link #processOfflineRegions()} method. + */ @VisibleForTesting - void wakeMetaLoadedEvent() { + public void wakeMetaLoadedEvent() { metaLoadEvent.wake(getProcedureScheduler()); assert isMetaLoaded() : "expected meta to be loaded"; } @@ -1505,12 +1513,23 @@ public class AssignmentManager { // Public so can be run by the Master as part of the startup. Needs hbase:meta to be online. // Needs to be done after the table state manager has been started. public void processOfflineRegions() { - List offlineRegions = regionStates.getRegionStates().stream() - .filter(RegionState::isOffline).filter(s -> isTableEnabled(s.getRegion().getTable())) - .map(RegionState::getRegion).collect(Collectors.toList()); - if (!offlineRegions.isEmpty()) { - master.getMasterProcedureExecutor().submitProcedures( - master.getAssignmentManager().createRoundRobinAssignProcedures(offlineRegions)); + TransitRegionStateProcedure[] procs = + regionStates.getRegionStateNodes().stream().filter(rsn -> rsn.isInState(State.OFFLINE)) + .filter(rsn -> isTableEnabled(rsn.getRegionInfo().getTable())).map(rsn -> { + rsn.lock(); + try { + if (rsn.getProcedure() != null) { + return null; + } else { + return rsn.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(), + rsn.getRegionInfo(), null)); + } + } finally { + rsn.unlock(); + } + }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new); + if (procs.length > 0) { + master.getMasterProcedureExecutor().submitProcedures(procs); } } @@ -1590,8 +1609,6 @@ public class AssignmentManager { private void loadMeta() throws IOException { // TODO: use a thread pool regionStateStore.visitMeta(new RegionMetaLoadingVisitor()); - // every assignment is blocked until meta is loaded. 
- wakeMetaLoadedEvent(); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java index 3f171ee694d..b8ce2535238 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java @@ -234,15 +234,6 @@ public class CreateTableProcedure } } - @Override - protected boolean waitInitialized(MasterProcedureEnv env) { - if (getTableName().isSystemTable()) { - // Creating system table is part of the initialization, so do not wait here. - return false; - } - return super.waitInitialized(env); - } - private boolean prepareCreate(final MasterProcedureEnv env) throws IOException { final TableName tableName = getTableName(); if (env.getMasterServices().getTableDescriptors().exists(tableName)) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java index 24245a2e7be..3dca5443982 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java @@ -116,6 +116,7 @@ public class MasterProcedureTestingUtility { AssignmentManager am = env.getAssignmentManager(); try { am.joinCluster(); + am.wakeMetaLoadedEvent(); master.setInitialized(true); } catch (Exception e) { LOG.warn("Failed to load meta", e);