From 71224ee530d281feab39e5721dc87b9ea01a1982 Mon Sep 17 00:00:00 2001 From: Allan Yang Date: Fri, 26 Oct 2018 05:54:50 -0700 Subject: [PATCH] HBASE-21376 Add some verbose log to MasterProcedureScheduler --- .../master/procedure/MasterProcedureEnv.java | 4 +- .../procedure/MasterProcedureScheduler.java | 78 ++++++++++++++----- .../hadoop/hbase/master/procedure/Queue.java | 9 ++- 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java index 1eec72ab862..008acf408a9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java @@ -82,7 +82,9 @@ public class MasterProcedureEnv implements ConfigurationObserver { public MasterProcedureEnv(final MasterServices master, final RSProcedureDispatcher remoteDispatcher) { this.master = master; - this.procSched = new MasterProcedureScheduler(); + boolean verbose = master.getConfiguration() + .getBoolean("hbase.master.procedure.scheduler.verbose", false); + this.procSched = new MasterProcedureScheduler(verbose); this.remoteDispatcher = remoteDispatcher; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java index 422a11b921f..1b316f92554 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java @@ -116,6 +116,16 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { private final SchemaLocking locking = new SchemaLocking(); + private boolean verbose = false; + + public MasterProcedureScheduler() { + + } + + public MasterProcedureScheduler(boolean verbose) { + this.verbose = verbose; + } + @Override public void yield(final Procedure proc) { push(proc, isTableProcedure(proc), true); @@ -147,11 +157,11 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { if (!queue.getLockStatus().hasExclusiveLock()) { // if the queue was not remove for an xlock execution,put the queue back into execution queue.add(proc, addFront); - addToRunQueue(fairq, queue); + addToRunQueue(fairq, queue, proc, "queue doesn't have exclusivelock"); } else if (queue.getLockStatus().hasLockAccess(proc)) { // always add it to front as the have the lock access. queue.add(proc, true); - addToRunQueue(fairq, queue); + addToRunQueue(fairq, queue, proc, "procedure has lock access"); } else { queue.add(proc, addFront); } @@ -185,6 +195,9 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { private > Procedure doPoll(final FairQueue fairq) { final Queue rq = fairq.poll(); if (rq == null || !rq.isAvailable()) { + if (verbose && rq != null) { + LOG.debug("polled from {} but it is not available", rq); + } return null; } @@ -195,20 +208,30 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { final boolean xlockReq = rq.requireExclusiveLock(pollResult); if (xlockReq && rq.getLockStatus().isLocked() && !rq.getLockStatus().hasLockAccess(pollResult)) { // someone is already holding the lock (e.g. shared lock). avoid a yield - removeFromRunQueue(fairq, rq); + removeFromRunQueue(fairq, rq, pollResult, + "exclusive lock not acquired: " + rq.getLockStatus()); return null; } rq.poll(); if (rq.isEmpty() || xlockReq) { - removeFromRunQueue(fairq, rq); + String why = ""; + if (rq.isEmpty()) { + why += "runqueue is empty "; + } + if (xlockReq) { + why += "xlock is required for next procedure"; + } + removeFromRunQueue(fairq, rq, pollResult, why); } else if (rq.getLockStatus().hasParentLock(pollResult)) { // if the rq is in the fairq because of runnable child // check if the next procedure is still a child. // if not, remove the rq from the fairq and go back to the xlock state Procedure nextProc = rq.peek(); if (nextProc != null && !Procedure.haveSameParent(nextProc, pollResult)) { - removeFromRunQueue(fairq, rq); + removeFromRunQueue(fairq, rq, pollResult, + " parent holds the lock and the next one is not the child of the parent, next procid=" + + nextProc.getProcId()); } } @@ -269,7 +292,9 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { while (treeMap != null) { Queue node = AvlTree.getFirst(treeMap); treeMap = AvlTree.remove(treeMap, node.getKey(), comparator); - if (fairq != null) removeFromRunQueue(fairq, node); + if (fairq != null) { + removeFromRunQueue(fairq, node, null, "clear is called"); + } } } @@ -326,16 +351,25 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { } } - private static > void addToRunQueue(FairQueue fairq, Queue queue) { + private > void addToRunQueue( + FairQueue fairq, Queue queue, final Procedure proc, String why) { if (!AvlIterableList.isLinked(queue) && !queue.isEmpty()) { fairq.add(queue); + if (verbose) { + LOG.debug("Adding {} to runqueue, triggered by {}, since {}", + queue.getKey(), proc, why); + } } } - private static > void removeFromRunQueue( - FairQueue fairq, Queue queue) { + private > void removeFromRunQueue( + FairQueue fairq, Queue queue, final Procedure proc, String why) { if (AvlIterableList.isLinked(queue)) { fairq.remove(queue); + if (verbose) { + LOG.debug("Remove {} from runqueue, triggered by {}, since {}", + queue.getKey(), proc, why); + } } } @@ -403,7 +437,7 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { LockAndQueue lock = locking.getServerLock(serverName); if (node.isEmpty() && lock.tryExclusiveLock(proc)) { - removeFromRunQueue(serverRunQueue, node); + removeFromRunQueue(serverRunQueue, node, proc, "tryCleanupServerQueue is called"); removeServerQueue(serverName); } } finally { @@ -531,7 +565,8 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { logLockedResource(LockedResourceType.TABLE, table.getNameAsString()); return true; } - removeFromRunQueue(tableRunQueue, getTableQueue(table)); + removeFromRunQueue(tableRunQueue, getTableQueue(table), procedure, + "need to wait TableExclusiveLock, lockstatus=" + tableLock); return false; } finally { schedUnlock(); @@ -555,7 +590,8 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { if (namespaceLock.releaseSharedLock()) { waitingCount += wakeWaitingProcedures(namespaceLock); } - addToRunQueue(tableRunQueue, getTableQueue(table)); + addToRunQueue(tableRunQueue, getTableQueue(table), procedure, + "table's exclusive lock is available"); wakePollIfNeeded(waitingCount); } finally { schedUnlock(); @@ -607,7 +643,8 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { final LockAndQueue tableLock = locking.getTableLock(table); int waitingCount = 0; if (tableLock.releaseSharedLock()) { - addToRunQueue(tableRunQueue, getTableQueue(table)); + addToRunQueue(tableRunQueue, getTableQueue(table), procedure, + "table's shard lock is available"); waitingCount += wakeWaitingProcedures(tableLock); } if (namespaceLock.releaseSharedLock()) { @@ -815,7 +852,8 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { waitingCount += wakeWaitingProcedures(namespaceLock); } if (systemNamespaceTableLock.releaseSharedLock()) { - addToRunQueue(tableRunQueue, getTableQueue(TableName.NAMESPACE_TABLE_NAME)); + addToRunQueue(tableRunQueue, getTableQueue(TableName.NAMESPACE_TABLE_NAME), procedure, + "namespace exclusive lock is available"); waitingCount += wakeWaitingProcedures(systemNamespaceTableLock); } wakePollIfNeeded(waitingCount); @@ -845,7 +883,9 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { removeFromRunQueue(serverRunQueue, getServerQueue(serverName, procedure instanceof ServerProcedureInterface ? (ServerProcedureInterface) procedure - : null)); + : null), procedure, + "need to wait ServerExclusiveLock, ownerid=" + lock + .getExclusiveLockProcIdOwner()); return false; } waitProcedure(lock, procedure); @@ -873,7 +913,7 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { addToRunQueue(serverRunQueue, getServerQueue(serverName, procedure instanceof ServerProcedureInterface ? (ServerProcedureInterface) procedure - : null)); + : null), procedure, "Server's exclusive lock is available"); int waitingCount = wakeWaitingProcedures(lock); wakePollIfNeeded(waitingCount); } finally { @@ -948,7 +988,8 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { try { final LockAndQueue lock = locking.getMetaLock(); if (lock.tryExclusiveLock(procedure)) { - removeFromRunQueue(metaRunQueue, getMetaQueue()); + removeFromRunQueue(metaRunQueue, getMetaQueue(), procedure, + "need to wait MetaExclusiveLock, lockStatus=" + lock); return false; } waitProcedure(lock, procedure); @@ -972,7 +1013,8 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler { try { final LockAndQueue lock = locking.getMetaLock(); lock.releaseExclusiveLock(procedure); - addToRunQueue(metaRunQueue, getMetaQueue()); + addToRunQueue(metaRunQueue, getMetaQueue(), procedure, + "meta's exclusive lock is available"); int waitingCount = wakeWaitingProcedures(lock); wakePollIfNeeded(waitingCount); } finally { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/Queue.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/Queue.java index 43e66d09708..4eb33906683 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/Queue.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/Queue.java @@ -118,9 +118,10 @@ abstract class Queue> extends AvlLinkedNode 0 ? peek() : "null")); } }