From 02f5f171f51b7ed801355c8acdcfc4ad47c23a3e Mon Sep 17 00:00:00 2001 From: zhangduo Date: Fri, 26 Oct 2018 20:24:57 +0800 Subject: [PATCH] HBASE-21389 Revisit the procedure lock for sync replication --- .../hbase/master/procedure/PeerQueue.java | 9 +- .../AbstractPeerNoLockProcedure.java | 98 +++++++++++++++++++ .../replication/AbstractPeerProcedure.java | 65 +----------- .../replication/RecoverStandbyProcedure.java | 2 +- .../SyncReplicationReplayWALProcedure.java | 2 +- 5 files changed, 108 insertions(+), 68 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerNoLockProcedure.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerQueue.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerQueue.java index 0e80e2a5724..ba765147982 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerQueue.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerQueue.java @@ -35,8 +35,11 @@ class PeerQueue extends Queue { } private static boolean requirePeerExclusiveLock(PeerProcedureInterface proc) { - return proc.getPeerOperationType() != PeerOperationType.REFRESH - && proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL - && proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE; + // These procedures will only be used as sub procedures, and if they are scheduled, it always + // means that the root procedure holds the xlock, so we do not need to hold any locks. + return proc.getPeerOperationType() != PeerOperationType.REFRESH && + proc.getPeerOperationType() != PeerOperationType.RECOVER_STANDBY && + proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL && + proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerNoLockProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerNoLockProcedure.java new file mode 100644 index 00000000000..8f8e1e1921e --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerNoLockProcedure.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.replication; + +import java.io.IOException; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface; +import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerProcedureStateData; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; + +/** + * Base class for replication peer related procedures which do not need to hold locks(for most of + * the sub procedures). + */ +@InterfaceAudience.Private +public abstract class AbstractPeerNoLockProcedure + extends StateMachineProcedure implements PeerProcedureInterface { + + protected String peerId; + + protected int attempts; + + protected AbstractPeerNoLockProcedure() { + } + + protected AbstractPeerNoLockProcedure(String peerId) { + this.peerId = peerId; + } + + @Override + public String getPeerId() { + return peerId; + } + + @Override + protected boolean waitInitialized(MasterProcedureEnv env) { + return env.waitInitialized(this); + } + + @Override + protected void rollbackState(MasterProcedureEnv env, TState state) + throws IOException, InterruptedException { + if (state == getInitialState()) { + // actually the peer related operations has no rollback, but if we haven't done any + // modifications on the peer storage yet, we can just return. + return; + } + throw new UnsupportedOperationException(); + } + + @Override + protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { + super.serializeStateData(serializer); + serializer.serialize(PeerProcedureStateData.newBuilder().setPeerId(peerId).build()); + } + + @Override + protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { + super.deserializeStateData(serializer); + peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId(); + } + + @Override + protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { + setState(ProcedureProtos.ProcedureState.RUNNABLE); + env.getProcedureScheduler().addFront(this); + return false; + } + + protected final ProcedureSuspendedException suspend(long backoff) + throws ProcedureSuspendedException { + attempts++; + setTimeout(Math.toIntExact(backoff)); + setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); + skipPersistence(); + throw new ProcedureSuspendedException(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerProcedure.java index d3a4eb8638b..882a050dffb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AbstractPeerProcedure.java @@ -17,41 +17,29 @@ */ package org.apache.hadoop.hbase.master.replication; -import java.io.IOException; - import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface; import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch; -import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; -import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; -import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; import org.apache.hadoop.hbase.replication.ReplicationException; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerProcedureStateData; -import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; - /** * The base class for all replication peer related procedure. */ @InterfaceAudience.Private public abstract class AbstractPeerProcedure - extends StateMachineProcedure implements PeerProcedureInterface { - - protected String peerId; + extends AbstractPeerNoLockProcedure implements PeerProcedureInterface { // used to keep compatible with old client where we can only returns after updateStorage. protected ProcedurePrepareLatch latch; - protected int attempts; - protected AbstractPeerProcedure() { } protected AbstractPeerProcedure(String peerId) { - this.peerId = peerId; + super(peerId); this.latch = ProcedurePrepareLatch.createLatch(2, 1); } @@ -59,16 +47,6 @@ public abstract class AbstractPeerProcedure return latch; } - @Override - public String getPeerId() { - return peerId; - } - - @Override - protected boolean waitInitialized(MasterProcedureEnv env) { - return env.waitInitialized(this); - } - @Override protected LockState acquireLock(MasterProcedureEnv env) { if (env.getProcedureScheduler().waitPeerExclusiveLock(this, peerId)) { @@ -87,50 +65,11 @@ public abstract class AbstractPeerProcedure return true; } - @Override - protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { - super.serializeStateData(serializer); - serializer.serialize(PeerProcedureStateData.newBuilder().setPeerId(peerId).build()); - } - - @Override - protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { - super.deserializeStateData(serializer); - peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId(); - } - - @Override - protected void rollbackState(MasterProcedureEnv env, TState state) - throws IOException, InterruptedException { - if (state == getInitialState()) { - // actually the peer related operations has no rollback, but if we haven't done any - // modifications on the peer storage yet, we can just return. - return; - } - throw new UnsupportedOperationException(); - } - protected final void refreshPeer(MasterProcedureEnv env, PeerOperationType type) { addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream() .map(sn -> new RefreshPeerProcedure(peerId, type, sn)).toArray(RefreshPeerProcedure[]::new)); } - @Override - protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { - setState(ProcedureProtos.ProcedureState.RUNNABLE); - env.getProcedureScheduler().addFront(this); - return false; - } - - protected final ProcedureSuspendedException suspend(long backoff) - throws ProcedureSuspendedException { - attempts++; - setTimeout(Math.toIntExact(backoff)); - setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); - skipPersistence(); - throw new ProcedureSuspendedException(); - } - // will be override in test to simulate error @VisibleForTesting protected void enablePeer(MasterProcedureEnv env) throws ReplicationException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/RecoverStandbyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/RecoverStandbyProcedure.java index 54947429d8b..a1e2400fc97 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/RecoverStandbyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/RecoverStandbyProcedure.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyStateData; @InterfaceAudience.Private -public class RecoverStandbyProcedure extends AbstractPeerProcedure { +public class RecoverStandbyProcedure extends AbstractPeerNoLockProcedure { private static final Logger LOG = LoggerFactory.getLogger(RecoverStandbyProcedure.class); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALProcedure.java index 21a8c81fe24..653b766926a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALProcedure.java @@ -36,7 +36,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.S @InterfaceAudience.Private public class SyncReplicationReplayWALProcedure - extends AbstractPeerProcedure { + extends AbstractPeerNoLockProcedure { private static final Logger LOG = LoggerFactory.getLogger(SyncReplicationReplayWALProcedure.class);