HBASE-21389 Revisit the procedure lock for sync replication

zhangduo 2018-10-26 20:24:57 +08:00 committed by Duo Zhang
parent 116eee6747
commit 02f5f171f5
5 changed files with 108 additions and 68 deletions

PeerQueue.java

@@ -35,8 +35,11 @@ class PeerQueue extends Queue<String> {
}
private static boolean requirePeerExclusiveLock(PeerProcedureInterface proc) {
return proc.getPeerOperationType() != PeerOperationType.REFRESH
&& proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL
&& proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE;
// These procedures will only be used as sub procedures, and if they are scheduled, it always
// means that the root procedure holds the xlock, so we do not need to hold any locks.
return proc.getPeerOperationType() != PeerOperationType.REFRESH &&
proc.getPeerOperationType() != PeerOperationType.RECOVER_STANDBY &&
proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL &&
proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE;
}
}
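
The intent of the new check reads more clearly when the exempt operation types are grouped into a set. Below is a minimal sketch (not part of the commit) that is behaviorally equivalent to the method shown in the hunk above; it only uses the PeerOperationType values already visible in the diff and needs java.util.EnumSet.

// Sketch only: an equivalent formulation of requirePeerExclusiveLock using an
// EnumSet of the operation types that are exempt from the peer exclusive lock.
private static final EnumSet<PeerOperationType> NO_LOCK_TYPES = EnumSet.of(
  PeerOperationType.REFRESH,
  PeerOperationType.RECOVER_STANDBY,
  PeerOperationType.SYNC_REPLICATION_REPLAY_WAL,
  PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE);

private static boolean requirePeerExclusiveLock(PeerProcedureInterface proc) {
  // These operations are only scheduled as sub procedures while their root
  // procedure already holds the peer xlock, so they need no lock of their own.
  return !NO_LOCK_TYPES.contains(proc.getPeerOperationType());
}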

AbstractPeerNoLockProcedure.java (new file)

@@ -0,0 +1,98 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.replication;
import java.io.IOException;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerProcedureStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
/**
* Base class for replication peer related procedures which do not need to hold locks(for most of
* the sub procedures).
*/
@InterfaceAudience.Private
public abstract class AbstractPeerNoLockProcedure<TState>
extends StateMachineProcedure<MasterProcedureEnv, TState> implements PeerProcedureInterface {
protected String peerId;
protected int attempts;
protected AbstractPeerNoLockProcedure() {
}
protected AbstractPeerNoLockProcedure(String peerId) {
this.peerId = peerId;
}
@Override
public String getPeerId() {
return peerId;
}
@Override
protected boolean waitInitialized(MasterProcedureEnv env) {
return env.waitInitialized(this);
}
@Override
protected void rollbackState(MasterProcedureEnv env, TState state)
throws IOException, InterruptedException {
if (state == getInitialState()) {
// actually the peer related operations has no rollback, but if we haven't done any
// modifications on the peer storage yet, we can just return.
return;
}
throw new UnsupportedOperationException();
}
@Override
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.serializeStateData(serializer);
serializer.serialize(PeerProcedureStateData.newBuilder().setPeerId(peerId).build());
}
@Override
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.deserializeStateData(serializer);
peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId();
}
@Override
protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
setState(ProcedureProtos.ProcedureState.RUNNABLE);
env.getProcedureScheduler().addFront(this);
return false;
}
protected final ProcedureSuspendedException suspend(long backoff)
throws ProcedureSuspendedException {
attempts++;
setTimeout(Math.toIntExact(backoff));
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
skipPersistence();
throw new ProcedureSuspendedException();
}
}
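
To see how the suspend()/setTimeoutFailure() pair above is meant to be used, here is a hedged sketch of a subclass state step that retries with bounded exponential backoff. MyState, doWork() and the backoff formula are hypothetical; only the inherited attempts counter, suspend(), and the Flow return type from StateMachineProcedure come from the code above.

// Hypothetical subclass step of AbstractPeerNoLockProcedure showing the retry
// pattern: on a transient failure, compute a bounded backoff from the inherited
// attempts counter and call suspend(), which parks the procedure in
// WAITING_TIMEOUT until setTimeoutFailure() re-adds it to the front of the queue.
@Override
protected Flow executeFromState(MasterProcedureEnv env, MyState state)
    throws ProcedureSuspendedException {
  try {
    doWork(env); // hypothetical helper doing the actual peer work for this state
    return Flow.NO_MORE_STATE;
  } catch (IOException e) {
    // exponential backoff, capped at 60 seconds; the real code may use a shared helper
    long backoffMillis = Math.min(1000L * (1L << Math.min(attempts, 6)), 60_000L);
    throw suspend(backoffMillis);
  }
}

Note that suspend() already throws internally; declaring ProcedureSuspendedException as its return type is what lets callers write "throw suspend(...)" so the compiler knows the branch terminates.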

AbstractPeerProcedure.java

@@ -17,41 +17,29 @@
*/
package org.apache.hadoop.hbase.master.replication;
import java.io.IOException;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface;
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerProcedureStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
/**
* The base class for all replication peer related procedure.
*/
@InterfaceAudience.Private
public abstract class AbstractPeerProcedure<TState>
extends StateMachineProcedure<MasterProcedureEnv, TState> implements PeerProcedureInterface {
protected String peerId;
extends AbstractPeerNoLockProcedure<TState> implements PeerProcedureInterface {
// used to keep compatible with old client where we can only returns after updateStorage.
protected ProcedurePrepareLatch latch;
protected int attempts;
protected AbstractPeerProcedure() {
}
protected AbstractPeerProcedure(String peerId) {
this.peerId = peerId;
super(peerId);
this.latch = ProcedurePrepareLatch.createLatch(2, 1);
}
@@ -59,16 +47,6 @@ public abstract class AbstractPeerProcedure<TState>
return latch;
}
@Override
public String getPeerId() {
return peerId;
}
@Override
protected boolean waitInitialized(MasterProcedureEnv env) {
return env.waitInitialized(this);
}
@Override
protected LockState acquireLock(MasterProcedureEnv env) {
if (env.getProcedureScheduler().waitPeerExclusiveLock(this, peerId)) {
@@ -87,50 +65,11 @@ public abstract class AbstractPeerProcedure<TState>
return true;
}
@Override
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.serializeStateData(serializer);
serializer.serialize(PeerProcedureStateData.newBuilder().setPeerId(peerId).build());
}
@Override
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.deserializeStateData(serializer);
peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId();
}
@Override
protected void rollbackState(MasterProcedureEnv env, TState state)
throws IOException, InterruptedException {
if (state == getInitialState()) {
// actually the peer related operations has no rollback, but if we haven't done any
// modifications on the peer storage yet, we can just return.
return;
}
throw new UnsupportedOperationException();
}
protected final void refreshPeer(MasterProcedureEnv env, PeerOperationType type) {
addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
.map(sn -> new RefreshPeerProcedure(peerId, type, sn)).toArray(RefreshPeerProcedure[]::new));
}
@Override
protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
setState(ProcedureProtos.ProcedureState.RUNNABLE);
env.getProcedureScheduler().addFront(this);
return false;
}
protected final ProcedureSuspendedException suspend(long backoff)
throws ProcedureSuspendedException {
attempts++;
setTimeout(Math.toIntExact(backoff));
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
skipPersistence();
throw new ProcedureSuspendedException();
}
// will be override in test to simulate error
@VisibleForTesting
protected void enablePeer(MasterProcedureEnv env) throws ReplicationException {
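
For contrast with the no-lock base class, the part of this class that does take the peer lock is only partially visible in the hunk above (the opening of acquireLock). A hedged reconstruction of the acquire/release pair follows; the wakePeerExclusiveLock call in releaseLock is an assumption inferred from the scheduler method named in acquireLock and does not appear in this diff.

// Sketch of the lock pair kept by AbstractPeerProcedure (only acquireLock's first
// lines appear in the hunk above; releaseLock is reconstructed and hedged).
@Override
protected LockState acquireLock(MasterProcedureEnv env) {
  if (env.getProcedureScheduler().waitPeerExclusiveLock(this, peerId)) {
    // Another procedure holds the peer lock; park until we are woken up.
    return LockState.LOCK_EVENT_WAIT;
  }
  return LockState.LOCK_ACQUIRED;
}

@Override
protected void releaseLock(MasterProcedureEnv env) {
  // Assumed counterpart to waitPeerExclusiveLock; verify against the scheduler API.
  env.getProcedureScheduler().wakePeerExclusiveLock(this, peerId);
}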

RecoverStandbyProcedure.java

@@ -35,7 +35,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyStateData;
@InterfaceAudience.Private
public class RecoverStandbyProcedure extends AbstractPeerProcedure<RecoverStandbyState> {
public class RecoverStandbyProcedure extends AbstractPeerNoLockProcedure<RecoverStandbyState> {
private static final Logger LOG = LoggerFactory.getLogger(RecoverStandbyProcedure.class);

SyncReplicationReplayWALProcedure.java

@@ -36,7 +36,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.S
@InterfaceAudience.Private
public class SyncReplicationReplayWALProcedure
extends AbstractPeerProcedure<SyncReplicationReplayWALState> {
extends AbstractPeerNoLockProcedure<SyncReplicationReplayWALState> {
private static final Logger LOG =
LoggerFactory.getLogger(SyncReplicationReplayWALProcedure.class);