HBASE-21389 Revisit the procedure lock for sync replication
This commit is contained in:
parent
116eee6747
commit
02f5f171f5
|
@ -35,8 +35,11 @@ class PeerQueue extends Queue<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean requirePeerExclusiveLock(PeerProcedureInterface proc) {
|
private static boolean requirePeerExclusiveLock(PeerProcedureInterface proc) {
|
||||||
return proc.getPeerOperationType() != PeerOperationType.REFRESH
|
// These procedures will only be used as sub procedures, and if they are scheduled, it always
|
||||||
&& proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL
|
// means that the root procedure holds the xlock, so we do not need to hold any locks.
|
||||||
&& proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE;
|
return proc.getPeerOperationType() != PeerOperationType.REFRESH &&
|
||||||
|
proc.getPeerOperationType() != PeerOperationType.RECOVER_STANDBY &&
|
||||||
|
proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL &&
|
||||||
|
proc.getPeerOperationType() != PeerOperationType.SYNC_REPLICATION_REPLAY_WAL_REMOTE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.master.replication;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
|
import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerProcedureStateData;
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for replication peer related procedures which do not need to hold locks (for most of
|
||||||
|
* the sub procedures).
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public abstract class AbstractPeerNoLockProcedure<TState>
|
||||||
|
extends StateMachineProcedure<MasterProcedureEnv, TState> implements PeerProcedureInterface {
|
||||||
|
|
||||||
|
protected String peerId;
|
||||||
|
|
||||||
|
protected int attempts;
|
||||||
|
|
||||||
|
protected AbstractPeerNoLockProcedure() {
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AbstractPeerNoLockProcedure(String peerId) {
|
||||||
|
this.peerId = peerId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getPeerId() {
|
||||||
|
return peerId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean waitInitialized(MasterProcedureEnv env) {
|
||||||
|
return env.waitInitialized(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void rollbackState(MasterProcedureEnv env, TState state)
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
if (state == getInitialState()) {
|
||||||
|
// actually the peer related operations have no rollback, but if we haven't done any
|
||||||
|
// modifications on the peer storage yet, we can just return.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
||||||
|
super.serializeStateData(serializer);
|
||||||
|
serializer.serialize(PeerProcedureStateData.newBuilder().setPeerId(peerId).build());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
||||||
|
super.deserializeStateData(serializer);
|
||||||
|
peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
|
||||||
|
setState(ProcedureProtos.ProcedureState.RUNNABLE);
|
||||||
|
env.getProcedureScheduler().addFront(this);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final ProcedureSuspendedException suspend(long backoff)
|
||||||
|
throws ProcedureSuspendedException {
|
||||||
|
attempts++;
|
||||||
|
setTimeout(Math.toIntExact(backoff));
|
||||||
|
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
|
||||||
|
skipPersistence();
|
||||||
|
throw new ProcedureSuspendedException();
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,41 +17,29 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.master.replication;
|
package org.apache.hadoop.hbase.master.replication;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface;
|
import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface;
|
||||||
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
|
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
|
||||||
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
|
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationException;
|
import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerProcedureStateData;
|
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The base class for all replication peer related procedures.
|
* The base class for all replication peer related procedures.
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public abstract class AbstractPeerProcedure<TState>
|
public abstract class AbstractPeerProcedure<TState>
|
||||||
extends StateMachineProcedure<MasterProcedureEnv, TState> implements PeerProcedureInterface {
|
extends AbstractPeerNoLockProcedure<TState> implements PeerProcedureInterface {
|
||||||
|
|
||||||
protected String peerId;
|
|
||||||
|
|
||||||
// used to keep compatible with old client where we can only return after updateStorage.
|
// used to keep compatible with old client where we can only return after updateStorage.
|
||||||
protected ProcedurePrepareLatch latch;
|
protected ProcedurePrepareLatch latch;
|
||||||
|
|
||||||
protected int attempts;
|
|
||||||
|
|
||||||
protected AbstractPeerProcedure() {
|
protected AbstractPeerProcedure() {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected AbstractPeerProcedure(String peerId) {
|
protected AbstractPeerProcedure(String peerId) {
|
||||||
this.peerId = peerId;
|
super(peerId);
|
||||||
this.latch = ProcedurePrepareLatch.createLatch(2, 1);
|
this.latch = ProcedurePrepareLatch.createLatch(2, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,16 +47,6 @@ public abstract class AbstractPeerProcedure<TState>
|
||||||
return latch;
|
return latch;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getPeerId() {
|
|
||||||
return peerId;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean waitInitialized(MasterProcedureEnv env) {
|
|
||||||
return env.waitInitialized(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected LockState acquireLock(MasterProcedureEnv env) {
|
protected LockState acquireLock(MasterProcedureEnv env) {
|
||||||
if (env.getProcedureScheduler().waitPeerExclusiveLock(this, peerId)) {
|
if (env.getProcedureScheduler().waitPeerExclusiveLock(this, peerId)) {
|
||||||
|
@ -87,50 +65,11 @@ public abstract class AbstractPeerProcedure<TState>
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
|
||||||
super.serializeStateData(serializer);
|
|
||||||
serializer.serialize(PeerProcedureStateData.newBuilder().setPeerId(peerId).build());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
|
||||||
super.deserializeStateData(serializer);
|
|
||||||
peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void rollbackState(MasterProcedureEnv env, TState state)
|
|
||||||
throws IOException, InterruptedException {
|
|
||||||
if (state == getInitialState()) {
|
|
||||||
// actually the peer related operations has no rollback, but if we haven't done any
|
|
||||||
// modifications on the peer storage yet, we can just return.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected final void refreshPeer(MasterProcedureEnv env, PeerOperationType type) {
|
protected final void refreshPeer(MasterProcedureEnv env, PeerOperationType type) {
|
||||||
addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
|
addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
|
||||||
.map(sn -> new RefreshPeerProcedure(peerId, type, sn)).toArray(RefreshPeerProcedure[]::new));
|
.map(sn -> new RefreshPeerProcedure(peerId, type, sn)).toArray(RefreshPeerProcedure[]::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
|
|
||||||
setState(ProcedureProtos.ProcedureState.RUNNABLE);
|
|
||||||
env.getProcedureScheduler().addFront(this);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected final ProcedureSuspendedException suspend(long backoff)
|
|
||||||
throws ProcedureSuspendedException {
|
|
||||||
attempts++;
|
|
||||||
setTimeout(Math.toIntExact(backoff));
|
|
||||||
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
|
|
||||||
skipPersistence();
|
|
||||||
throw new ProcedureSuspendedException();
|
|
||||||
}
|
|
||||||
|
|
||||||
// will be overridden in test to simulate error
|
// will be overridden in test to simulate error
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
protected void enablePeer(MasterProcedureEnv env) throws ReplicationException {
|
protected void enablePeer(MasterProcedureEnv env) throws ReplicationException {
|
||||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyStateData;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyStateData;
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class RecoverStandbyProcedure extends AbstractPeerProcedure<RecoverStandbyState> {
|
public class RecoverStandbyProcedure extends AbstractPeerNoLockProcedure<RecoverStandbyState> {
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(RecoverStandbyProcedure.class);
|
private static final Logger LOG = LoggerFactory.getLogger(RecoverStandbyProcedure.class);
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.S
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class SyncReplicationReplayWALProcedure
|
public class SyncReplicationReplayWALProcedure
|
||||||
extends AbstractPeerProcedure<SyncReplicationReplayWALState> {
|
extends AbstractPeerNoLockProcedure<SyncReplicationReplayWALState> {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(SyncReplicationReplayWALProcedure.class);
|
LoggerFactory.getLogger(SyncReplicationReplayWALProcedure.class);
|
||||||
|
|
Loading…
Reference in New Issue