HBASE-19957 General framework to transit sync replication state

zhangduo 2018-02-09 18:33:28 +08:00
parent 00e54aae24
commit 39dd81a7c6
33 changed files with 729 additions and 299 deletions


@@ -15,7 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.replication;
import java.util.Collection;
@@ -25,7 +24,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Bytes;


@@ -20,7 +20,10 @@ package org.apache.hadoop.hbase.replication;
import org.apache.yetus.audience.InterfaceAudience;
/**
- * The POJO equivalent of ReplicationProtos.ReplicationPeerDescription
+ * The POJO equivalent of ReplicationProtos.ReplicationPeerDescription.
+ * <p>
+ * To developer, here we do not store the new sync replication state since it is just an
+ * intermediate state and this class is public.
*/
@InterfaceAudience.Public
public class ReplicationPeerDescription {


@@ -29,14 +29,19 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationProtos;
/**
* Used by synchronous replication. Indicate the state of the current cluster in a synchronous
* replication peer. The state may be one of {@link SyncReplicationState#ACTIVE},
- * {@link SyncReplicationState#DOWNGRADE_ACTIVE} or
- * {@link SyncReplicationState#STANDBY}.
+ * {@link SyncReplicationState#DOWNGRADE_ACTIVE} or {@link SyncReplicationState#STANDBY}.
* <p>
* For asynchronous replication, the state is {@link SyncReplicationState#NONE}.
*/
@InterfaceAudience.Public
public enum SyncReplicationState {
- NONE, ACTIVE, DOWNGRADE_ACTIVE, STANDBY;
+ NONE(0), ACTIVE(1), DOWNGRADE_ACTIVE(2), STANDBY(3);
+ private final byte value;
+ private SyncReplicationState(int value) {
+ this.value = (byte) value;
+ }
public static SyncReplicationState valueOf(int value) {
switch (value) {
@@ -53,13 +58,17 @@ public enum SyncReplicationState {
}
}
+ public int value() {
+ return value & 0xFF;
+ }
public static byte[] toByteArray(SyncReplicationState state) {
return ProtobufUtil
.prependPBMagic(ReplicationPeerConfigUtil.toSyncReplicationState(state).toByteArray());
}
public static SyncReplicationState parseFrom(byte[] bytes) throws InvalidProtocolBufferException {
return ReplicationPeerConfigUtil.toSyncReplicationState(ReplicationProtos.SyncReplicationState
.parseFrom(Arrays.copyOfRange(bytes, ProtobufUtil.lengthOfPBMagic(), bytes.length)));
}
}
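For orientation, a minimal sketch (not part of this commit) of the numeric mapping added above; value() and valueOf(int) are expected to round-trip each constant:

  // Sketch only: NONE=0, ACTIVE=1, DOWNGRADE_ACTIVE=2, STANDBY=3.
  for (SyncReplicationState s : SyncReplicationState.values()) {
    assert SyncReplicationState.valueOf(s.value()) == s;
  }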


@@ -1355,6 +1355,9 @@ public final class HConstants {
public static final String NOT_IMPLEMENTED = "Not implemented";
+ // TODO: need to find a better place to hold it.
+ public static final String SYNC_REPLICATION_ENABLED = "hbase.replication.sync.enabled";
private HConstants() {
// Can't be instantiated with this ctor.
}
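A minimal usage sketch for the new flag (not from this commit; the false default shown here is an assumption):

  // Sketch only: consult the flag from a Hadoop Configuration.
  boolean syncReplicationEnabled =
      conf.getBoolean(HConstants.SYNC_REPLICATION_ENABLED, false);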


@@ -391,6 +391,17 @@ enum PeerModificationState {
POST_PEER_MODIFICATION = 8;
}
+ enum PeerSyncReplicationStateTransitionState {
+ PRE_PEER_SYNC_REPLICATION_STATE_TRANSITION = 1;
+ SET_PEER_NEW_SYNC_REPLICATION_STATE = 2;
+ REFRESH_PEER_SYNC_REPLICATION_STATE_ON_RS_BEGIN = 3;
+ REPLAY_REMOTE_WAL_IN_PEER = 4;
+ REOPEN_ALL_REGIONS_IN_PEER = 5;
+ TRANSIT_PEER_NEW_SYNC_REPLICATION_STATE = 6;
+ REFRESH_PEER_SYNC_REPLICATION_STATE_ON_RS_END = 7;
+ POST_PEER_SYNC_REPLICATION_STATE_TRANSITION = 8;
+ }
message PeerModificationStateData {
required string peer_id = 1;
}
@@ -401,18 +412,23 @@ enum PeerModificationType {
ENABLE_PEER = 3;
DISABLE_PEER = 4;
UPDATE_PEER_CONFIG = 5;
+ TRANSIT_SYNC_REPLICATION_STATE = 6;
}
message RefreshPeerStateData {
required string peer_id = 1;
required PeerModificationType type = 2;
required ServerName target_server = 3;
+ /** We need multiple stages for sync replication state transition **/
+ optional uint32 stage = 4 [default = 0];
}
message RefreshPeerParameter {
required string peer_id = 1;
required PeerModificationType type = 2;
required ServerName target_server = 3;
+ /** We need multiple stages for sync replication state transition **/
+ optional uint32 stage = 4 [default = 0];
}
message PeerProcedureStateData {
@@ -459,5 +475,7 @@ message InitMetaStateData {
}
message TransitPeerSyncReplicationStateStateData {
- required SyncReplicationState syncReplicationState = 1;
+ /** Could be null if we fail in pre check, so optional */
+ optional SyncReplicationState fromState = 1;
+ required SyncReplicationState toState = 2;
}


@@ -23,6 +23,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
+ import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
@@ -36,7 +37,14 @@ public class ReplicationPeerImpl implements ReplicationPeer {
private volatile PeerState peerState;
- private volatile SyncReplicationState syncReplicationState;
+ // The lower 16 bits are the current sync replication state, the higher 16 bits are the new sync
+ // replication state. Embedded in one int so user can not get an inconsistency view of state and
+ // new state.
+ private volatile int syncReplicationStateBits;
+ private static final int SHIFT = 16;
+ private static final int AND_BITS = 0xFFFF;
private final List<ReplicationPeerConfigListener> peerConfigListeners;
@@ -48,12 +56,14 @@ public class ReplicationPeerImpl implements ReplicationPeer {
* @param peerConfig configuration for the replication peer
*/
public ReplicationPeerImpl(Configuration conf, String id, ReplicationPeerConfig peerConfig,
- boolean peerState, SyncReplicationState syncReplicationState) {
+ boolean peerState, SyncReplicationState syncReplicationState,
+ SyncReplicationState newSyncReplicationState) {
this.conf = conf;
this.id = id;
this.peerState = peerState ? PeerState.ENABLED : PeerState.DISABLED;
this.peerConfig = peerConfig;
- this.syncReplicationState = syncReplicationState;
+ this.syncReplicationStateBits =
+ syncReplicationState.value() | (newSyncReplicationState.value() << SHIFT);
this.peerConfigListeners = new ArrayList<>();
}
@@ -66,6 +76,16 @@ public class ReplicationPeerImpl implements ReplicationPeer {
peerConfigListeners.forEach(listener -> listener.peerConfigUpdated(peerConfig));
}
+ public void setNewSyncReplicationState(SyncReplicationState newState) {
+ this.syncReplicationStateBits =
+ (this.syncReplicationStateBits & AND_BITS) | (newState.value() << SHIFT);
+ }
+ public void transitSyncReplicationState() {
+ this.syncReplicationStateBits =
+ (this.syncReplicationStateBits >>> SHIFT) | (SyncReplicationState.NONE.value() << SHIFT);
+ }
/**
* Get the identifier of this peer
* @return string representation of the id (short)
@@ -80,9 +100,26 @@ public class ReplicationPeerImpl implements ReplicationPeer {
return peerState;
}
+ private static SyncReplicationState getSyncReplicationState(int bits) {
+ return SyncReplicationState.valueOf(bits & AND_BITS);
+ }
+ private static SyncReplicationState getNewSyncReplicationState(int bits) {
+ return SyncReplicationState.valueOf(bits >>> SHIFT);
+ }
+ public Pair<SyncReplicationState, SyncReplicationState> getSyncReplicationStateAndNewState() {
+ int bits = this.syncReplicationStateBits;
+ return Pair.newPair(getSyncReplicationState(bits), getNewSyncReplicationState(bits));
+ }
+ public SyncReplicationState getNewSyncReplicationState() {
+ return getNewSyncReplicationState(syncReplicationStateBits);
+ }
@Override
public SyncReplicationState getSyncReplicationState() {
- return syncReplicationState;
+ return getSyncReplicationState(syncReplicationStateBits);
}
@Override
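To make the packing comment above concrete, a standalone sketch (not from this commit) using the public value()/valueOf(int) mapping of SyncReplicationState:

  // Sketch only: current state in the low 16 bits, pending new state in the high 16 bits,
  // so a single volatile read always yields a consistent pair.
  int bits = SyncReplicationState.ACTIVE.value()
      | (SyncReplicationState.DOWNGRADE_ACTIVE.value() << 16);
  SyncReplicationState current = SyncReplicationState.valueOf(bits & 0xFFFF); // ACTIVE
  SyncReplicationState pending = SyncReplicationState.valueOf(bits >>> 16);   // DOWNGRADE_ACTIVE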


@@ -18,7 +18,6 @@
package org.apache.hadoop.hbase.replication;
import java.util.List;
import org.apache.yetus.audience.InterfaceAudience;
/**
@@ -72,16 +71,30 @@ public interface ReplicationPeerStorage {
ReplicationPeerConfig getPeerConfig(String peerId) throws ReplicationException;
/**
- * Set the state of current cluster in a synchronous replication peer.
+ * Set the new sync replication state that we are going to transit to.
* @throws ReplicationException if there are errors accessing the storage service.
*/
- void setPeerSyncReplicationState(String peerId, SyncReplicationState state)
+ void setPeerNewSyncReplicationState(String peerId, SyncReplicationState state)
throws ReplicationException;
/**
- * Get the state of current cluster in a synchronous replication peer.
+ * Overwrite the sync replication state with the new sync replication state which is set with the
+ * {@link #setPeerNewSyncReplicationState(String, SyncReplicationState)} method above, and clear
+ * the new sync replication state.
* @throws ReplicationException if there are errors accessing the storage service.
*/
- SyncReplicationState getPeerSyncReplicationState(String peerId)
- throws ReplicationException;
+ void transitPeerSyncReplicationState(String peerId) throws ReplicationException;
+ /**
+ * Get the sync replication state.
+ * @throws ReplicationException if there are errors accessing the storage service.
+ */
+ SyncReplicationState getPeerSyncReplicationState(String peerId) throws ReplicationException;
+ /**
+ * Get the new sync replication state. Will return {@link SyncReplicationState#NONE} if we are
+ * not in a transition.
+ * @throws ReplicationException if there are errors accessing the storage service.
+ */
+ SyncReplicationState getPeerNewSyncReplicationState(String peerId) throws ReplicationException;
}
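A minimal sketch (not from this commit) of the intended call order against this interface, assuming a ReplicationPeerStorage instance named storage:

  // Sketch only: record the target state first, then promote it once the cluster is ready.
  storage.setPeerNewSyncReplicationState(peerId, SyncReplicationState.DOWNGRADE_ACTIVE);
  // ... refresh region servers, replay remote WALs, reopen regions ...
  storage.transitPeerSyncReplicationState(peerId);
  // The pending state is cleared back to NONE by the transit call.
  assert storage.getPeerNewSyncReplicationState(peerId) == SyncReplicationState.NONE;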


@@ -80,8 +80,8 @@ public class ReplicationPeers {
return true;
}
- public void removePeer(String peerId) {
- peerCache.remove(peerId);
+ public ReplicationPeerImpl removePeer(String peerId) {
+ return peerCache.remove(peerId);
}
/**
@@ -110,22 +110,29 @@ public class ReplicationPeers {
public PeerState refreshPeerState(String peerId) throws ReplicationException {
ReplicationPeerImpl peer = peerCache.get(peerId);
- if (peer == null) {
- throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
- }
peer.setPeerState(peerStorage.isPeerEnabled(peerId));
return peer.getPeerState();
}
public ReplicationPeerConfig refreshPeerConfig(String peerId) throws ReplicationException {
ReplicationPeerImpl peer = peerCache.get(peerId);
- if (peer == null) {
- throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
- }
peer.setPeerConfig(peerStorage.getPeerConfig(peerId));
return peer.getPeerConfig();
}
+ public SyncReplicationState refreshPeerNewSyncReplicationState(String peerId)
+ throws ReplicationException {
+ ReplicationPeerImpl peer = peerCache.get(peerId);
+ SyncReplicationState newState = peerStorage.getPeerNewSyncReplicationState(peerId);
+ peer.setNewSyncReplicationState(newState);
+ return newState;
+ }
+ public void transitPeerSyncReplicationState(String peerId) {
+ ReplicationPeerImpl peer = peerCache.get(peerId);
+ peer.transitSyncReplicationState();
+ }
/**
* Helper method to connect to a peer
* @param peerId peer's identifier
@@ -135,7 +142,9 @@ public class ReplicationPeers {
ReplicationPeerConfig peerConfig = peerStorage.getPeerConfig(peerId);
boolean enabled = peerStorage.isPeerEnabled(peerId);
SyncReplicationState syncReplicationState = peerStorage.getPeerSyncReplicationState(peerId);
+ SyncReplicationState newSyncReplicationState =
+ peerStorage.getPeerNewSyncReplicationState(peerId);
return new ReplicationPeerImpl(ReplicationUtils.getPeerClusterConfiguration(peerConfig, conf),
- peerId, peerConfig, enabled, syncReplicationState);
+ peerId, peerConfig, enabled, syncReplicationState, newSyncReplicationState);
}
}


@@ -53,7 +53,12 @@ public class ZKReplicationPeerStorage extends ZKReplicationStorageBase
public static final byte[] DISABLED_ZNODE_BYTES =
toByteArray(ReplicationProtos.ReplicationState.State.DISABLED);
- public static final String SYNCHRONOUS_REPLICATION_STATE_ZNODE = "sync-rep-state";
+ public static final String SYNC_REPLICATION_STATE_ZNODE = "sync-rep-state";
+ public static final String NEW_SYNC_REPLICATION_STATE_ZNODE = "new-sync-rep-state";
+ public static final byte[] NONE_STATE_ZNODE_BYTES =
+ SyncReplicationState.toByteArray(SyncReplicationState.NONE);
/**
* The name of the znode that contains the replication status of a remote slave (i.e. peer)
@@ -85,7 +90,11 @@ public class ZKReplicationPeerStorage extends ZKReplicationStorageBase
@VisibleForTesting
public String getSyncReplicationStateNode(String peerId) {
- return ZNodePaths.joinZNode(getPeerNode(peerId), SYNCHRONOUS_REPLICATION_STATE_ZNODE);
+ return ZNodePaths.joinZNode(getPeerNode(peerId), SYNC_REPLICATION_STATE_ZNODE);
+ }
+ private String getNewSyncReplicationStateNode(String peerId) {
+ return ZNodePaths.joinZNode(getPeerNode(peerId), NEW_SYNC_REPLICATION_STATE_ZNODE);
}
@Override
@@ -97,14 +106,15 @@ public class ZKReplicationPeerStorage extends ZKReplicationStorageBase
ZKUtilOp.createAndFailSilent(getPeerStateNode(peerId),
enabled ? ENABLED_ZNODE_BYTES : DISABLED_ZNODE_BYTES),
ZKUtilOp.createAndFailSilent(getSyncReplicationStateNode(peerId),
- SyncReplicationState.toByteArray(syncReplicationState)));
+ SyncReplicationState.toByteArray(syncReplicationState)),
+ ZKUtilOp.createAndFailSilent(getNewSyncReplicationStateNode(peerId), NONE_STATE_ZNODE_BYTES));
try {
ZKUtil.createWithParents(zookeeper, peersZNode);
ZKUtil.multiOrSequential(zookeeper, multiOps, false);
} catch (KeeperException e) {
throw new ReplicationException(
"Could not add peer with id=" + peerId + ", peerConfig=>" + peerConfig + ", state=" +
(enabled ? "ENABLED" : "DISABLED") + ", syncReplicationState=" + syncReplicationState,
e);
}
}
@@ -170,38 +180,63 @@ public class ZKReplicationPeerStorage extends ZKReplicationStorageBase
}
if (data == null || data.length == 0) {
throw new ReplicationException(
"Replication peer config data shouldn't be empty, peerId=" + peerId);
}
try {
return ReplicationPeerConfigUtil.parsePeerFrom(data);
} catch (DeserializationException e) {
throw new ReplicationException(
"Failed to parse replication peer config for peer with id=" + peerId, e);
}
}
@Override
- public void setPeerSyncReplicationState(String peerId, SyncReplicationState state)
+ public void setPeerNewSyncReplicationState(String peerId, SyncReplicationState state)
throws ReplicationException {
try {
- ZKUtil.setData(zookeeper, getSyncReplicationStateNode(peerId),
+ ZKUtil.createSetData(zookeeper, getNewSyncReplicationStateNode(peerId),
SyncReplicationState.toByteArray(state));
} catch (KeeperException e) {
throw new ReplicationException(
- "Unable to change the cluster state for the synchronous replication peer with id=" + peerId,
- e);
+ "Unable to set the new sync replication state for peer with id=" + peerId, e);
}
}
+ @Override
+ public void transitPeerSyncReplicationState(String peerId) throws ReplicationException {
+ String newStateNode = getNewSyncReplicationStateNode(peerId);
+ try {
+ byte[] data = ZKUtil.getData(zookeeper, newStateNode);
+ ZKUtil.multiOrSequential(zookeeper,
+ Arrays.asList(ZKUtilOp.setData(newStateNode, NONE_STATE_ZNODE_BYTES),
+ ZKUtilOp.setData(getSyncReplicationStateNode(peerId), data)),
+ false);
+ } catch (KeeperException | InterruptedException e) {
+ throw new ReplicationException(
+ "Error transiting sync replication state for peer with id=" + peerId, e);
+ }
+ }
+ private SyncReplicationState getSyncReplicationState(String peerId, String path)
+ throws ReplicationException {
+ try {
+ byte[] data = ZKUtil.getData(zookeeper, path);
+ return SyncReplicationState.parseFrom(data);
+ } catch (KeeperException | InterruptedException | IOException e) {
+ throw new ReplicationException(
+ "Error getting sync replication state of path " + path + " for peer with id=" + peerId, e);
+ }
+ }
+ @Override
+ public SyncReplicationState getPeerNewSyncReplicationState(String peerId)
+ throws ReplicationException {
+ return getSyncReplicationState(peerId, getNewSyncReplicationStateNode(peerId));
+ }
@Override
public SyncReplicationState getPeerSyncReplicationState(String peerId)
throws ReplicationException {
- try {
- byte[] data = ZKUtil.getData(zookeeper, getSyncReplicationStateNode(peerId));
- return SyncReplicationState.parseFrom(data);
- } catch (KeeperException | InterruptedException | IOException e) {
- throw new ReplicationException(
- "Error getting cluster state for the synchronous replication peer with id=" + peerId, e);
- }
+ return getSyncReplicationState(peerId, getSyncReplicationStateNode(peerId));
}
}
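For orientation, a sketch (not from this commit) of the per-peer znode layout this class manages, assuming the default base znode /hbase/replication/peers:

  // Sketch only: paths built via getPeerNode(peerId) and ZNodePaths.joinZNode.
  String peerNode         = "/hbase/replication/peers/" + peerId;
  String syncStateNode    = peerNode + "/sync-rep-state";     // current sync replication state
  String newSyncStateNode = peerNode + "/new-sync-rep-state"; // pending state, NONE when idle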


@@ -1395,7 +1395,7 @@ public interface MasterObserver {
* Called before transit current cluster state for the specified synchronous replication peer
* @param ctx the environment to interact with the framework and master
* @param peerId a short name that identifies the peer
- * @param state a new state
+ * @param state the new state
*/
default void preTransitReplicationPeerSyncReplicationState(
final ObserverContext<MasterCoprocessorEnvironment> ctx, String peerId,
@@ -1406,11 +1406,12 @@ public interface MasterObserver {
* Called after transit current cluster state for the specified synchronous replication peer
* @param ctx the environment to interact with the framework and master
* @param peerId a short name that identifies the peer
- * @param state a new state
+ * @param from the old state
+ * @param to the new state
*/
default void postTransitReplicationPeerSyncReplicationState(
final ObserverContext<MasterCoprocessorEnvironment> ctx, String peerId,
- SyncReplicationState state) throws IOException {
+ SyncReplicationState from, SyncReplicationState to) throws IOException {
}
/**


@@ -133,10 +133,10 @@ import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
+ import org.apache.hadoop.hbase.master.replication.AbstractPeerProcedure;
import org.apache.hadoop.hbase.master.replication.AddPeerProcedure;
import org.apache.hadoop.hbase.master.replication.DisablePeerProcedure;
import org.apache.hadoop.hbase.master.replication.EnablePeerProcedure;
- import org.apache.hadoop.hbase.master.replication.ModifyPeerProcedure;
import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
import org.apache.hadoop.hbase.master.replication.TransitPeerSyncReplicationStateProcedure;
@@ -3493,7 +3493,7 @@ public class HMaster extends HRegionServer implements MasterServices {
return favoredNodesManager;
}
- private long executePeerProcedure(ModifyPeerProcedure procedure) throws IOException {
+ private long executePeerProcedure(AbstractPeerProcedure<?> procedure) throws IOException {
long procId = procedureExecutor.submitProcedure(procedure);
procedure.getLatch().await();
return procId;


@@ -1607,22 +1607,22 @@ public class MasterCoprocessorHost
});
}
- public void preTransitReplicationPeerSyncReplicationState(final String peerId,
- final SyncReplicationState clusterState) throws IOException {
+ public void preTransitReplicationPeerSyncReplicationState(String peerId,
+ SyncReplicationState state) throws IOException {
execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {
@Override
public void call(MasterObserver observer) throws IOException {
- observer.preTransitReplicationPeerSyncReplicationState(this, peerId, clusterState);
+ observer.preTransitReplicationPeerSyncReplicationState(this, peerId, state);
}
});
}
- public void postTransitReplicationPeerSyncReplicationState(final String peerId,
- final SyncReplicationState clusterState) throws IOException {
+ public void postTransitReplicationPeerSyncReplicationState(String peerId,
+ SyncReplicationState from, SyncReplicationState to) throws IOException {
execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {
@Override
public void call(MasterObserver observer) throws IOException {
- observer.postTransitReplicationPeerSyncReplicationState(this, peerId, clusterState);
+ observer.postTransitReplicationPeerSyncReplicationState(this, peerId, from, to);
}
});
}


@@ -46,7 +46,7 @@ public abstract class AbstractPeerProcedure<TState>
protected AbstractPeerProcedure(String peerId) {
this.peerId = peerId;
- this.latch = ProcedurePrepareLatch.createLatch(2, 0);
+ this.latch = ProcedurePrepareLatch.createLatch(2, 1);
}
public ProcedurePrepareLatch getLatch() {
@@ -94,4 +94,16 @@ public abstract class AbstractPeerProcedure<TState>
super.deserializeStateData(serializer);
peerId = serializer.deserialize(PeerProcedureStateData.class).getPeerId();
}
+ @Override
+ protected void rollbackState(MasterProcedureEnv env, TState state)
+ throws IOException, InterruptedException {
+ if (state == getInitialState()) {
+ // actually the peer related operations has no rollback, but if we haven't done any
+ // modifications on the peer storage yet, we can just return.
+ return;
+ }
+ throw new UnsupportedOperationException();
+ }
}


@@ -327,17 +327,6 @@ public abstract class ModifyPeerProcedure extends AbstractPeerProcedure<PeerModi
}
}
- @Override
- protected void rollbackState(MasterProcedureEnv env, PeerModificationState state)
- throws IOException, InterruptedException {
- if (state == PeerModificationState.PRE_PEER_MODIFICATION) {
- // actually the peer related operations has no rollback, but if we haven't done any
- // modifications on the peer storage yet, we can just return.
- return;
- }
- throw new UnsupportedOperationException();
- }
@Override
protected PeerModificationState getState(int stateId) {
return PeerModificationState.forNumber(stateId);


@@ -55,6 +55,8 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
justification = "Will never change after construction")
private ServerName targetServer;
+ private int stage;
private boolean dispatched;
private ProcedureEvent<?> event;
@@ -65,9 +67,15 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
}
public RefreshPeerProcedure(String peerId, PeerOperationType type, ServerName targetServer) {
+ this(peerId, type, targetServer, 0);
+ }
+ public RefreshPeerProcedure(String peerId, PeerOperationType type, ServerName targetServer,
+ int stage) {
this.peerId = peerId;
this.type = type;
this.targetServer = targetServer;
+ this.stage = stage;
}
@Override
@@ -92,6 +100,8 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
return PeerModificationType.DISABLE_PEER;
case UPDATE_CONFIG:
return PeerModificationType.UPDATE_PEER_CONFIG;
+ case TRANSIT_SYNC_REPLICATION_STATE:
+ return PeerModificationType.TRANSIT_SYNC_REPLICATION_STATE;
default:
throw new IllegalArgumentException("Unknown type: " + type);
}
@@ -109,6 +119,8 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
return PeerOperationType.DISABLE;
case UPDATE_PEER_CONFIG:
return PeerOperationType.UPDATE_CONFIG;
+ case TRANSIT_SYNC_REPLICATION_STATE:
+ return PeerOperationType.TRANSIT_SYNC_REPLICATION_STATE;
default:
throw new IllegalArgumentException("Unknown type: " + type);
}
@@ -119,7 +131,8 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
assert targetServer.equals(remote);
return new ServerOperation(this, getProcId(), RefreshPeerCallable.class,
RefreshPeerParameter.newBuilder().setPeerId(peerId).setType(toPeerModificationType(type))
- .setTargetServer(ProtobufUtil.toServerName(remote)).build().toByteArray());
+ .setTargetServer(ProtobufUtil.toServerName(remote)).setStage(stage).build()
+ .toByteArray());
}
private void complete(MasterProcedureEnv env, Throwable error) {
@@ -196,7 +209,7 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
serializer.serialize(
RefreshPeerStateData.newBuilder().setPeerId(peerId).setType(toPeerModificationType(type))
- .setTargetServer(ProtobufUtil.toServerName(targetServer)).build());
+ .setTargetServer(ProtobufUtil.toServerName(targetServer)).setStage(stage).build());
}
@Override
@@ -205,5 +218,6 @@ public class RefreshPeerProcedure extends Procedure<MasterProcedureEnv>
peerId = data.getPeerId();
type = toPeerOperationType(data.getType());
targetServer = ProtobufUtil.toServerName(data.getTargetServer());
+ stage = data.getStage();
}
}


@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.master.replication;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
- import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
@@ -50,6 +49,9 @@ import org.apache.hadoop.hbase.zookeeper.ZKConfig;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.yetus.audience.InterfaceAudience;
+ import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
+ import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
/**
* Manages and performs all replication admin operations.
* <p>
@@ -64,15 +66,11 @@ public class ReplicationPeerManager {
private final ConcurrentMap<String, ReplicationPeerDescription> peers;
- private final EnumMap<SyncReplicationState, EnumSet<SyncReplicationState>> allowedTransition =
- new EnumMap<SyncReplicationState, EnumSet<SyncReplicationState>>(SyncReplicationState.class) {
- {
- put(SyncReplicationState.ACTIVE, EnumSet.of(SyncReplicationState.DOWNGRADE_ACTIVE));
- put(SyncReplicationState.STANDBY, EnumSet.of(SyncReplicationState.DOWNGRADE_ACTIVE));
- put(SyncReplicationState.DOWNGRADE_ACTIVE,
- EnumSet.of(SyncReplicationState.STANDBY, SyncReplicationState.ACTIVE));
- }
- };
+ private final ImmutableMap<SyncReplicationState, EnumSet<SyncReplicationState>>
+ allowedTransition = Maps.immutableEnumMap(ImmutableMap.of(SyncReplicationState.ACTIVE,
+ EnumSet.of(SyncReplicationState.DOWNGRADE_ACTIVE), SyncReplicationState.STANDBY,
+ EnumSet.of(SyncReplicationState.DOWNGRADE_ACTIVE), SyncReplicationState.DOWNGRADE_ACTIVE,
+ EnumSet.of(SyncReplicationState.STANDBY, SyncReplicationState.ACTIVE)));
ReplicationPeerManager(ReplicationPeerStorage peerStorage, ReplicationQueueStorage queueStorage,
ConcurrentMap<String, ReplicationPeerDescription> peers) {
@@ -165,9 +163,9 @@ public class ReplicationPeerManager {
if (!isStringEquals(peerConfig.getRemoteWALDir(), oldPeerConfig.getRemoteWALDir())) {
throw new DoNotRetryIOException(
"Changing the remote wal dir on an existing peer is not allowed. Existing remote wal " +
"dir '" + oldPeerConfig.getRemoteWALDir() + "' for peer " + peerId +
" does not match new remote wal dir '" + peerConfig.getRemoteWALDir() + "'");
}
if (oldPeerConfig.isSyncReplication()) {
@@ -180,15 +178,19 @@ public class ReplicationPeerManager {
return desc;
}
- public void preTransitPeerSyncReplicationState(String peerId, SyncReplicationState state)
- throws DoNotRetryIOException {
+ /**
+ * @return the old state.
+ */
+ public SyncReplicationState preTransitPeerSyncReplicationState(String peerId,
+ SyncReplicationState state) throws DoNotRetryIOException {
ReplicationPeerDescription desc = checkPeerExists(peerId);
SyncReplicationState fromState = desc.getSyncReplicationState();
EnumSet<SyncReplicationState> allowedToStates = allowedTransition.get(fromState);
if (allowedToStates == null || !allowedToStates.contains(state)) {
throw new DoNotRetryIOException("Can not transit current cluster state from " + fromState +
" to " + state + " for peer id=" + peerId);
}
+ return fromState;
}
public void addPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
@@ -199,8 +201,8 @@ public class ReplicationPeerManager {
}
ReplicationPeerConfig copiedPeerConfig = ReplicationPeerConfig.newBuilder(peerConfig).build();
SyncReplicationState syncReplicationState =
copiedPeerConfig.isSyncReplication() ? SyncReplicationState.DOWNGRADE_ACTIVE
: SyncReplicationState.NONE;
peerStorage.addPeer(peerId, copiedPeerConfig, enabled, syncReplicationState);
peers.put(peerId,
new ReplicationPeerDescription(peerId, enabled, copiedPeerConfig, syncReplicationState));
@@ -240,7 +242,7 @@ public class ReplicationPeerManager {
ReplicationPeerDescription desc = peers.get(peerId);
ReplicationPeerConfig oldPeerConfig = desc.getPeerConfig();
ReplicationPeerConfigBuilder newPeerConfigBuilder =
ReplicationPeerConfig.newBuilder(peerConfig);
// we need to use the new conf to overwrite the old one.
newPeerConfigBuilder.putAllConfiguration(oldPeerConfig.getConfiguration());
newPeerConfigBuilder.putAllConfiguration(peerConfig.getConfiguration());
@@ -257,7 +259,7 @@ public class ReplicationPeerManager {
return new ArrayList<>(peers.values());
}
return peers.values().stream().filter(r -> pattern.matcher(r.getPeerId()).matches())
.collect(Collectors.toList());
}
public Optional<ReplicationPeerConfig> getPeerConfig(String peerId) {
@@ -269,12 +271,23 @@ public class ReplicationPeerManager {
queueStorage.removeLastSequenceIds(peerId);
}
- public void transitPeerSyncReplicationState(String peerId, SyncReplicationState state)
+ public void setPeerNewSyncReplicationState(String peerId, SyncReplicationState state)
throws ReplicationException {
+ peerStorage.setPeerNewSyncReplicationState(peerId, state);
+ }
+ public void transitPeerSyncReplicationState(String peerId, SyncReplicationState newState)
+ throws ReplicationException {
+ if (peerStorage.getPeerNewSyncReplicationState(peerId) != SyncReplicationState.NONE) {
+ // Only transit if this is not a retry
+ peerStorage.transitPeerSyncReplicationState(peerId);
+ }
ReplicationPeerDescription desc = peers.get(peerId);
- peerStorage.setPeerSyncReplicationState(peerId, state);
- peers.put(peerId,
- new ReplicationPeerDescription(peerId, desc.isEnabled(), desc.getPeerConfig(), state));
+ if (desc.getSyncReplicationState() != newState) {
+ // Only recreate the desc if this is not a retry
+ peers.put(peerId,
+ new ReplicationPeerDescription(peerId, desc.isEnabled(), desc.getPeerConfig(), newState));
+ }
}
public void removeAllQueuesAndHFileRefs(String peerId) throws ReplicationException {
@@ -301,10 +314,10 @@ public class ReplicationPeerManager {
// If replicate_all flag is true, it means all user tables will be replicated to peer cluster.
// Then allow config exclude namespaces or exclude table-cfs which can't be replicated to peer
// cluster.
- if ((peerConfig.getNamespaces() != null && !peerConfig.getNamespaces().isEmpty())
- || (peerConfig.getTableCFsMap() != null && !peerConfig.getTableCFsMap().isEmpty())) {
- throw new DoNotRetryIOException("Need clean namespaces or table-cfs config firstly "
- + "when you want replicate all cluster");
+ if ((peerConfig.getNamespaces() != null && !peerConfig.getNamespaces().isEmpty()) ||
+ (peerConfig.getTableCFsMap() != null && !peerConfig.getTableCFsMap().isEmpty())) {
+ throw new DoNotRetryIOException("Need clean namespaces or table-cfs config firstly " +
+ "when you want replicate all cluster");
}
checkNamespacesAndTableCfsConfigConflict(peerConfig.getExcludeNamespaces(),
peerConfig.getExcludeTableCFsMap());
@@ -312,13 +325,13 @@ public class ReplicationPeerManager {
// If replicate_all flag is false, it means all user tables can't be replicated to peer
// cluster. Then allow to config namespaces or table-cfs which will be replicated to peer
// cluster.
- if ((peerConfig.getExcludeNamespaces() != null
- && !peerConfig.getExcludeNamespaces().isEmpty())
- || (peerConfig.getExcludeTableCFsMap() != null
- && !peerConfig.getExcludeTableCFsMap().isEmpty())) {
+ if ((peerConfig.getExcludeNamespaces() != null &&
+ !peerConfig.getExcludeNamespaces().isEmpty()) ||
+ (peerConfig.getExcludeTableCFsMap() != null &&
+ !peerConfig.getExcludeTableCFsMap().isEmpty())) {
throw new DoNotRetryIOException(
- "Need clean exclude-namespaces or exclude-table-cfs config firstly"
- + " when replicate_all flag is false");
+ "Need clean exclude-namespaces or exclude-table-cfs config firstly" +
+ " when replicate_all flag is false");
}
checkNamespacesAndTableCfsConfigConflict(peerConfig.getNamespaces(),
peerConfig.getTableCFsMap());
@@ -338,11 +351,11 @@ public class ReplicationPeerManager {
// TODO: Add namespace, replicat_all flag back
if (peerConfig.replicateAllUserTables()) {
throw new DoNotRetryIOException(
"Only support replicated table config for sync replication peer");
}
if (peerConfig.getNamespaces() != null && !peerConfig.getNamespaces().isEmpty()) {
throw new DoNotRetryIOException(
"Only support replicated table config for sync replication peer");
}
if (peerConfig.getTableCFsMap() == null || peerConfig.getTableCFsMap().isEmpty()) {
throw new DoNotRetryIOException("Need config replicated tables for sync replication peer");
@@ -350,7 +363,7 @@ public class ReplicationPeerManager {
for (List<String> cfs : peerConfig.getTableCFsMap().values()) {
if (cfs != null && !cfs.isEmpty()) {
throw new DoNotRetryIOException(
"Only support replicated table config for sync replication peer");
}
}
}
@@ -394,7 +407,7 @@ public class ReplicationPeerManager {
private void checkConfiguredWALEntryFilters(ReplicationPeerConfig peerConfig)
throws DoNotRetryIOException {
String filterCSV = peerConfig.getConfiguration()
.get(BaseReplicationEndpoint.REPLICATION_WALENTRYFILTER_CONFIG_KEY);
if (filterCSV != null && !filterCSV.isEmpty()) {
String[] filters = filterCSV.split(",");
for (String filter : filters) {
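The allowedTransition map above encodes the only legal state changes; a standalone sketch (not from this commit) of that table:

  // Sketch only: ACTIVE -> DOWNGRADE_ACTIVE, STANDBY -> DOWNGRADE_ACTIVE,
  // DOWNGRADE_ACTIVE -> STANDBY or ACTIVE; everything else (including anything
  // involving NONE) is rejected with DoNotRetryIOException in preTransitPeerSyncReplicationState.
  EnumMap<SyncReplicationState, EnumSet<SyncReplicationState>> allowed =
      new EnumMap<>(SyncReplicationState.class);
  allowed.put(SyncReplicationState.ACTIVE, EnumSet.of(SyncReplicationState.DOWNGRADE_ACTIVE));
  allowed.put(SyncReplicationState.STANDBY, EnumSet.of(SyncReplicationState.DOWNGRADE_ACTIVE));
  allowed.put(SyncReplicationState.DOWNGRADE_ACTIVE,
      EnumSet.of(SyncReplicationState.STANDBY, SyncReplicationState.ACTIVE));
  assert !allowed.get(SyncReplicationState.ACTIVE).contains(SyncReplicationState.STANDBY);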


@@ -18,11 +18,12 @@
package org.apache.hadoop.hbase.master.replication;
import java.io.IOException;
+ import java.util.List;
+ import java.util.stream.Collectors;
+ import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.replication.ReplicationPeerConfigUtil;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
- import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
@@ -32,26 +33,29 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
- import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerModificationState;
+ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerSyncReplicationStateTransitionState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TransitPeerSyncReplicationStateStateData;
/**
- * The procedure for transit current cluster state for a synchronous replication peer.
+ * The procedure for transit current sync replication state for a synchronous replication peer.
*/
@InterfaceAudience.Private
- public class TransitPeerSyncReplicationStateProcedure extends ModifyPeerProcedure {
+ public class TransitPeerSyncReplicationStateProcedure
+ extends AbstractPeerProcedure<PeerSyncReplicationStateTransitionState> {
private static final Logger LOG =
LoggerFactory.getLogger(TransitPeerSyncReplicationStateProcedure.class);
- private SyncReplicationState state;
+ private SyncReplicationState fromState;
+ private SyncReplicationState toState;
public TransitPeerSyncReplicationStateProcedure() {
}
public TransitPeerSyncReplicationStateProcedure(String peerId, SyncReplicationState state) {
super(peerId);
- this.state = state;
+ this.toState = state;
}
@Override
@@ -59,100 +63,155 @@ public class TransitPeerSyncReplicationStateProcedure extends ModifyPeerProcedur
return PeerOperationType.TRANSIT_SYNC_REPLICATION_STATE;
}
- @Override
- protected void prePeerModification(MasterProcedureEnv env)
- throws IOException, ReplicationException {
- MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
- if (cpHost != null) {
- cpHost.preTransitReplicationPeerSyncReplicationState(peerId, state);
- }
- env.getReplicationPeerManager().preTransitPeerSyncReplicationState(peerId, state);
- }
- @Override
- protected void updatePeerStorage(MasterProcedureEnv env) throws ReplicationException {
- env.getReplicationPeerManager().transitPeerSyncReplicationState(peerId, state);
- }
- @Override
- protected void postPeerModification(MasterProcedureEnv env)
- throws IOException, ReplicationException {
- LOG.info("Successfully transit current cluster state to {} in synchronous replication peer {}",
- state, peerId);
- MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
- if (cpHost != null) {
- env.getMasterCoprocessorHost().postTransitReplicationPeerSyncReplicationState(peerId, state);
- }
- }
@Override
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.serializeStateData(serializer);
- serializer.serialize(TransitPeerSyncReplicationStateStateData.newBuilder()
- .setSyncReplicationState(ReplicationPeerConfigUtil.toSyncReplicationState(state)).build());
+ TransitPeerSyncReplicationStateStateData.Builder builder =
+ TransitPeerSyncReplicationStateStateData.newBuilder()
+ .setToState(ReplicationPeerConfigUtil.toSyncReplicationState(toState));
+ if (fromState != null) {
+ builder.setFromState(ReplicationPeerConfigUtil.toSyncReplicationState(fromState));
+ }
+ serializer.serialize(builder.build());
}
@Override
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.deserializeStateData(serializer);
TransitPeerSyncReplicationStateStateData data =
serializer.deserialize(TransitPeerSyncReplicationStateStateData.class);
- state = ReplicationPeerConfigUtil.toSyncReplicationState(data.getSyncReplicationState());
+ toState = ReplicationPeerConfigUtil.toSyncReplicationState(data.getToState());
+ if (data.hasFromState()) {
+ fromState = ReplicationPeerConfigUtil.toSyncReplicationState(data.getFromState());
+ }
}
@Override
- protected Flow executeFromState(MasterProcedureEnv env, PeerModificationState state)
+ protected PeerSyncReplicationStateTransitionState getState(int stateId) {
+ return PeerSyncReplicationStateTransitionState.forNumber(stateId);
+ }
+ @Override
+ protected int getStateId(PeerSyncReplicationStateTransitionState state) {
+ return state.getNumber();
+ }
+ @Override
+ protected PeerSyncReplicationStateTransitionState getInitialState() {
+ return PeerSyncReplicationStateTransitionState.PRE_PEER_SYNC_REPLICATION_STATE_TRANSITION;
+ }
+ private void preTransit(MasterProcedureEnv env) throws IOException {
+ MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
+ if (cpHost != null) {
+ cpHost.preTransitReplicationPeerSyncReplicationState(peerId, toState);
+ }
+ fromState = env.getReplicationPeerManager().preTransitPeerSyncReplicationState(peerId, toState);
+ }
+ private void postTransit(MasterProcedureEnv env) throws IOException {
+ LOG.info(
+ "Successfully transit current cluster state from {} to {} for sync replication peer {}",
+ fromState, toState, peerId);
+ MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
+ if (cpHost != null) {
+ env.getMasterCoprocessorHost().postTransitReplicationPeerSyncReplicationState(peerId,
+ fromState, toState);
+ }
+ }
+ private List<RegionInfo> getRegionsToReopen(MasterProcedureEnv env) {
+ return env.getReplicationPeerManager().getPeerConfig(peerId).get().getTableCFsMap().keySet()
+ .stream()
+ .flatMap(tn -> env.getAssignmentManager().getRegionStates().getRegionsOfTable(tn).stream())
+ .collect(Collectors.toList());
+ }
@Override
protected Flow executeFromState(MasterProcedureEnv env,
PeerSyncReplicationStateTransitionState state)
      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    switch (state) {
      case PRE_PEER_SYNC_REPLICATION_STATE_TRANSITION:
        try {
          preTransit(env);
        } catch (IOException e) {
          LOG.warn("Failed to call pre CP hook or the pre check is failed for peer {} " +
            "when transiting sync replication peer state to {}, " +
            "mark the procedure as failure and give up", peerId, toState, e);
          setFailure("master-transit-peer-sync-replication-state", e);
          return Flow.NO_MORE_STATE;
        }
        setNextState(PeerSyncReplicationStateTransitionState.SET_PEER_NEW_SYNC_REPLICATION_STATE);
        return Flow.HAS_MORE_STATE;
      case SET_PEER_NEW_SYNC_REPLICATION_STATE:
        try {
          env.getReplicationPeerManager().setPeerNewSyncReplicationState(peerId, toState);
        } catch (ReplicationException e) {
          LOG.warn("Failed to update peer storage for peer {} when starting transiting sync " +
            "replication peer state from {} to {}, retry", peerId, fromState, toState, e);
          throw new ProcedureYieldException();
        }
        setNextState(
          PeerSyncReplicationStateTransitionState.REFRESH_PEER_SYNC_REPLICATION_STATE_ON_RS_BEGIN);
        return Flow.HAS_MORE_STATE;
      case REFRESH_PEER_SYNC_REPLICATION_STATE_ON_RS_BEGIN:
        addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
          .map(sn -> new RefreshPeerProcedure(peerId, getPeerOperationType(), sn, 0))
          .toArray(RefreshPeerProcedure[]::new));
        if (fromState == SyncReplicationState.STANDBY &&
          toState == SyncReplicationState.DOWNGRADE_ACTIVE) {
          setNextState(PeerSyncReplicationStateTransitionState.REPLAY_REMOTE_WAL_IN_PEER);
        } else {
          setNextState(PeerSyncReplicationStateTransitionState.REOPEN_ALL_REGIONS_IN_PEER);
        }
        return Flow.HAS_MORE_STATE;
      case REPLAY_REMOTE_WAL_IN_PEER:
        // TODO: replay remote wal when transiting from S to DA.
        setNextState(PeerSyncReplicationStateTransitionState.REOPEN_ALL_REGIONS_IN_PEER);
        return Flow.HAS_MORE_STATE;
      case REOPEN_ALL_REGIONS_IN_PEER:
        try {
          addChildProcedure(
            env.getAssignmentManager().createReopenProcedures(getRegionsToReopen(env)));
        } catch (IOException e) {
          LOG.warn("Failed to schedule region reopen for peer {} when starting transiting sync " +
            "replication peer state from {} to {}, retry", peerId, fromState, toState, e);
          throw new ProcedureYieldException();
        }
        setNextState(
          PeerSyncReplicationStateTransitionState.TRANSIT_PEER_NEW_SYNC_REPLICATION_STATE);
        return Flow.HAS_MORE_STATE;
      case TRANSIT_PEER_NEW_SYNC_REPLICATION_STATE:
        try {
          env.getReplicationPeerManager().transitPeerSyncReplicationState(peerId, toState);
        } catch (ReplicationException e) {
          LOG.warn("Failed to update peer storage for peer {} when ending transiting sync " +
            "replication peer state from {} to {}, retry", peerId, fromState, toState, e);
          throw new ProcedureYieldException();
        }
        setNextState(
          PeerSyncReplicationStateTransitionState.REFRESH_PEER_SYNC_REPLICATION_STATE_ON_RS_END);
        return Flow.HAS_MORE_STATE;
      case REFRESH_PEER_SYNC_REPLICATION_STATE_ON_RS_END:
        addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
          .map(sn -> new RefreshPeerProcedure(peerId, getPeerOperationType(), sn, 1))
          .toArray(RefreshPeerProcedure[]::new));
        setNextState(
          PeerSyncReplicationStateTransitionState.POST_PEER_SYNC_REPLICATION_STATE_TRANSITION);
        return Flow.HAS_MORE_STATE;
      case POST_PEER_SYNC_REPLICATION_STATE_TRANSITION:
        try {
          postTransit(env);
        } catch (IOException e) {
          LOG.warn(
            "Failed to call post CP hook for peer {} when transiting sync replication " +
              "peer state from {} to {}, ignore since the procedure has already done",
            peerId, fromState, toState, e);
        }
        releaseLatch();
        return Flow.NO_MORE_STATE;
      default:
        throw new UnsupportedOperationException("unhandled state=" + state);
    }
  }
private void releaseLatch() {
ProcedurePrepareLatch.releaseLatch(latch, this);
}
}
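The procedure above is driven from the client side through the replication admin API. The following is an illustrative sketch only, not part of this commit: it assumes the Admin#transitReplicationPeerSyncReplicationState method added earlier on the HBASE-19064 feature branch, and the peer id "1" and target state are made up for the example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.replication.SyncReplicationState;

public class TransitPeerStateExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin()) {
      // Ask the master to move sync replication peer "1" to DOWNGRADE_ACTIVE. The master runs
      // the state machine above: it records the new state, refreshes every region server
      // (stage 0), reopens the regions of the replicated tables, persists the transition and
      // refreshes the region servers again (stage 1).
      admin.transitReplicationPeerSyncReplicationState("1",
        SyncReplicationState.DOWNGRADE_ACTIVE);
    }
  }
}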


@ -1802,26 +1802,32 @@ public class HRegionServer extends HasThread implements
 * be hooked up to WAL.
 */
private void setupWALAndReplication() throws IOException {
  boolean isMasterNoTableOrSystemTableOnly = this instanceof HMaster &&
    (!LoadBalancer.isTablesOnMaster(conf) || LoadBalancer.isSystemTablesOnlyOnMaster(conf));
  if (isMasterNoTableOrSystemTableOnly) {
    conf.setBoolean(HConstants.SYNC_REPLICATION_ENABLED, false);
  }
  WALFactory factory = new WALFactory(conf, serverName.toString());
  if (!isMasterNoTableOrSystemTableOnly) {
    // TODO Replication make assumptions here based on the default filesystem impl
    Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    String logName = AbstractFSWALProvider.getWALDirectoryName(this.serverName.toString());
    Path logDir = new Path(walRootDir, logName);
    LOG.debug("logDir={}", logDir);
    if (this.walFs.exists(logDir)) {
      throw new RegionServerRunningException(
        "Region server has already created directory at " + this.serverName.toString());
    }
    // Always create wal directory as now we need this when master restarts to find out the live
    // region servers.
    if (!this.walFs.mkdirs(logDir)) {
      throw new IOException("Can not create wal directory " + logDir);
    }
    // Instantiate replication if replication enabled. Pass it the log directories.
    createNewReplicationInstance(conf, this, this.walFs, logDir, oldLogDir,
      factory.getWALProvider());
  }
  this.walFactory = factory;
}
@ -2943,11 +2949,6 @@ public class HRegionServer extends HasThread implements
 */
private static void createNewReplicationInstance(Configuration conf, HRegionServer server,
    FileSystem walFs, Path walDir, Path oldWALDir, WALProvider walProvider) throws IOException {
if ((server instanceof HMaster) &&
(!LoadBalancer.isTablesOnMaster(conf) || LoadBalancer.isSystemTablesOnlyOnMaster(conf))) {
return;
}
  // read in the name of the source replication class from the config file.
  String sourceClassname = conf.get(HConstants.REPLICATION_SOURCE_SERVICE_CLASSNAME,
    HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);


@ -18,16 +18,21 @@
package org.apache.hadoop.hbase.regionserver;

import org.apache.hadoop.hbase.replication.regionserver.PeerProcedureHandler;
import org.apache.hadoop.hbase.replication.regionserver.SyncReplicationPeerInfoProvider;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * A source for a replication stream has to expose this service. This service allows an application
 * to hook into the regionserver and watch for new transactions.
 */
@InterfaceAudience.Private
public interface ReplicationSourceService extends ReplicationService {

  /**
   * Returns an info provider for sync replication peer.
   */
  SyncReplicationPeerInfoProvider getSyncReplicationPeerInfoProvider();

  /**
   * Returns a Handler to handle peer procedures.
   */


@ -28,8 +28,8 @@ import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
public interface PeerActionListener {

  static final PeerActionListener DUMMY = new PeerActionListener() {};

  default void peerSyncReplicationStateChange(String peerId, SyncReplicationState from,
      SyncReplicationState to, int stage) {}
}
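A minimal illustrative implementation, not part of this commit: a listener that only logs the two-stage notification. The stage values follow the RefreshPeerProcedure calls in the procedure above, where 0 is the refresh issued right after the new state is recorded and 1 is the refresh issued after the transition is persisted; the class name is made up.

import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@InterfaceAudience.Private
public class LoggingPeerActionListener implements PeerActionListener {

  private static final Logger LOG = LoggerFactory.getLogger(LoggingPeerActionListener.class);

  @Override
  public void peerSyncReplicationStateChange(String peerId, SyncReplicationState from,
      SyncReplicationState to, int stage) {
    // Only observe the transition; SyncReplicationWALProvider below reacts to stage 1 of an
    // ACTIVE to DOWNGRADE_ACTIVE transition by closing the dual WAL of the peer.
    LOG.info("Sync replication peer {} transits from {} to {} at stage {}", peerId, from, to,
      stage);
  }
}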


@ -15,11 +15,10 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import java.io.IOException;

import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.yetus.audience.InterfaceAudience;
@ -29,13 +28,16 @@ import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
public interface PeerProcedureHandler {

  void addPeer(String peerId) throws ReplicationException, IOException;

  void removePeer(String peerId) throws ReplicationException, IOException;

  void disablePeer(String peerId) throws ReplicationException, IOException;

  void enablePeer(String peerId) throws ReplicationException, IOException;

  void updatePeerConfig(String peerId) throws ReplicationException, IOException;

  void transitSyncReplicationPeerState(String peerId, int stage, HRegionServer rs)
      throws ReplicationException, IOException;
}


@ -19,23 +19,32 @@ package org.apache.hadoop.hbase.replication.regionserver;
import java.io.IOException;
import java.util.concurrent.locks.Lock;

import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationPeer.PeerState;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.util.KeyLocker;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@InterfaceAudience.Private
public class PeerProcedureHandlerImpl implements PeerProcedureHandler {

  private static final Logger LOG = LoggerFactory.getLogger(PeerProcedureHandlerImpl.class);

  private final ReplicationSourceManager replicationSourceManager;
  private final PeerActionListener peerActionListener;
  private final KeyLocker<String> peersLock = new KeyLocker<>();

  public PeerProcedureHandlerImpl(ReplicationSourceManager replicationSourceManager,
      PeerActionListener peerActionListener) {
    this.replicationSourceManager = replicationSourceManager;
    this.peerActionListener = peerActionListener;
  }

  @Override
@ -61,7 +70,6 @@ public class PeerProcedureHandlerImpl implements PeerProcedureHandler {
  }

  private void refreshPeerState(String peerId) throws ReplicationException, IOException {
    Lock peerLock = peersLock.acquireLock(peerId);
    ReplicationPeerImpl peer = null;
    PeerState oldState = null;
@ -72,7 +80,7 @@ public class PeerProcedureHandlerImpl implements PeerProcedureHandler {
        throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
      }
      oldState = peer.getPeerState();
      PeerState newState = replicationSourceManager.getReplicationPeers().refreshPeerState(peerId);
      // RS need to start work with the new replication state change
      if (oldState.equals(PeerState.ENABLED) && newState.equals(PeerState.DISABLED)) {
        replicationSourceManager.refreshSources(peerId);
@ -132,4 +140,42 @@ public class PeerProcedureHandlerImpl implements PeerProcedureHandler {
      peerLock.unlock();
    }
  }
@Override
public void transitSyncReplicationPeerState(String peerId, int stage, HRegionServer rs)
throws ReplicationException, IOException {
ReplicationPeers replicationPeers = replicationSourceManager.getReplicationPeers();
Lock peerLock = peersLock.acquireLock(peerId);
try {
ReplicationPeerImpl peer = replicationPeers.getPeer(peerId);
if (peer == null) {
throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
}
if (!peer.getPeerConfig().isSyncReplication()) {
throw new ReplicationException("Peer with id=" + peerId + " is not synchronous.");
}
SyncReplicationState newState = peer.getNewSyncReplicationState();
if (stage == 0) {
if (newState != SyncReplicationState.NONE) {
LOG.warn("The new sync replication state for peer {} has already been set to {}, " +
"this should be a retry, give up", peerId, newState);
return;
}
newState = replicationPeers.refreshPeerNewSyncReplicationState(peerId);
SyncReplicationState oldState = peer.getSyncReplicationState();
peerActionListener.peerSyncReplicationStateChange(peerId, oldState, newState, stage);
} else {
if (newState == SyncReplicationState.NONE) {
LOG.warn("The new sync replication state for peer {} has already been clear, and the " +
"current state is {}, this should be a retry, give up", peerId, newState);
return;
}
SyncReplicationState oldState = peer.getSyncReplicationState();
peerActionListener.peerSyncReplicationStateChange(peerId, oldState, newState, stage);
peer.transitSyncReplicationState();
}
} finally {
peerLock.unlock();
}
}
}


@ -35,12 +35,15 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R
public class RefreshPeerCallable implements RSProcedureCallable {

  private static final Logger LOG = Logger.getLogger(RefreshPeerCallable.class);

  private HRegionServer rs;

  private String peerId;

  private PeerModificationType type;

  private int stage;

  private Exception initError;

  @Override
@ -67,6 +70,9 @@ public class RefreshPeerCallable implements RSProcedureCallable {
      case UPDATE_PEER_CONFIG:
        handler.updatePeerConfig(this.peerId);
        break;
      case TRANSIT_SYNC_REPLICATION_STATE:
        handler.transitSyncReplicationPeerState(peerId, stage, rs);
        break;
      default:
        throw new IllegalArgumentException("Unknown peer modification type: " + type);
    }
@ -80,6 +86,7 @@ public class RefreshPeerCallable implements RSProcedureCallable {
      RefreshPeerParameter param = RefreshPeerParameter.parseFrom(parameter);
      this.peerId = param.getPeerId();
      this.type = param.getType();
      this.stage = param.getStage();
    } catch (InvalidProtocolBufferException e) {
      initError = e;
    }


@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
import org.apache.hadoop.hbase.replication.ReplicationTracker;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider;
import org.apache.hadoop.hbase.wal.WALProvider;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.yetus.audience.InterfaceAudience;
@ -66,6 +67,7 @@ public class Replication implements ReplicationSourceService, ReplicationSinkSer
  private ReplicationTracker replicationTracker;
  private Configuration conf;
  private ReplicationSink replicationSink;
  private SyncReplicationPeerInfoProvider syncReplicationPeerInfoProvider;
  // Hosting server
  private Server server;
  /** Statistics thread schedule pool */
@ -120,19 +122,30 @@ public class Replication implements ReplicationSourceService, ReplicationSinkSer
    } catch (KeeperException ke) {
      throw new IOException("Could not read cluster id", ke);
    }
    SyncReplicationPeerMappingManager mapping = new SyncReplicationPeerMappingManager();
    this.replicationManager = new ReplicationSourceManager(queueStorage, replicationPeers,
      replicationTracker, conf, this.server, fs, logDir, oldLogDir, clusterId,
      walProvider != null ? walProvider.getWALFileLengthProvider() : p -> OptionalLong.empty(),
      mapping);
    this.syncReplicationPeerInfoProvider =
      new SyncReplicationPeerInfoProviderImpl(replicationPeers, mapping);
    PeerActionListener peerActionListener = PeerActionListener.DUMMY;
    if (walProvider != null) {
      walProvider
        .addWALActionsListener(new ReplicationSourceWALActionListener(conf, replicationManager));
      if (walProvider instanceof SyncReplicationWALProvider) {
        SyncReplicationWALProvider syncWALProvider = (SyncReplicationWALProvider) walProvider;
        peerActionListener = syncWALProvider;
        syncWALProvider.setPeerInfoProvider(syncReplicationPeerInfoProvider);
      }
    }
    this.statsThreadPeriod =
      this.conf.getInt("replication.stats.thread.period.seconds", 5 * 60);
    LOG.debug("Replication stats-in-log period={} seconds", this.statsThreadPeriod);
    this.replicationLoad = new ReplicationLoad();
    this.peerProcedureHandler =
      new PeerProcedureHandlerImpl(replicationManager, peerActionListener);
  }

  @Override
@ -270,4 +283,9 @@ public class Replication implements ReplicationSourceService, ReplicationSinkSer
    MetricsSink sinkMetrics = this.replicationSink.getSinkMetrics();
    this.replicationLoad.buildReplicationLoad(sourceMetricsList, sinkMetrics);
  }

  @Override
  public SyncReplicationPeerInfoProvider getSyncReplicationPeerInfoProvider() {
    return syncReplicationPeerInfoProvider;
  }
}


@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationListener;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationPeer.PeerState;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
@ -136,6 +137,8 @@ public class ReplicationSourceManager implements ReplicationListener {
  // For recovered source, the queue id's format is peer_id-servername-*
  private final ConcurrentMap<String, Map<String, NavigableSet<String>>> walsByIdRecoveredQueues;

  private final SyncReplicationPeerMappingManager syncReplicationPeerMappingManager;

  private final Configuration conf;
  private final FileSystem fs;
  // The paths to the latest log of each wal group, for new coming peers
@ -170,9 +173,8 @@ public class ReplicationSourceManager implements ReplicationListener {
  public ReplicationSourceManager(ReplicationQueueStorage queueStorage,
      ReplicationPeers replicationPeers, ReplicationTracker replicationTracker, Configuration conf,
      Server server, FileSystem fs, Path logDir, Path oldLogDir, UUID clusterId,
      WALFileLengthProvider walFileLengthProvider,
      SyncReplicationPeerMappingManager syncReplicationPeerMappingManager) throws IOException {
    this.sources = new ConcurrentHashMap<>();
    this.queueStorage = queueStorage;
    this.replicationPeers = replicationPeers;
@ -185,10 +187,11 @@ public class ReplicationSourceManager implements ReplicationListener {
    this.fs = fs;
    this.logDir = logDir;
    this.oldLogDir = oldLogDir;
    // 30 seconds
    this.sleepBeforeFailover = conf.getLong("replication.sleep.before.failover", 30000);
    this.clusterId = clusterId;
    this.walFileLengthProvider = walFileLengthProvider;
    this.syncReplicationPeerMappingManager = syncReplicationPeerMappingManager;
    this.replicationTracker.registerListener(this);
    // It's preferable to failover 1 RS at a time, but with good zk servers
    // more could be processed at the same time.
@ -249,8 +252,11 @@ public class ReplicationSourceManager implements ReplicationListener {
  }

  /**
   * <ol>
   * <li>Add peer to replicationPeers</li>
   * <li>Add the normal source and related replication queue</li>
   * <li>Add HFile Refs</li>
   * </ol>
   * @param peerId the id of replication peer
   */
  public void addPeer(String peerId) throws IOException {
@ -269,13 +275,16 @@ public class ReplicationSourceManager implements ReplicationListener {
  }

  /**
   * <ol>
   * <li>Remove peer for replicationPeers</li>
   * <li>Remove all the recovered sources for the specified id and related replication queues</li>
   * <li>Remove the normal source and related replication queue</li>
   * <li>Remove HFile Refs</li>
   * </ol>
   * @param peerId the id of the replication peer
   */
  public void removePeer(String peerId) {
    ReplicationPeer peer = replicationPeers.removePeer(peerId);
    String terminateMessage = "Replication stream was removed by a user";
    List<ReplicationSourceInterface> oldSourcesToDelete = new ArrayList<>();
    // synchronized on oldsources to avoid adding recovered source for the to-be-removed peer
@ -306,7 +315,10 @@ public class ReplicationSourceManager implements ReplicationListener {
      deleteQueue(peerId);
      this.walsById.remove(peerId);
    }
    ReplicationPeerConfig peerConfig = peer.getPeerConfig();
    if (peerConfig.isSyncReplication()) {
      syncReplicationPeerMappingManager.remove(peerId, peerConfig);
    }
    // Remove HFile Refs
    abortWhenFail(() -> this.queueStorage.removePeerFromHFileRefs(peerId));
  }
@ -358,6 +370,10 @@ public class ReplicationSourceManager implements ReplicationListener {
        }
      }
    }
    ReplicationPeerConfig peerConfig = peer.getPeerConfig();
    if (peerConfig.isSyncReplication()) {
      syncReplicationPeerMappingManager.add(peer.getId(), peerConfig);
    }
    src.startup();
    return src;
  }
@ -435,6 +451,7 @@ public class ReplicationSourceManager implements ReplicationListener {
    // Delete queue from storage and memory
    deleteQueue(src.getQueueId());
    this.walsById.remove(src.getQueueId());
  }

  /**


@ -19,17 +19,25 @@ package org.apache.hadoop.hbase.replication.regionserver;
import java.util.Optional;

import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Get the information for a sync replication peer.
 */
@InterfaceAudience.Private
public interface SyncReplicationPeerInfoProvider {

  /**
   * Return the peer id and remote WAL directory if the region is synchronously replicated and the
   * state is {@link SyncReplicationState#ACTIVE}.
   */
  Optional<Pair<String, String>> getPeerIdAndRemoteWALDir(RegionInfo info);

  /**
   * Check whether the given region is contained in a sync replication peer which is in the given
   * state.
   */
  boolean isInState(RegionInfo info, SyncReplicationState state);
}


@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.replication.regionserver;
import java.util.Optional;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
class SyncReplicationPeerInfoProviderImpl implements SyncReplicationPeerInfoProvider {
private final ReplicationPeers replicationPeers;
private final SyncReplicationPeerMappingManager mapping;
SyncReplicationPeerInfoProviderImpl(ReplicationPeers replicationPeers,
SyncReplicationPeerMappingManager mapping) {
this.replicationPeers = replicationPeers;
this.mapping = mapping;
}
@Override
public Optional<Pair<String, String>> getPeerIdAndRemoteWALDir(RegionInfo info) {
String peerId = mapping.getPeerId(info);
if (peerId == null) {
return Optional.empty();
}
ReplicationPeer peer = replicationPeers.getPeer(peerId);
if (peer == null) {
return Optional.empty();
}
if (peer.getSyncReplicationState() == SyncReplicationState.ACTIVE) {
return Optional.of(Pair.newPair(peerId, peer.getPeerConfig().getRemoteWALDir()));
} else {
return Optional.empty();
}
}
@Override
public boolean isInState(RegionInfo info, SyncReplicationState state) {
String peerId = mapping.getPeerId(info);
if (peerId == null) {
return false;
}
ReplicationPeer peer = replicationPeers.getPeer(peerId);
if (peer == null) {
return false;
}
return peer.getSyncReplicationState() == state;
}
}


@ -0,0 +1,48 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.replication.regionserver;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Used to map region to sync replication peer id.
* <p>
* TODO: now only support include table options.
*/
@InterfaceAudience.Private
class SyncReplicationPeerMappingManager {
private final ConcurrentMap<TableName, String> table2PeerId = new ConcurrentHashMap<>();
void add(String peerId, ReplicationPeerConfig peerConfig) {
peerConfig.getTableCFsMap().keySet().forEach(tn -> table2PeerId.put(tn, peerId));
}
void remove(String peerId, ReplicationPeerConfig peerConfig) {
peerConfig.getTableCFsMap().keySet().forEach(table2PeerId::remove);
}
String getPeerId(RegionInfo info) {
return table2PeerId.get(info.getTable());
}
}
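An illustrative usage sketch, not part of this commit: how the mapping behaves for a peer that replicates a single table. The table name, peer id and builder values are made up, only standard ReplicationPeerConfig builder methods are used, and the example class is assumed to live in the same package since SyncReplicationPeerMappingManager is package-private.

import java.util.Collections;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;

public class SyncReplicationPeerMappingManagerExample {
  public static void main(String[] args) {
    TableName table = TableName.valueOf("test");
    ReplicationPeerConfig peerConfig = ReplicationPeerConfig.newBuilder()
      .setClusterKey("127.0.0.1:2181:/hbase")
      .setReplicateAllUserTables(false)
      .setTableCFsMap(Collections.singletonMap(table, Collections.<String> emptyList()))
      .build();
    SyncReplicationPeerMappingManager mapping = new SyncReplicationPeerMappingManager();
    mapping.add("1", peerConfig);
    // Every region of "test" now resolves to peer "1".
    RegionInfo region = RegionInfoBuilder.newBuilder(table).build();
    assert "1".equals(mapping.getPeerId(region));
    mapping.remove("1", peerConfig);
    assert mapping.getPeerId(region) == null;
  }
}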


@ -39,7 +39,7 @@ import org.apache.hadoop.hbase.regionserver.wal.DualAsyncFSWAL;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.replication.regionserver.PeerActionListener;
import org.apache.hadoop.hbase.replication.regionserver.SyncReplicationPeerInfoProvider;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.KeyLocker;
import org.apache.hadoop.hbase.util.Pair;
@ -67,7 +67,7 @@ public class SyncReplicationWALProvider implements WALProvider, PeerActionListen
  private final WALProvider provider;

  private SyncReplicationPeerInfoProvider peerInfoProvider;

  private WALFactory factory;
@ -85,9 +85,12 @@ public class SyncReplicationWALProvider implements WALProvider, PeerActionListen
  private final KeyLocker<String> createLock = new KeyLocker<>();

  SyncReplicationWALProvider(WALProvider provider) {
    this.provider = provider;
  }

  public void setPeerInfoProvider(SyncReplicationPeerInfoProvider peerInfoProvider) {
    this.peerInfoProvider = peerInfoProvider;
  }

  @Override
@ -99,7 +102,7 @@ public class SyncReplicationWALProvider implements WALProvider, PeerActionListen
    this.conf = conf;
    this.factory = factory;
    Pair<EventLoopGroup, Class<? extends Channel>> eventLoopGroupAndChannelClass =
      NettyAsyncFSWALConfigHelper.getEventLoopConfig(conf);
    eventLoopGroup = eventLoopGroupAndChannelClass.getFirst();
    channelClass = eventLoopGroupAndChannelClass.getSecond();
  }
@ -112,9 +115,9 @@ public class SyncReplicationWALProvider implements WALProvider, PeerActionListen
    Path remoteWALDirPath = new Path(remoteWALDir);
    FileSystem remoteFs = remoteWALDirPath.getFileSystem(conf);
    return new DualAsyncFSWAL(CommonFSUtils.getWALFileSystem(conf), remoteFs,
      CommonFSUtils.getWALRootDir(conf), new Path(remoteWALDirPath, peerId),
      getWALDirectoryName(factory.factoryId), getWALArchiveDirectoryName(conf, factory.factoryId),
      conf, listeners, true, getLogPrefix(peerId), LOG_SUFFIX, eventLoopGroup, channelClass);
  }

  private DualAsyncFSWAL getWAL(String peerId, String remoteWALDir) throws IOException {
@ -139,7 +142,7 @@ public class SyncReplicationWALProvider implements WALProvider, PeerActionListen
  @Override
  public WAL getWAL(RegionInfo region) throws IOException {
    Optional<Pair<String, String>> peerIdAndRemoteWALDir =
      peerInfoProvider.getPeerIdAndRemoteWALDir(region);
    if (peerIdAndRemoteWALDir.isPresent()) {
      Pair<String, String> pair = peerIdAndRemoteWALDir.get();
      return getWAL(pair.getFirst(), pair.getSecond());
@ -220,15 +223,13 @@ public class SyncReplicationWALProvider implements WALProvider, PeerActionListen
    provider.addWALActionsListener(listener);
  }

  @Override
  public void peerSyncReplicationStateChange(String peerId, SyncReplicationState from,
      SyncReplicationState to, int stage) {
    // TODO: stage 0
    if (from == SyncReplicationState.ACTIVE && to == SyncReplicationState.DOWNGRADE_ACTIVE &&
      stage == 1) {
      safeClose(peerId2WAL.remove(peerId));
    }
  }
}


@ -24,10 +24,10 @@ import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
@ -144,18 +144,6 @@ public class WALFactory {
    }
  }
/**
* instantiate a provider from a config property. requires conf to have already been set (as well
* as anything the provider might need to read).
*/
private WALProvider getProvider(String key, String defaultValue, String providerId)
throws IOException {
WALProvider provider = createProvider(getProviderClass(key, defaultValue));
provider.init(this, conf, providerId);
provider.addWALActionsListener(new MetricsWAL());
return provider;
}
  /**
   * @param conf must not be null, will keep a reference to read params in later reader/writer
   *          instances.
@ -173,7 +161,13 @@ public class WALFactory {
    this.factoryId = factoryId;
    // end required early initialization
    if (conf.getBoolean("hbase.regionserver.hlog.enabled", true)) {
      WALProvider provider = createProvider(getProviderClass(WAL_PROVIDER, DEFAULT_WAL_PROVIDER));
      if (conf.getBoolean(HConstants.SYNC_REPLICATION_ENABLED, false)) {
        provider = new SyncReplicationWALProvider(provider);
      }
      provider.init(this, conf, null);
      provider.addWALActionsListener(new MetricsWAL());
      this.provider = provider;
    } else {
      // special handling of existing configuration behavior.
      LOG.warn("Running with WAL disabled.");
@ -182,26 +176,6 @@ public class WALFactory {
    }
  }

  /**
   * Shutdown all WALs and clean up any underlying storage.
   * Use only when you will not need to replay and edits that have gone to any wals from this
@ -250,8 +224,9 @@ public class WALFactory {
    if (provider != null) {
      return provider;
    }
    provider = createProvider(getProviderClass(META_WAL_PROVIDER, DEFAULT_META_WAL_PROVIDER));
    provider.init(this, conf, AbstractFSWALProvider.META_WAL_PROVIDER_ID);
    provider.addWALActionsListener(new MetricsWAL());
    if (metaProvider.compareAndSet(null, provider)) {
      return provider;
    } else {
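A short illustrative sketch, not part of this commit, of how the new wrapping is switched on; it mirrors the test setup further below, and the factory id string is arbitrary.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider;
import org.apache.hadoop.hbase.wal.WALFactory;

public class SyncReplicationWALFactoryExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // With the flag off (the default), WALFactory keeps the plain provider; with it on, the
    // provider is wrapped in SyncReplicationWALProvider as shown in the hunk above.
    conf.setBoolean(HConstants.SYNC_REPLICATION_ENABLED, true);
    WALFactory factory = new WALFactory(conf, "example-factory");
    assert factory.getWALProvider() instanceof SyncReplicationWALProvider;
  }
}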


@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.replication.TestReplicationEndpoint.InterClusterR
import org.apache.hadoop.hbase.replication.regionserver.TestReplicator.ReplicationEndpointForTest;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -1008,7 +1009,7 @@ public class TestReplicationAdmin {
  @Test
  public void testTransitSyncReplicationPeerState() throws Exception {
    TableName tableName = TableName.valueOf(name.getMethodName());
    TEST_UTIL.createTable(tableName, Bytes.toBytes("family"));
    ReplicationPeerConfigBuilder builder = ReplicationPeerConfig.newBuilder();
    builder.setClusterKey(KEY_ONE);
    builder.setReplicateAllUserTables(false);


@ -175,7 +175,10 @@ public abstract class TestReplicationSourceManager {
      ZKReplicationPeerStorage.ENABLED_ZNODE_BYTES);
    ZKUtil.createWithParents(zkw, "/hbase/replication/peers/1/sync-rep-state");
    ZKUtil.setData(zkw, "/hbase/replication/peers/1/sync-rep-state",
      ZKReplicationPeerStorage.NONE_STATE_ZNODE_BYTES);
    ZKUtil.createWithParents(zkw, "/hbase/replication/peers/1/new-sync-rep-state");
    ZKUtil.setData(zkw, "/hbase/replication/peers/1/new-sync-rep-state",
      ZKReplicationPeerStorage.NONE_STATE_ZNODE_BYTES);
    ZKUtil.createWithParents(zkw, "/hbase/replication/state");
    ZKUtil.setData(zkw, "/hbase/replication/state", ZKReplicationPeerStorage.ENABLED_ZNODE_BYTES);


@ -27,6 +27,7 @@ import java.util.Optional;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
import org.apache.hadoop.hbase.client.RegionInfo;
@ -35,6 +36,8 @@ import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.regionserver.wal.DualAsyncFSWAL;
import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader;
import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogTestHelper;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.replication.regionserver.SyncReplicationPeerInfoProvider;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
@ -51,7 +54,7 @@ public class TestSyncReplicationWALProvider {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSyncReplicationWALProvider.class);

  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
@ -69,19 +72,30 @@ public class TestSyncReplicationWALProvider {
  private static WALFactory FACTORY;

  public static final class InfoProvider implements SyncReplicationPeerInfoProvider {

    @Override
    public Optional<Pair<String, String>> getPeerIdAndRemoteWALDir(RegionInfo info) {
      if (info.getTable().equals(TABLE)) {
        return Optional.of(Pair.newPair(PEER_ID, REMOTE_WAL_DIR));
      } else {
        return Optional.empty();
      }
    }

    @Override
    public boolean isInState(RegionInfo info, SyncReplicationState state) {
      // TODO Implement SyncReplicationPeerInfoProvider.isInState
      return false;
    }
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    UTIL.getConfiguration().setBoolean(HConstants.SYNC_REPLICATION_ENABLED, true);
    UTIL.startMiniDFSCluster(3);
    FACTORY = new WALFactory(UTIL.getConfiguration(), "test");
    ((SyncReplicationWALProvider) FACTORY.getWALProvider()).setPeerInfoProvider(new InfoProvider());
    UTIL.getTestFileSystem().mkdirs(new Path(REMOTE_WAL_DIR, PEER_ID));
  }
@ -151,9 +165,9 @@ public class TestSyncReplicationWALProvider {
    DualAsyncFSWAL wal = (DualAsyncFSWAL) FACTORY.getWAL(REGION);
    assertEquals(2, FACTORY.getWALs().size());
    testReadWrite(wal);
    SyncReplicationWALProvider walProvider = (SyncReplicationWALProvider) FACTORY.getWALProvider();
    walProvider.peerSyncReplicationStateChange(PEER_ID, SyncReplicationState.ACTIVE,
      SyncReplicationState.DOWNGRADE_ACTIVE, 1);
    assertEquals(1, FACTORY.getWALs().size());
  }
}