HBASE-20147 Serial replication will be stuck if we create a table with serial replication but add it to a peer after there are region moves
This commit is contained in:
parent
aadb2f0837
commit
64061f896f
|
@ -489,16 +489,17 @@ public class AsyncMetaTableAccessor {
|
||||||
QueryType type) {
|
QueryType type) {
|
||||||
return tableName.map((table) -> {
|
return tableName.map((table) -> {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case REGION:
|
case REGION:
|
||||||
byte[] startRow = new byte[table.getName().length + 2];
|
case REPLICATION:
|
||||||
System.arraycopy(table.getName(), 0, startRow, 0, table.getName().length);
|
byte[] startRow = new byte[table.getName().length + 2];
|
||||||
startRow[startRow.length - 2] = HConstants.DELIMITER;
|
System.arraycopy(table.getName(), 0, startRow, 0, table.getName().length);
|
||||||
startRow[startRow.length - 1] = HConstants.DELIMITER;
|
startRow[startRow.length - 2] = HConstants.DELIMITER;
|
||||||
return startRow;
|
startRow[startRow.length - 1] = HConstants.DELIMITER;
|
||||||
case ALL:
|
return startRow;
|
||||||
case TABLE:
|
case ALL:
|
||||||
default:
|
case TABLE:
|
||||||
return table.getName();
|
default:
|
||||||
|
return table.getName();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -512,20 +513,21 @@ public class AsyncMetaTableAccessor {
|
||||||
return tableName.map((table) -> {
|
return tableName.map((table) -> {
|
||||||
final byte[] stopRow;
|
final byte[] stopRow;
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case REGION:
|
case REGION:
|
||||||
stopRow = new byte[table.getName().length + 3];
|
case REPLICATION:
|
||||||
System.arraycopy(table.getName(), 0, stopRow, 0, table.getName().length);
|
stopRow = new byte[table.getName().length + 3];
|
||||||
stopRow[stopRow.length - 3] = ' ';
|
System.arraycopy(table.getName(), 0, stopRow, 0, table.getName().length);
|
||||||
stopRow[stopRow.length - 2] = HConstants.DELIMITER;
|
stopRow[stopRow.length - 3] = ' ';
|
||||||
stopRow[stopRow.length - 1] = HConstants.DELIMITER;
|
stopRow[stopRow.length - 2] = HConstants.DELIMITER;
|
||||||
break;
|
stopRow[stopRow.length - 1] = HConstants.DELIMITER;
|
||||||
case ALL:
|
break;
|
||||||
case TABLE:
|
case ALL:
|
||||||
default:
|
case TABLE:
|
||||||
stopRow = new byte[table.getName().length + 1];
|
default:
|
||||||
System.arraycopy(table.getName(), 0, stopRow, 0, table.getName().length);
|
stopRow = new byte[table.getName().length + 1];
|
||||||
stopRow[stopRow.length - 1] = ' ';
|
System.arraycopy(table.getName(), 0, stopRow, 0, table.getName().length);
|
||||||
break;
|
stopRow[stopRow.length - 1] = ' ';
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return stopRow;
|
return stopRow;
|
||||||
});
|
});
|
||||||
|
|
|
@ -192,7 +192,8 @@ public class MetaTableAccessor {
|
||||||
public enum QueryType {
|
public enum QueryType {
|
||||||
ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY),
|
ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY),
|
||||||
REGION(HConstants.CATALOG_FAMILY),
|
REGION(HConstants.CATALOG_FAMILY),
|
||||||
TABLE(HConstants.TABLE_FAMILY);
|
TABLE(HConstants.TABLE_FAMILY),
|
||||||
|
REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY);
|
||||||
|
|
||||||
private final byte[][] families;
|
private final byte[][] families;
|
||||||
|
|
||||||
|
@ -1168,8 +1169,9 @@ public class MetaTableAccessor {
|
||||||
final List<T> results = new ArrayList<>();
|
final List<T> results = new ArrayList<>();
|
||||||
@Override
|
@Override
|
||||||
public boolean visit(Result r) throws IOException {
|
public boolean visit(Result r) throws IOException {
|
||||||
if (r == null || r.isEmpty()) return true;
|
if (r != null && !r.isEmpty()) {
|
||||||
add(r);
|
add(r);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2108,6 +2110,24 @@ public class MetaTableAccessor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn,
|
||||||
|
TableName tableName) throws IOException {
|
||||||
|
List<Pair<String, Long>> list = new ArrayList<>();
|
||||||
|
scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
|
||||||
|
getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> {
|
||||||
|
byte[] value =
|
||||||
|
r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER);
|
||||||
|
if (value == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
long lastBarrier = Bytes.toLong(value);
|
||||||
|
String encodedRegionName = RegionInfo.encodeRegionName(r.getRow());
|
||||||
|
list.add(Pair.newPair(encodedRegionName, lastBarrier));
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
|
private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
|
||||||
if (!METALOG.isDebugEnabled()) {
|
if (!METALOG.isDebugEnabled()) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -377,7 +377,11 @@ enum PeerModificationState {
|
||||||
PRE_PEER_MODIFICATION = 1;
|
PRE_PEER_MODIFICATION = 1;
|
||||||
UPDATE_PEER_STORAGE = 2;
|
UPDATE_PEER_STORAGE = 2;
|
||||||
REFRESH_PEER_ON_RS = 3;
|
REFRESH_PEER_ON_RS = 3;
|
||||||
POST_PEER_MODIFICATION = 4;
|
SERIAL_PEER_REOPEN_REGIONS = 4;
|
||||||
|
SERIAL_PEER_UPDATE_LAST_PUSHED_SEQ_ID = 5;
|
||||||
|
SERIAL_PEER_SET_PEER_ENABLED = 6;
|
||||||
|
SERIAL_PEER_ENABLE_PEER_REFRESH_PEER_ON_RS = 7;
|
||||||
|
POST_PEER_MODIFICATION = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
message PeerModificationStateData {
|
message PeerModificationStateData {
|
||||||
|
@ -415,4 +419,5 @@ message AddPeerStateData {
|
||||||
|
|
||||||
message UpdatePeerConfigStateData {
|
message UpdatePeerConfigStateData {
|
||||||
required ReplicationPeer peer_config = 1;
|
required ReplicationPeer peer_config = 1;
|
||||||
|
optional ReplicationPeer old_peer_config = 2;
|
||||||
}
|
}
|
||||||
|
|
|
@ -78,6 +78,14 @@ public interface ReplicationQueueStorage {
|
||||||
*/
|
*/
|
||||||
long getLastSequenceId(String encodedRegionName, String peerId) throws ReplicationException;
|
long getLastSequenceId(String encodedRegionName, String peerId) throws ReplicationException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the max sequence id of a bunch of regions for a given peer. Will be called when setting up
|
||||||
|
* a serial replication peer.
|
||||||
|
* @param peerId peer id
|
||||||
|
* @param lastSeqIds map with {encodedRegionName, sequenceId} pairs for serial replication.
|
||||||
|
*/
|
||||||
|
void setLastSequenceIds(String peerId, Map<String, Long> lastSeqIds) throws ReplicationException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the current position for a specific WAL in a given queue for a given regionserver.
|
* Get the current position for a specific WAL in a given queue for a given regionserver.
|
||||||
* @param serverName the name of the regionserver
|
* @param serverName the name of the regionserver
|
||||||
|
|
|
@ -111,13 +111,11 @@ public final class ReplicationUtils {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isKeyConfigEqual(ReplicationPeerConfig rpc1, ReplicationPeerConfig rpc2) {
|
public static boolean isNamespacesAndTableCFsEqual(ReplicationPeerConfig rpc1,
|
||||||
|
ReplicationPeerConfig rpc2) {
|
||||||
if (rpc1.replicateAllUserTables() != rpc2.replicateAllUserTables()) {
|
if (rpc1.replicateAllUserTables() != rpc2.replicateAllUserTables()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (rpc1.isSerial() != rpc2.isSerial()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (rpc1.replicateAllUserTables()) {
|
if (rpc1.replicateAllUserTables()) {
|
||||||
return isNamespacesEqual(rpc1.getExcludeNamespaces(), rpc2.getExcludeNamespaces()) &&
|
return isNamespacesEqual(rpc1.getExcludeNamespaces(), rpc2.getExcludeNamespaces()) &&
|
||||||
isTableCFsEqual(rpc1.getExcludeTableCFsMap(), rpc2.getExcludeTableCFsMap());
|
isTableCFsEqual(rpc1.getExcludeTableCFsMap(), rpc2.getExcludeTableCFsMap());
|
||||||
|
|
|
@ -203,6 +203,24 @@ class ZKReplicationQueueStorage extends ZKReplicationStorageBase
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void addLastSeqIdsToOps(String queueId, Map<String, Long> lastSeqIds,
|
||||||
|
List<ZKUtilOp> listOfOps) throws KeeperException {
|
||||||
|
for (Entry<String, Long> lastSeqEntry : lastSeqIds.entrySet()) {
|
||||||
|
String peerId = new ReplicationQueueInfo(queueId).getPeerId();
|
||||||
|
String path = getSerialReplicationRegionPeerNode(lastSeqEntry.getKey(), peerId);
|
||||||
|
/*
|
||||||
|
* Make sure the existence of path
|
||||||
|
* /hbase/replication/regions/<hash>/<encoded-region-name>-<peer-id>. As the javadoc in
|
||||||
|
* multiOrSequential() method said, if received a NodeExistsException, all operations will
|
||||||
|
* fail. So create the path here, and in fact, no need to add this operation to listOfOps,
|
||||||
|
* because only need to make sure that update file position and sequence id atomically.
|
||||||
|
*/
|
||||||
|
ZKUtil.createWithParents(zookeeper, path);
|
||||||
|
// Persist the max sequence id of region to zookeeper.
|
||||||
|
listOfOps.add(ZKUtilOp.setData(path, ZKUtil.positionToByteArray(lastSeqEntry.getValue())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setWALPosition(ServerName serverName, String queueId, String fileName, long position,
|
public void setWALPosition(ServerName serverName, String queueId, String fileName, long position,
|
||||||
Map<String, Long> lastSeqIds) throws ReplicationException {
|
Map<String, Long> lastSeqIds) throws ReplicationException {
|
||||||
|
@ -213,23 +231,8 @@ class ZKReplicationQueueStorage extends ZKReplicationStorageBase
|
||||||
ZKUtil.positionToByteArray(position)));
|
ZKUtil.positionToByteArray(position)));
|
||||||
}
|
}
|
||||||
// Persist the max sequence id(s) of regions for serial replication atomically.
|
// Persist the max sequence id(s) of regions for serial replication atomically.
|
||||||
for (Entry<String, Long> lastSeqEntry : lastSeqIds.entrySet()) {
|
addLastSeqIdsToOps(queueId, lastSeqIds, listOfOps);
|
||||||
String peerId = new ReplicationQueueInfo(queueId).getPeerId();
|
ZKUtil.multiOrSequential(zookeeper, listOfOps, false);
|
||||||
String path = getSerialReplicationRegionPeerNode(lastSeqEntry.getKey(), peerId);
|
|
||||||
/*
|
|
||||||
* Make sure the existence of path
|
|
||||||
* /hbase/replication/regions/<hash>/<encoded-region-name>-<peer-id>. As the javadoc in
|
|
||||||
* multiOrSequential() method said, if received a NodeExistsException, all operations will
|
|
||||||
* fail. So create the path here, and in fact, no need to add this operation to listOfOps,
|
|
||||||
* because only need to make sure that update file position and sequence id atomically.
|
|
||||||
*/
|
|
||||||
ZKUtil.createWithParents(zookeeper, path);
|
|
||||||
// Persist the max sequence id of region to zookeeper.
|
|
||||||
listOfOps.add(ZKUtilOp.setData(path, ZKUtil.positionToByteArray(lastSeqEntry.getValue())));
|
|
||||||
}
|
|
||||||
if (!listOfOps.isEmpty()) {
|
|
||||||
ZKUtil.multiOrSequential(zookeeper, listOfOps, false);
|
|
||||||
}
|
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
throw new ReplicationException("Failed to set log position (serverName=" + serverName
|
throw new ReplicationException("Failed to set log position (serverName=" + serverName
|
||||||
+ ", queueId=" + queueId + ", fileName=" + fileName + ", position=" + position + ")", e);
|
+ ", queueId=" + queueId + ", fileName=" + fileName + ", position=" + position + ")", e);
|
||||||
|
@ -256,6 +259,19 @@ class ZKReplicationQueueStorage extends ZKReplicationStorageBase
|
||||||
return HConstants.NO_SEQNUM;
|
return HConstants.NO_SEQNUM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setLastSequenceIds(String peerId, Map<String, Long> lastSeqIds)
|
||||||
|
throws ReplicationException {
|
||||||
|
try {
|
||||||
|
List<ZKUtilOp> listOfOps = new ArrayList<>();
|
||||||
|
addLastSeqIdsToOps(peerId, lastSeqIds, listOfOps);
|
||||||
|
ZKUtil.multiOrSequential(zookeeper, listOfOps, false);
|
||||||
|
} catch (KeeperException e) {
|
||||||
|
throw new ReplicationException("Failed to set last sequence ids, peerId=" + peerId +
|
||||||
|
", lastSeqIds.size=" + lastSeqIds.size(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getWALPosition(ServerName serverName, String queueId, String fileName)
|
public long getWALPosition(ServerName serverName, String queueId, String fileName)
|
||||||
throws ReplicationException {
|
throws ReplicationException {
|
||||||
|
|
|
@ -56,6 +56,21 @@ public class AddPeerProcedure extends ModifyPeerProcedure {
|
||||||
return PeerOperationType.ADD;
|
return PeerOperationType.ADD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean reopenRegionsAfterRefresh() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean enablePeerBeforeFinish() {
|
||||||
|
return enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ReplicationPeerConfig getNewPeerConfig() {
|
||||||
|
return peerConfig;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void prePeerModification(MasterProcedureEnv env)
|
protected void prePeerModification(MasterProcedureEnv env)
|
||||||
throws IOException, ReplicationException {
|
throws IOException, ReplicationException {
|
||||||
|
@ -68,11 +83,13 @@ public class AddPeerProcedure extends ModifyPeerProcedure {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void updatePeerStorage(MasterProcedureEnv env) throws ReplicationException {
|
protected void updatePeerStorage(MasterProcedureEnv env) throws ReplicationException {
|
||||||
env.getReplicationPeerManager().addPeer(peerId, peerConfig, enabled);
|
env.getReplicationPeerManager().addPeer(peerId, peerConfig,
|
||||||
|
peerConfig.isSerial() ? false : enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void postPeerModification(MasterProcedureEnv env) throws IOException {
|
protected void postPeerModification(MasterProcedureEnv env)
|
||||||
|
throws IOException, ReplicationException {
|
||||||
LOG.info("Successfully added {} peer {}, config {}", enabled ? "ENABLED" : "DISABLED", peerId,
|
LOG.info("Successfully added {} peer {}, config {}", enabled ? "ENABLED" : "DISABLED", peerId,
|
||||||
peerConfig);
|
peerConfig);
|
||||||
MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
||||||
|
|
|
@ -18,11 +18,28 @@
|
||||||
package org.apache.hadoop.hbase.master.replication;
|
package org.apache.hadoop.hbase.master.replication;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import org.apache.hadoop.hbase.MetaTableAccessor;
|
||||||
|
import org.apache.hadoop.hbase.client.Connection;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
|
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||||
|
import org.apache.hadoop.hbase.master.TableStateManager;
|
||||||
|
import org.apache.hadoop.hbase.master.TableStateManager.TableStateNotFoundException;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
||||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
|
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
|
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationException;
|
import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||||
|
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
|
||||||
|
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
|
||||||
|
import org.apache.hadoop.hbase.replication.ReplicationUtils;
|
||||||
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.apache.hadoop.hbase.wal.WALSplitter;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -38,6 +55,8 @@ public abstract class ModifyPeerProcedure extends AbstractPeerProcedure<PeerModi
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(ModifyPeerProcedure.class);
|
private static final Logger LOG = LoggerFactory.getLogger(ModifyPeerProcedure.class);
|
||||||
|
|
||||||
|
private static final int SET_LAST_SEQ_ID_BATCH_SIZE = 1000;
|
||||||
|
|
||||||
protected ModifyPeerProcedure() {
|
protected ModifyPeerProcedure() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,6 +92,114 @@ public abstract class ModifyPeerProcedure extends AbstractPeerProcedure<PeerModi
|
||||||
ProcedurePrepareLatch.releaseLatch(latch, this);
|
ProcedurePrepareLatch.releaseLatch(latch, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation class can override this method. The default return value is false which means we
|
||||||
|
* will jump to POST_PEER_MODIFICATION and finish the procedure. If returns true, we will jump to
|
||||||
|
* SERIAL_PEER_REOPEN_REGIONS.
|
||||||
|
*/
|
||||||
|
protected boolean reopenRegionsAfterRefresh() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The implementation class should override this method if the procedure may enter the serial
|
||||||
|
* related states.
|
||||||
|
*/
|
||||||
|
protected boolean enablePeerBeforeFinish() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void refreshPeer(MasterProcedureEnv env, PeerOperationType type) {
|
||||||
|
addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
|
||||||
|
.map(sn -> new RefreshPeerProcedure(peerId, type, sn))
|
||||||
|
.toArray(RefreshPeerProcedure[]::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ReplicationPeerConfig getOldPeerConfig() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ReplicationPeerConfig getNewPeerConfig() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Stream<TableDescriptor> getTables(MasterProcedureEnv env) throws IOException {
|
||||||
|
ReplicationPeerConfig peerConfig = getNewPeerConfig();
|
||||||
|
Stream<TableDescriptor> stream = env.getMasterServices().getTableDescriptors().getAll().values()
|
||||||
|
.stream().filter(TableDescriptor::hasGlobalReplicationScope)
|
||||||
|
.filter(td -> ReplicationUtils.contains(peerConfig, td.getTableName()));
|
||||||
|
ReplicationPeerConfig oldPeerConfig = getOldPeerConfig();
|
||||||
|
if (oldPeerConfig != null && oldPeerConfig.isSerial()) {
|
||||||
|
stream = stream.filter(td -> !ReplicationUtils.contains(oldPeerConfig, td.getTableName()));
|
||||||
|
}
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void reopenRegions(MasterProcedureEnv env) throws IOException {
|
||||||
|
Stream<TableDescriptor> stream = getTables(env);
|
||||||
|
TableStateManager tsm = env.getMasterServices().getTableStateManager();
|
||||||
|
stream.filter(td -> {
|
||||||
|
try {
|
||||||
|
return tsm.getTableState(td.getTableName()).isEnabled();
|
||||||
|
} catch (TableStateNotFoundException e) {
|
||||||
|
return false;
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}).forEach(td -> {
|
||||||
|
try {
|
||||||
|
addChildProcedure(env.getAssignmentManager().createReopenProcedures(
|
||||||
|
env.getAssignmentManager().getRegionStates().getRegionsOfTable(td.getTableName())));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addToMap(Map<String, Long> lastSeqIds, String encodedRegionName, long barrier,
|
||||||
|
ReplicationQueueStorage queueStorage) throws ReplicationException {
|
||||||
|
if (barrier >= 0) {
|
||||||
|
lastSeqIds.put(encodedRegionName, barrier);
|
||||||
|
if (lastSeqIds.size() >= SET_LAST_SEQ_ID_BATCH_SIZE) {
|
||||||
|
queueStorage.setLastSequenceIds(peerId, lastSeqIds);
|
||||||
|
lastSeqIds.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setLastSequenceIdForSerialPeer(MasterProcedureEnv env)
|
||||||
|
throws IOException, ReplicationException {
|
||||||
|
Stream<TableDescriptor> stream = getTables(env);
|
||||||
|
TableStateManager tsm = env.getMasterServices().getTableStateManager();
|
||||||
|
ReplicationQueueStorage queueStorage = env.getReplicationPeerManager().getQueueStorage();
|
||||||
|
Connection conn = env.getMasterServices().getConnection();
|
||||||
|
RegionStates regionStates = env.getAssignmentManager().getRegionStates();
|
||||||
|
MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
|
||||||
|
Map<String, Long> lastSeqIds = new HashMap<String, Long>();
|
||||||
|
stream.forEach(td -> {
|
||||||
|
try {
|
||||||
|
if (tsm.getTableState(td.getTableName()).isEnabled()) {
|
||||||
|
for (Pair<String, Long> name2Barrier : MetaTableAccessor
|
||||||
|
.getTableEncodedRegionNameAndLastBarrier(conn, td.getTableName())) {
|
||||||
|
addToMap(lastSeqIds, name2Barrier.getFirst(), name2Barrier.getSecond().longValue() - 1,
|
||||||
|
queueStorage);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (RegionInfo region : regionStates.getRegionsOfTable(td.getTableName(), true)) {
|
||||||
|
long maxSequenceId =
|
||||||
|
WALSplitter.getMaxRegionSequenceId(mfs.getFileSystem(), mfs.getRegionDir(region));
|
||||||
|
addToMap(lastSeqIds, region.getEncodedName(), maxSequenceId, queueStorage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException | ReplicationException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (!lastSeqIds.isEmpty()) {
|
||||||
|
queueStorage.setLastSequenceIds(peerId, lastSeqIds);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Flow executeFromState(MasterProcedureEnv env, PeerModificationState state)
|
protected Flow executeFromState(MasterProcedureEnv env, PeerModificationState state)
|
||||||
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
|
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
|
||||||
|
@ -104,9 +231,42 @@ public abstract class ModifyPeerProcedure extends AbstractPeerProcedure<PeerModi
|
||||||
setNextState(PeerModificationState.REFRESH_PEER_ON_RS);
|
setNextState(PeerModificationState.REFRESH_PEER_ON_RS);
|
||||||
return Flow.HAS_MORE_STATE;
|
return Flow.HAS_MORE_STATE;
|
||||||
case REFRESH_PEER_ON_RS:
|
case REFRESH_PEER_ON_RS:
|
||||||
addChildProcedure(env.getMasterServices().getServerManager().getOnlineServersList().stream()
|
refreshPeer(env, getPeerOperationType());
|
||||||
.map(sn -> new RefreshPeerProcedure(peerId, getPeerOperationType(), sn))
|
setNextState(reopenRegionsAfterRefresh() ? PeerModificationState.SERIAL_PEER_REOPEN_REGIONS
|
||||||
.toArray(RefreshPeerProcedure[]::new));
|
: PeerModificationState.POST_PEER_MODIFICATION);
|
||||||
|
return Flow.HAS_MORE_STATE;
|
||||||
|
case SERIAL_PEER_REOPEN_REGIONS:
|
||||||
|
try {
|
||||||
|
reopenRegions(env);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.warn("{} reopen regions for peer {} failed, retry", getClass().getName(), peerId, e);
|
||||||
|
throw new ProcedureYieldException();
|
||||||
|
}
|
||||||
|
setNextState(PeerModificationState.SERIAL_PEER_UPDATE_LAST_PUSHED_SEQ_ID);
|
||||||
|
return Flow.HAS_MORE_STATE;
|
||||||
|
case SERIAL_PEER_UPDATE_LAST_PUSHED_SEQ_ID:
|
||||||
|
try {
|
||||||
|
setLastSequenceIdForSerialPeer(env);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.warn("{} set last sequence id for peer {} failed, retry", getClass().getName(),
|
||||||
|
peerId, e);
|
||||||
|
throw new ProcedureYieldException();
|
||||||
|
}
|
||||||
|
setNextState(enablePeerBeforeFinish() ? PeerModificationState.SERIAL_PEER_SET_PEER_ENABLED
|
||||||
|
: PeerModificationState.POST_PEER_MODIFICATION);
|
||||||
|
return Flow.HAS_MORE_STATE;
|
||||||
|
case SERIAL_PEER_SET_PEER_ENABLED:
|
||||||
|
try {
|
||||||
|
env.getReplicationPeerManager().enablePeer(peerId);
|
||||||
|
} catch (ReplicationException e) {
|
||||||
|
LOG.warn("{} enable peer before finish for peer {} failed, retry", getClass().getName(),
|
||||||
|
peerId, e);
|
||||||
|
throw new ProcedureYieldException();
|
||||||
|
}
|
||||||
|
setNextState(PeerModificationState.SERIAL_PEER_ENABLE_PEER_REFRESH_PEER_ON_RS);
|
||||||
|
return Flow.HAS_MORE_STATE;
|
||||||
|
case SERIAL_PEER_ENABLE_PEER_REFRESH_PEER_ON_RS:
|
||||||
|
refreshPeer(env, PeerOperationType.ENABLE);
|
||||||
setNextState(PeerModificationState.POST_PEER_MODIFICATION);
|
setNextState(PeerModificationState.POST_PEER_MODIFICATION);
|
||||||
return Flow.HAS_MORE_STATE;
|
return Flow.HAS_MORE_STATE;
|
||||||
case POST_PEER_MODIFICATION:
|
case POST_PEER_MODIFICATION:
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class ReplicationPeerManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void preAddPeer(String peerId, ReplicationPeerConfig peerConfig)
|
void preAddPeer(String peerId, ReplicationPeerConfig peerConfig)
|
||||||
throws DoNotRetryIOException, ReplicationException {
|
throws DoNotRetryIOException, ReplicationException {
|
||||||
if (peerId.contains("-")) {
|
if (peerId.contains("-")) {
|
||||||
throw new DoNotRetryIOException("Found invalid peer name: " + peerId);
|
throw new DoNotRetryIOException("Found invalid peer name: " + peerId);
|
||||||
|
@ -109,43 +109,47 @@ public class ReplicationPeerManager {
|
||||||
return desc;
|
return desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void preRemovePeer(String peerId) throws DoNotRetryIOException {
|
void preRemovePeer(String peerId) throws DoNotRetryIOException {
|
||||||
checkPeerExists(peerId);
|
checkPeerExists(peerId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void preEnablePeer(String peerId) throws DoNotRetryIOException {
|
void preEnablePeer(String peerId) throws DoNotRetryIOException {
|
||||||
ReplicationPeerDescription desc = checkPeerExists(peerId);
|
ReplicationPeerDescription desc = checkPeerExists(peerId);
|
||||||
if (desc.isEnabled()) {
|
if (desc.isEnabled()) {
|
||||||
throw new DoNotRetryIOException("Replication peer " + peerId + " has already been enabled");
|
throw new DoNotRetryIOException("Replication peer " + peerId + " has already been enabled");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void preDisablePeer(String peerId) throws DoNotRetryIOException {
|
void preDisablePeer(String peerId) throws DoNotRetryIOException {
|
||||||
ReplicationPeerDescription desc = checkPeerExists(peerId);
|
ReplicationPeerDescription desc = checkPeerExists(peerId);
|
||||||
if (!desc.isEnabled()) {
|
if (!desc.isEnabled()) {
|
||||||
throw new DoNotRetryIOException("Replication peer " + peerId + " has already been disabled");
|
throw new DoNotRetryIOException("Replication peer " + peerId + " has already been disabled");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void preUpdatePeerConfig(String peerId, ReplicationPeerConfig peerConfig)
|
/**
|
||||||
|
* Return the old peer description. Can never be null.
|
||||||
|
*/
|
||||||
|
ReplicationPeerDescription preUpdatePeerConfig(String peerId, ReplicationPeerConfig peerConfig)
|
||||||
throws DoNotRetryIOException {
|
throws DoNotRetryIOException {
|
||||||
checkPeerConfig(peerConfig);
|
checkPeerConfig(peerConfig);
|
||||||
ReplicationPeerDescription desc = checkPeerExists(peerId);
|
ReplicationPeerDescription desc = checkPeerExists(peerId);
|
||||||
ReplicationPeerConfig oldPeerConfig = desc.getPeerConfig();
|
ReplicationPeerConfig oldPeerConfig = desc.getPeerConfig();
|
||||||
if (!isStringEquals(peerConfig.getClusterKey(), oldPeerConfig.getClusterKey())) {
|
if (!isStringEquals(peerConfig.getClusterKey(), oldPeerConfig.getClusterKey())) {
|
||||||
throw new DoNotRetryIOException(
|
throw new DoNotRetryIOException(
|
||||||
"Changing the cluster key on an existing peer is not allowed. Existing key '" +
|
"Changing the cluster key on an existing peer is not allowed. Existing key '" +
|
||||||
oldPeerConfig.getClusterKey() + "' for peer " + peerId + " does not match new key '" +
|
oldPeerConfig.getClusterKey() + "' for peer " + peerId + " does not match new key '" +
|
||||||
peerConfig.getClusterKey() + "'");
|
peerConfig.getClusterKey() + "'");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isStringEquals(peerConfig.getReplicationEndpointImpl(),
|
if (!isStringEquals(peerConfig.getReplicationEndpointImpl(),
|
||||||
oldPeerConfig.getReplicationEndpointImpl())) {
|
oldPeerConfig.getReplicationEndpointImpl())) {
|
||||||
throw new DoNotRetryIOException("Changing the replication endpoint implementation class " +
|
throw new DoNotRetryIOException("Changing the replication endpoint implementation class " +
|
||||||
"on an existing peer is not allowed. Existing class '" +
|
"on an existing peer is not allowed. Existing class '" +
|
||||||
oldPeerConfig.getReplicationEndpointImpl() + "' for peer " + peerId +
|
oldPeerConfig.getReplicationEndpointImpl() + "' for peer " + peerId +
|
||||||
" does not match new class '" + peerConfig.getReplicationEndpointImpl() + "'");
|
" does not match new class '" + peerConfig.getReplicationEndpointImpl() + "'");
|
||||||
}
|
}
|
||||||
|
return desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
|
public void addPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
|
||||||
|
@ -216,7 +220,7 @@ public class ReplicationPeerManager {
|
||||||
return desc != null ? Optional.of(desc.getPeerConfig()) : Optional.empty();
|
return desc != null ? Optional.of(desc.getPeerConfig()) : Optional.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void removeAllQueuesAndHFileRefs(String peerId) throws ReplicationException {
|
void removeAllQueuesAndHFileRefs(String peerId) throws ReplicationException {
|
||||||
// Here we need two passes to address the problem of claimQueue. Maybe a claimQueue is still
|
// Here we need two passes to address the problem of claimQueue. Maybe a claimQueue is still
|
||||||
// on-going when the refresh peer config procedure is done, if a RS which has already been
|
// on-going when the refresh peer config procedure is done, if a RS which has already been
|
||||||
// scanned claims the queue of a RS which has not been scanned yet, we will miss that queue in
|
// scanned claims the queue of a RS which has not been scanned yet, we will miss that queue in
|
||||||
|
@ -340,7 +344,7 @@ public class ReplicationPeerManager {
|
||||||
public static ReplicationPeerManager create(ZKWatcher zk, Configuration conf)
|
public static ReplicationPeerManager create(ZKWatcher zk, Configuration conf)
|
||||||
throws ReplicationException {
|
throws ReplicationException {
|
||||||
ReplicationPeerStorage peerStorage =
|
ReplicationPeerStorage peerStorage =
|
||||||
ReplicationStorageFactory.getReplicationPeerStorage(zk, conf);
|
ReplicationStorageFactory.getReplicationPeerStorage(zk, conf);
|
||||||
ConcurrentMap<String, ReplicationPeerDescription> peers = new ConcurrentHashMap<>();
|
ConcurrentMap<String, ReplicationPeerDescription> peers = new ConcurrentHashMap<>();
|
||||||
for (String peerId : peerStorage.listPeerIds()) {
|
for (String peerId : peerStorage.listPeerIds()) {
|
||||||
ReplicationPeerConfig peerConfig = peerStorage.getPeerConfig(peerId);
|
ReplicationPeerConfig peerConfig = peerStorage.getPeerConfig(peerId);
|
||||||
|
@ -348,7 +352,7 @@ public class ReplicationPeerManager {
|
||||||
peers.put(peerId, new ReplicationPeerDescription(peerId, enabled, peerConfig));
|
peers.put(peerId, new ReplicationPeerDescription(peerId, enabled, peerConfig));
|
||||||
}
|
}
|
||||||
return new ReplicationPeerManager(peerStorage,
|
return new ReplicationPeerManager(peerStorage,
|
||||||
ReplicationStorageFactory.getReplicationQueueStorage(zk, conf), peers);
|
ReplicationStorageFactory.getReplicationQueueStorage(zk, conf), peers);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -24,6 +24,8 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationException;
|
import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
|
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
|
||||||
|
import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
|
||||||
|
import org.apache.hadoop.hbase.replication.ReplicationUtils;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -40,6 +42,10 @@ public class UpdatePeerConfigProcedure extends ModifyPeerProcedure {
|
||||||
|
|
||||||
private ReplicationPeerConfig peerConfig;
|
private ReplicationPeerConfig peerConfig;
|
||||||
|
|
||||||
|
private ReplicationPeerConfig oldPeerConfig;
|
||||||
|
|
||||||
|
private boolean enabled;
|
||||||
|
|
||||||
public UpdatePeerConfigProcedure() {
|
public UpdatePeerConfigProcedure() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,22 +59,54 @@ public class UpdatePeerConfigProcedure extends ModifyPeerProcedure {
|
||||||
return PeerOperationType.UPDATE_CONFIG;
|
return PeerOperationType.UPDATE_CONFIG;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean reopenRegionsAfterRefresh() {
|
||||||
|
// If we remove some tables from the peer config then we do not need to enter the extra states
|
||||||
|
// for serial replication. Could try to optimize later since it is not easy to determine this...
|
||||||
|
return peerConfig.isSerial() && (!oldPeerConfig.isSerial() ||
|
||||||
|
!ReplicationUtils.isNamespacesAndTableCFsEqual(peerConfig, oldPeerConfig));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean enablePeerBeforeFinish() {
|
||||||
|
// do not need to test reopenRegionsAfterRefresh since we can only enter here if
|
||||||
|
// reopenRegionsAfterRefresh returns true.
|
||||||
|
return enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ReplicationPeerConfig getOldPeerConfig() {
|
||||||
|
return oldPeerConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ReplicationPeerConfig getNewPeerConfig() {
|
||||||
|
return peerConfig;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void prePeerModification(MasterProcedureEnv env) throws IOException {
|
protected void prePeerModification(MasterProcedureEnv env) throws IOException {
|
||||||
MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
||||||
if (cpHost != null) {
|
if (cpHost != null) {
|
||||||
cpHost.preUpdateReplicationPeerConfig(peerId, peerConfig);
|
cpHost.preUpdateReplicationPeerConfig(peerId, peerConfig);
|
||||||
}
|
}
|
||||||
env.getReplicationPeerManager().preUpdatePeerConfig(peerId, peerConfig);
|
ReplicationPeerDescription desc =
|
||||||
|
env.getReplicationPeerManager().preUpdatePeerConfig(peerId, peerConfig);
|
||||||
|
oldPeerConfig = desc.getPeerConfig();
|
||||||
|
enabled = desc.isEnabled();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void updatePeerStorage(MasterProcedureEnv env) throws ReplicationException {
|
protected void updatePeerStorage(MasterProcedureEnv env) throws ReplicationException {
|
||||||
env.getReplicationPeerManager().updatePeerConfig(peerId, peerConfig);
|
env.getReplicationPeerManager().updatePeerConfig(peerId, peerConfig);
|
||||||
|
if (enabled && reopenRegionsAfterRefresh()) {
|
||||||
|
env.getReplicationPeerManager().disablePeer(peerId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void postPeerModification(MasterProcedureEnv env) throws IOException {
|
protected void postPeerModification(MasterProcedureEnv env)
|
||||||
|
throws IOException, ReplicationException {
|
||||||
LOG.info("Successfully updated peer config of {} to {}", peerId, peerConfig);
|
LOG.info("Successfully updated peer config of {} to {}", peerId, peerConfig);
|
||||||
MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
||||||
if (cpHost != null) {
|
if (cpHost != null) {
|
||||||
|
@ -79,14 +117,23 @@ public class UpdatePeerConfigProcedure extends ModifyPeerProcedure {
|
||||||
@Override
|
@Override
|
||||||
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
||||||
super.serializeStateData(serializer);
|
super.serializeStateData(serializer);
|
||||||
serializer.serialize(UpdatePeerConfigStateData.newBuilder()
|
UpdatePeerConfigStateData.Builder builder = UpdatePeerConfigStateData.newBuilder()
|
||||||
.setPeerConfig(ReplicationPeerConfigUtil.convert(peerConfig)).build());
|
.setPeerConfig(ReplicationPeerConfigUtil.convert(peerConfig));
|
||||||
|
if (oldPeerConfig != null) {
|
||||||
|
builder.setOldPeerConfig(ReplicationPeerConfigUtil.convert(oldPeerConfig));
|
||||||
|
}
|
||||||
|
serializer.serialize(builder.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
|
||||||
super.deserializeStateData(serializer);
|
super.deserializeStateData(serializer);
|
||||||
peerConfig = ReplicationPeerConfigUtil
|
UpdatePeerConfigStateData data = serializer.deserialize(UpdatePeerConfigStateData.class);
|
||||||
.convert(serializer.deserialize(UpdatePeerConfigStateData.class).getPeerConfig());
|
peerConfig = ReplicationPeerConfigUtil.convert(data.getPeerConfig());
|
||||||
|
if (data.hasOldPeerConfig()) {
|
||||||
|
oldPeerConfig = ReplicationPeerConfigUtil.convert(data.getOldPeerConfig());
|
||||||
|
} else {
|
||||||
|
oldPeerConfig = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationPeer.PeerState;
|
import org.apache.hadoop.hbase.replication.ReplicationPeer.PeerState;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
|
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
|
import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
|
||||||
|
import org.apache.hadoop.hbase.replication.ReplicationPeers;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationUtils;
|
import org.apache.hadoop.hbase.replication.ReplicationUtils;
|
||||||
import org.apache.hadoop.hbase.util.KeyLocker;
|
import org.apache.hadoop.hbase.util.KeyLocker;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
@ -99,19 +100,26 @@ public class PeerProcedureHandlerImpl implements PeerProcedureHandler {
|
||||||
@Override
|
@Override
|
||||||
public void updatePeerConfig(String peerId) throws ReplicationException, IOException {
|
public void updatePeerConfig(String peerId) throws ReplicationException, IOException {
|
||||||
Lock peerLock = peersLock.acquireLock(peerId);
|
Lock peerLock = peersLock.acquireLock(peerId);
|
||||||
|
ReplicationPeers peers = replicationSourceManager.getReplicationPeers();
|
||||||
ReplicationPeerImpl peer = null;
|
ReplicationPeerImpl peer = null;
|
||||||
ReplicationPeerConfig oldConfig = null;
|
ReplicationPeerConfig oldConfig = null;
|
||||||
|
PeerState oldState = null;
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
peer = replicationSourceManager.getReplicationPeers().getPeer(peerId);
|
peer = peers.getPeer(peerId);
|
||||||
if (peer == null) {
|
if (peer == null) {
|
||||||
throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
|
throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
|
||||||
}
|
}
|
||||||
oldConfig = peer.getPeerConfig();
|
oldConfig = peer.getPeerConfig();
|
||||||
ReplicationPeerConfig newConfig =
|
oldState = peer.getPeerState();
|
||||||
replicationSourceManager.getReplicationPeers().refreshPeerConfig(peerId);
|
ReplicationPeerConfig newConfig = peers.refreshPeerConfig(peerId);
|
||||||
|
// also need to refresh peer state here. When updating a serial replication peer we may
|
||||||
|
// disable it first and then enable it.
|
||||||
|
PeerState newState = peers.refreshPeerState(peerId);
|
||||||
// RS need to start work with the new replication config change
|
// RS need to start work with the new replication config change
|
||||||
if (!ReplicationUtils.isKeyConfigEqual(oldConfig, newConfig)) {
|
if (!ReplicationUtils.isNamespacesAndTableCFsEqual(oldConfig, newConfig) ||
|
||||||
|
oldConfig.isSerial() != newConfig.isSerial() ||
|
||||||
|
(oldState.equals(PeerState.ENABLED) && newState.equals(PeerState.DISABLED))) {
|
||||||
replicationSourceManager.refreshSources(peerId);
|
replicationSourceManager.refreshSources(peerId);
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
|
@ -119,6 +127,7 @@ public class PeerProcedureHandlerImpl implements PeerProcedureHandler {
|
||||||
if (!success && peer != null) {
|
if (!success && peer != null) {
|
||||||
// Reset peer config if refresh source failed
|
// Reset peer config if refresh source failed
|
||||||
peer.setPeerConfig(oldConfig);
|
peer.setPeerConfig(oldConfig);
|
||||||
|
peer.setPeerState(oldState.equals(PeerState.ENABLED));
|
||||||
}
|
}
|
||||||
peerLock.unlock();
|
peerLock.unlock();
|
||||||
}
|
}
|
||||||
|
|
|
@ -510,7 +510,7 @@ public class ReplicationSourceManager implements ReplicationListener {
|
||||||
// synchronized on walsById to avoid race with preLogRoll
|
// synchronized on walsById to avoid race with preLogRoll
|
||||||
synchronized (this.walsById) {
|
synchronized (this.walsById) {
|
||||||
NavigableSet<String> wals = walsById.get(queueId).get(logPrefix);
|
NavigableSet<String> wals = walsById.get(queueId).get(logPrefix);
|
||||||
if (wals != null && !wals.first().equals(log)) {
|
if (wals != null) {
|
||||||
cleanOldLogs(wals, log, inclusive, queueId);
|
cleanOldLogs(wals, log, inclusive, queueId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -755,7 +755,7 @@ public class ReplicationSourceManager implements ReplicationListener {
|
||||||
* @return a sorted set of wal names
|
* @return a sorted set of wal names
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
Map<String, Map<String, NavigableSet<String>>> getWALs() {
|
public Map<String, Map<String, NavigableSet<String>>> getWALs() {
|
||||||
return Collections.unmodifiableMap(walsById);
|
return Collections.unmodifiableMap(walsById);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -157,4 +157,12 @@ class WALEntryBatch {
|
||||||
public void setLastSeqId(String region, long sequenceId) {
|
public void setLastSeqId(String region, long sequenceId) {
|
||||||
lastSeqIds.put(region, sequenceId);
|
lastSeqIds.put(region, sequenceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "WALEntryBatch [walEntries=" + walEntries + ", lastWalPath=" + lastWalPath +
|
||||||
|
", lastWalPosition=" + lastWalPosition + ", nbRowKeys=" + nbRowKeys + ", nbHFiles=" +
|
||||||
|
nbHFiles + ", heapSize=" + heapSize + ", lastSeqIds=" + lastSeqIds + ", endOfFile=" +
|
||||||
|
endOfFile + "]";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,229 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.replication;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
|
||||||
|
import org.apache.hadoop.hbase.wal.WAL;
|
||||||
|
import org.apache.hadoop.hbase.wal.WAL.Entry;
|
||||||
|
import org.apache.hadoop.hbase.wal.WALFactory;
|
||||||
|
import org.apache.hadoop.hbase.wal.WALProvider;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.rules.TestName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for testing serial replication.
|
||||||
|
*/
|
||||||
|
public class SerialReplicationTestBase {
|
||||||
|
|
||||||
|
protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
||||||
|
|
||||||
|
protected static String PEER_ID = "1";
|
||||||
|
|
||||||
|
protected static byte[] CF = Bytes.toBytes("CF");
|
||||||
|
|
||||||
|
protected static byte[] CQ = Bytes.toBytes("CQ");
|
||||||
|
|
||||||
|
protected static FileSystem FS;
|
||||||
|
|
||||||
|
protected static Path LOG_DIR;
|
||||||
|
|
||||||
|
protected static WALProvider.Writer WRITER;
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public final TestName name = new TestName();
|
||||||
|
|
||||||
|
protected Path logPath;
|
||||||
|
|
||||||
|
public static final class LocalReplicationEndpoint extends BaseReplicationEndpoint {
|
||||||
|
|
||||||
|
private static final UUID PEER_UUID = UUID.randomUUID();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public UUID getPeerUUID() {
|
||||||
|
return PEER_UUID;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean replicate(ReplicateContext replicateContext) {
|
||||||
|
synchronized (WRITER) {
|
||||||
|
try {
|
||||||
|
for (Entry entry : replicateContext.getEntries()) {
|
||||||
|
WRITER.append(entry);
|
||||||
|
}
|
||||||
|
WRITER.sync(false);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start() {
|
||||||
|
startAsync();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
stopAsync();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doStart() {
|
||||||
|
notifyStarted();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doStop() {
|
||||||
|
notifyStopped();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpBeforeClass() throws Exception {
|
||||||
|
UTIL.getConfiguration().setInt("replication.source.nb.capacity", 10);
|
||||||
|
UTIL.startMiniCluster(3);
|
||||||
|
// disable balancer
|
||||||
|
UTIL.getAdmin().balancerSwitch(false, true);
|
||||||
|
LOG_DIR = UTIL.getDataTestDirOnTestFS("replicated");
|
||||||
|
FS = UTIL.getTestFileSystem();
|
||||||
|
FS.mkdirs(LOG_DIR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownAfterClass() throws Exception {
|
||||||
|
UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
UTIL.getAdmin().removeReplicationPeer(PEER_ID);
|
||||||
|
rollAllWALs();
|
||||||
|
if (WRITER != null) {
|
||||||
|
WRITER.close();
|
||||||
|
WRITER = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void moveRegion(RegionInfo region, HRegionServer rs) throws Exception {
|
||||||
|
UTIL.getAdmin().move(region.getEncodedNameAsBytes(),
|
||||||
|
Bytes.toBytes(rs.getServerName().getServerName()));
|
||||||
|
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
return rs.getRegion(region.getEncodedName()) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String explainFailure() throws Exception {
|
||||||
|
return region + " is still not on " + rs;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void rollAllWALs() throws Exception {
|
||||||
|
for (RegionServerThread t : UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()) {
|
||||||
|
t.getRegionServer().getWalRoller().requestRollAll();
|
||||||
|
}
|
||||||
|
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
return UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().stream()
|
||||||
|
.map(t -> t.getRegionServer()).allMatch(HRegionServer::walRollRequestFinished);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String explainFailure() throws Exception {
|
||||||
|
return "Log roll has not finished yet";
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void setupWALWriter() throws IOException {
|
||||||
|
logPath = new Path(LOG_DIR, name.getMethodName());
|
||||||
|
WRITER = WALFactory.createWALWriter(FS, logPath, UTIL.getConfiguration());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void waitUntilReplicationDone(int expectedEntries) throws Exception {
|
||||||
|
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
try (WAL.Reader reader = WALFactory.createReader(FS, logPath, UTIL.getConfiguration())) {
|
||||||
|
int count = 0;
|
||||||
|
while (reader.next() != null) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return count >= expectedEntries;
|
||||||
|
} catch (IOException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String explainFailure() throws Exception {
|
||||||
|
return "Not enough entries replicated";
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void addPeer(boolean enabled) throws IOException {
|
||||||
|
UTIL.getAdmin().addReplicationPeer(PEER_ID,
|
||||||
|
ReplicationPeerConfig.newBuilder().setClusterKey("127.0.0.1:2181:/hbase")
|
||||||
|
.setReplicationEndpointImpl(LocalReplicationEndpoint.class.getName()).setSerial(true)
|
||||||
|
.build(),
|
||||||
|
enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void checkOrder(int expectedEntries) throws IOException {
|
||||||
|
try (WAL.Reader reader =
|
||||||
|
WALFactory.createReader(UTIL.getTestFileSystem(), logPath, UTIL.getConfiguration())) {
|
||||||
|
long seqId = -1L;
|
||||||
|
int count = 0;
|
||||||
|
for (Entry entry;;) {
|
||||||
|
entry = reader.next();
|
||||||
|
if (entry == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assertTrue(
|
||||||
|
"Sequence id go backwards from " + seqId + " to " + entry.getKey().getSequenceId(),
|
||||||
|
entry.getKey().getSequenceId() >= seqId);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
assertEquals(expectedEntries, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,215 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.replication;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
|
||||||
|
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
||||||
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.client.Table;
|
||||||
|
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
|
||||||
|
import org.apache.hadoop.hbase.replication.regionserver.Replication;
|
||||||
|
import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.ReplicationTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.CommonFSUtils.StreamLacksCapabilityException;
|
||||||
|
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testcase for HBASE-20147.
|
||||||
|
*/
|
||||||
|
@Category({ ReplicationTests.class, MediumTests.class })
|
||||||
|
public class TestAddToSerialReplicationPeer extends SerialReplicationTestBase {
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestAddToSerialReplicationPeer.class);
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws IOException, StreamLacksCapabilityException {
|
||||||
|
setupWALWriter();
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure that we will start replication for the sequence id after move, that's what we want to
|
||||||
|
// test here.
|
||||||
|
private void moveRegionAndArchiveOldWals(RegionInfo region, HRegionServer rs) throws Exception {
|
||||||
|
moveRegion(region, rs);
|
||||||
|
rollAllWALs();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void waitUntilReplicatedToTheCurrentWALFile(HRegionServer rs) throws Exception {
|
||||||
|
Path path = ((AbstractFSWAL<?>) rs.getWAL(null)).getCurrentFileName();
|
||||||
|
String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(path.getName());
|
||||||
|
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
ReplicationSourceManager manager =
|
||||||
|
((Replication) rs.getReplicationSourceService()).getReplicationManager();
|
||||||
|
return manager.getWALs().get(PEER_ID).get(logPrefix).size() == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String explainFailure() throws Exception {
|
||||||
|
return "Still not replicated to the current WAL file yet";
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddPeer() throws Exception {
|
||||||
|
TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
UTIL.getAdmin().createTable(
|
||||||
|
TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(ColumnFamilyDescriptorBuilder
|
||||||
|
.newBuilder(CF).setScope(HConstants.REPLICATION_SCOPE_GLOBAL).build()).build());
|
||||||
|
UTIL.waitTableAvailable(tableName);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RegionInfo region = UTIL.getAdmin().getRegions(tableName).get(0);
|
||||||
|
HRegionServer rs = UTIL.getOtherRegionServer(UTIL.getRSForFirstRegionInTable(tableName));
|
||||||
|
moveRegionAndArchiveOldWals(region, rs);
|
||||||
|
addPeer(true);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
waitUntilReplicationDone(100);
|
||||||
|
checkOrder(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChangeToSerial() throws Exception {
|
||||||
|
ReplicationPeerConfig peerConfig =
|
||||||
|
ReplicationPeerConfig.newBuilder().setClusterKey("127.0.0.1:2181:/hbase")
|
||||||
|
.setReplicationEndpointImpl(LocalReplicationEndpoint.class.getName()).build();
|
||||||
|
UTIL.getAdmin().addReplicationPeer(PEER_ID, peerConfig, true);
|
||||||
|
|
||||||
|
TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
|
||||||
|
UTIL.getAdmin().createTable(
|
||||||
|
TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(ColumnFamilyDescriptorBuilder
|
||||||
|
.newBuilder(CF).setScope(HConstants.REPLICATION_SCOPE_GLOBAL).build()).build());
|
||||||
|
UTIL.waitTableAvailable(tableName);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RegionInfo region = UTIL.getAdmin().getRegions(tableName).get(0);
|
||||||
|
HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(tableName);
|
||||||
|
HRegionServer rs = UTIL.getOtherRegionServer(srcRs);
|
||||||
|
moveRegionAndArchiveOldWals(region, rs);
|
||||||
|
waitUntilReplicationDone(100);
|
||||||
|
waitUntilReplicatedToTheCurrentWALFile(srcRs);
|
||||||
|
|
||||||
|
UTIL.getAdmin().disableReplicationPeer(PEER_ID);
|
||||||
|
UTIL.getAdmin().updateReplicationPeerConfig(PEER_ID,
|
||||||
|
ReplicationPeerConfig.newBuilder(peerConfig).setSerial(true).build());
|
||||||
|
UTIL.getAdmin().enableReplicationPeer(PEER_ID);
|
||||||
|
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
waitUntilReplicationDone(200);
|
||||||
|
checkOrder(200);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddToSerialPeer() throws Exception {
|
||||||
|
ReplicationPeerConfig peerConfig =
|
||||||
|
ReplicationPeerConfig.newBuilder().setClusterKey("127.0.0.1:2181:/hbase")
|
||||||
|
.setReplicationEndpointImpl(LocalReplicationEndpoint.class.getName())
|
||||||
|
.setReplicateAllUserTables(false).setSerial(true).build();
|
||||||
|
UTIL.getAdmin().addReplicationPeer(PEER_ID, peerConfig, true);
|
||||||
|
|
||||||
|
TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
UTIL.getAdmin().createTable(
|
||||||
|
TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(ColumnFamilyDescriptorBuilder
|
||||||
|
.newBuilder(CF).setScope(HConstants.REPLICATION_SCOPE_GLOBAL).build()).build());
|
||||||
|
UTIL.waitTableAvailable(tableName);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RegionInfo region = UTIL.getAdmin().getRegions(tableName).get(0);
|
||||||
|
HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(tableName);
|
||||||
|
HRegionServer rs = UTIL.getOtherRegionServer(srcRs);
|
||||||
|
moveRegionAndArchiveOldWals(region, rs);
|
||||||
|
waitUntilReplicatedToTheCurrentWALFile(rs);
|
||||||
|
UTIL.getAdmin().disableReplicationPeer(PEER_ID);
|
||||||
|
UTIL.getAdmin().updateReplicationPeerConfig(PEER_ID,
|
||||||
|
ReplicationPeerConfig.newBuilder(peerConfig)
|
||||||
|
.setTableCFsMap(ImmutableMap.of(tableName, Collections.emptyList())).build());
|
||||||
|
UTIL.getAdmin().enableReplicationPeer(PEER_ID);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
waitUntilReplicationDone(100);
|
||||||
|
checkOrder(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDisabledTable() throws Exception {
|
||||||
|
TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
UTIL.getAdmin().createTable(
|
||||||
|
TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(ColumnFamilyDescriptorBuilder
|
||||||
|
.newBuilder(CF).setScope(HConstants.REPLICATION_SCOPE_GLOBAL).build()).build());
|
||||||
|
UTIL.waitTableAvailable(tableName);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UTIL.getAdmin().disableTable(tableName);
|
||||||
|
rollAllWALs();
|
||||||
|
addPeer(true);
|
||||||
|
UTIL.getAdmin().enableTable(tableName);
|
||||||
|
try (Table table = UTIL.getConnection().getTable(tableName)) {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
waitUntilReplicationDone(100);
|
||||||
|
checkOrder(100);
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,211 +23,49 @@ import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UncheckedIOException;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
|
|
||||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
||||||
import org.apache.hadoop.hbase.client.Put;
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
import org.apache.hadoop.hbase.client.Table;
|
import org.apache.hadoop.hbase.client.Table;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
import org.apache.hadoop.hbase.testclassification.ReplicationTests;
|
import org.apache.hadoop.hbase.testclassification.ReplicationTests;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.CommonFSUtils.StreamLacksCapabilityException;
|
import org.apache.hadoop.hbase.util.CommonFSUtils.StreamLacksCapabilityException;
|
||||||
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
|
|
||||||
import org.apache.hadoop.hbase.wal.WAL;
|
import org.apache.hadoop.hbase.wal.WAL;
|
||||||
import org.apache.hadoop.hbase.wal.WAL.Entry;
|
import org.apache.hadoop.hbase.wal.WAL.Entry;
|
||||||
import org.apache.hadoop.hbase.wal.WALFactory;
|
import org.apache.hadoop.hbase.wal.WALFactory;
|
||||||
import org.apache.hadoop.hbase.wal.WALProvider;
|
|
||||||
import org.junit.After;
|
|
||||||
import org.junit.AfterClass;
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.junit.ClassRule;
|
import org.junit.ClassRule;
|
||||||
import org.junit.Rule;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
import org.junit.rules.TestName;
|
|
||||||
|
|
||||||
@Category({ ReplicationTests.class, LargeTests.class })
|
@Category({ ReplicationTests.class, MediumTests.class })
|
||||||
public class TestSerialReplication {
|
public class TestSerialReplication extends SerialReplicationTestBase {
|
||||||
|
|
||||||
@ClassRule
|
@ClassRule
|
||||||
public static final HBaseClassTestRule CLASS_RULE =
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
HBaseClassTestRule.forClass(TestSerialReplication.class);
|
HBaseClassTestRule.forClass(TestSerialReplication.class);
|
||||||
|
|
||||||
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
|
||||||
|
|
||||||
private static String PEER_ID = "1";
|
|
||||||
|
|
||||||
private static byte[] CF = Bytes.toBytes("CF");
|
|
||||||
|
|
||||||
private static byte[] CQ = Bytes.toBytes("CQ");
|
|
||||||
|
|
||||||
private static FileSystem FS;
|
|
||||||
|
|
||||||
private static Path LOG_DIR;
|
|
||||||
|
|
||||||
private static WALProvider.Writer WRITER;
|
|
||||||
|
|
||||||
public static final class LocalReplicationEndpoint extends BaseReplicationEndpoint {
|
|
||||||
|
|
||||||
private static final UUID PEER_UUID = UUID.randomUUID();
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public UUID getPeerUUID() {
|
|
||||||
return PEER_UUID;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean replicate(ReplicateContext replicateContext) {
|
|
||||||
synchronized (WRITER) {
|
|
||||||
try {
|
|
||||||
for (Entry entry : replicateContext.getEntries()) {
|
|
||||||
WRITER.append(entry);
|
|
||||||
}
|
|
||||||
WRITER.sync(false);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new UncheckedIOException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void start() {
|
|
||||||
startAsync();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void stop() {
|
|
||||||
stopAsync();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void doStart() {
|
|
||||||
notifyStarted();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void doStop() {
|
|
||||||
notifyStopped();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void setUpBeforeClass() throws Exception {
|
|
||||||
UTIL.getConfiguration().setInt("replication.source.nb.capacity", 10);
|
|
||||||
UTIL.startMiniCluster(3);
|
|
||||||
// disable balancer
|
|
||||||
UTIL.getAdmin().balancerSwitch(false, true);
|
|
||||||
LOG_DIR = UTIL.getDataTestDirOnTestFS("replicated");
|
|
||||||
FS = UTIL.getTestFileSystem();
|
|
||||||
FS.mkdirs(LOG_DIR);
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterClass
|
|
||||||
public static void tearDownAfterClass() throws Exception {
|
|
||||||
UTIL.shutdownMiniCluster();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Rule
|
|
||||||
public final TestName name = new TestName();
|
|
||||||
|
|
||||||
private Path logPath;
|
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws IOException, StreamLacksCapabilityException {
|
public void setUp() throws IOException, StreamLacksCapabilityException {
|
||||||
logPath = new Path(LOG_DIR, name.getMethodName());
|
setupWALWriter();
|
||||||
WRITER = WALFactory.createWALWriter(FS, logPath, UTIL.getConfiguration());
|
|
||||||
// add in disable state, so later when enabling it all sources will start push together.
|
// add in disable state, so later when enabling it all sources will start push together.
|
||||||
UTIL.getAdmin().addReplicationPeer(PEER_ID,
|
addPeer(false);
|
||||||
ReplicationPeerConfig.newBuilder().setClusterKey("127.0.0.1:2181:/hbase")
|
|
||||||
.setReplicationEndpointImpl(LocalReplicationEndpoint.class.getName()).setSerial(true)
|
|
||||||
.build(),
|
|
||||||
false);
|
|
||||||
}
|
|
||||||
|
|
||||||
@After
|
|
||||||
public void tearDown() throws Exception {
|
|
||||||
UTIL.getAdmin().removeReplicationPeer(PEER_ID);
|
|
||||||
for (RegionServerThread t : UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()) {
|
|
||||||
t.getRegionServer().getWalRoller().requestRollAll();
|
|
||||||
}
|
|
||||||
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean evaluate() throws Exception {
|
|
||||||
return UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().stream()
|
|
||||||
.map(t -> t.getRegionServer()).allMatch(HRegionServer::walRollRequestFinished);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String explainFailure() throws Exception {
|
|
||||||
return "Log roll has not finished yet";
|
|
||||||
}
|
|
||||||
});
|
|
||||||
for (RegionServerThread t : UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()) {
|
|
||||||
t.getRegionServer().getWalRoller().requestRollAll();
|
|
||||||
}
|
|
||||||
if (WRITER != null) {
|
|
||||||
WRITER.close();
|
|
||||||
WRITER = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void moveRegion(RegionInfo region, HRegionServer rs) throws Exception {
|
|
||||||
UTIL.getAdmin().move(region.getEncodedNameAsBytes(),
|
|
||||||
Bytes.toBytes(rs.getServerName().getServerName()));
|
|
||||||
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean evaluate() throws Exception {
|
|
||||||
return rs.getRegion(region.getEncodedName()) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String explainFailure() throws Exception {
|
|
||||||
return region + " is still not on " + rs;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void enablePeerAndWaitUntilReplicationDone(int expectedEntries) throws Exception {
|
private void enablePeerAndWaitUntilReplicationDone(int expectedEntries) throws Exception {
|
||||||
UTIL.getAdmin().enableReplicationPeer(PEER_ID);
|
UTIL.getAdmin().enableReplicationPeer(PEER_ID);
|
||||||
UTIL.waitFor(30000, new ExplainingPredicate<Exception>() {
|
waitUntilReplicationDone(expectedEntries);
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean evaluate() throws Exception {
|
|
||||||
try (WAL.Reader reader = WALFactory.createReader(FS, logPath, UTIL.getConfiguration())) {
|
|
||||||
int count = 0;
|
|
||||||
while (reader.next() != null) {
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
return count >= expectedEntries;
|
|
||||||
} catch (IOException e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String explainFailure() throws Exception {
|
|
||||||
return "Not enough entries replicated";
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -251,22 +89,7 @@ public class TestSerialReplication {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
enablePeerAndWaitUntilReplicationDone(200);
|
enablePeerAndWaitUntilReplicationDone(200);
|
||||||
try (WAL.Reader reader =
|
checkOrder(200);
|
||||||
WALFactory.createReader(UTIL.getTestFileSystem(), logPath, UTIL.getConfiguration())) {
|
|
||||||
long seqId = -1L;
|
|
||||||
int count = 0;
|
|
||||||
for (Entry entry;;) {
|
|
||||||
entry = reader.next();
|
|
||||||
if (entry == null) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
assertTrue(
|
|
||||||
"Sequence id go backwards from " + seqId + " to " + entry.getKey().getSequenceId(),
|
|
||||||
entry.getKey().getSequenceId() >= seqId);
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
assertEquals(200, count);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue