HBASE-12769 Replication fails to delete all corresponding zk nodes when peer is removed (Jianwei Cui)

This commit is contained in:
tedyu 2015-10-28 15:43:26 -07:00
parent e5b566e01c
commit 3c8e92019c
8 changed files with 296 additions and 12 deletions

View File

@ -128,11 +128,12 @@ public class ReplicationAdmin implements Closeable {
try { try {
zkw = createZooKeeperWatcher(); zkw = createZooKeeperWatcher();
try { try {
this.replicationPeers = ReplicationFactory.getReplicationPeers(zkw, conf, this.connection);
this.replicationPeers.init();
this.replicationQueuesClient = this.replicationQueuesClient =
ReplicationFactory.getReplicationQueuesClient(zkw, conf, this.connection); ReplicationFactory.getReplicationQueuesClient(zkw, conf, this.connection);
this.replicationQueuesClient.init(); this.replicationQueuesClient.init();
this.replicationPeers = ReplicationFactory.getReplicationPeers(zkw, conf,
this.replicationQueuesClient, this.connection);
this.replicationPeers.init();
} catch (Exception exception) { } catch (Exception exception) {
if (zkw != null) { if (zkw != null) {
zkw.close(); zkw.close();

View File

@ -42,7 +42,12 @@ public class ReplicationFactory {
public static ReplicationPeers getReplicationPeers(final ZooKeeperWatcher zk, Configuration conf, public static ReplicationPeers getReplicationPeers(final ZooKeeperWatcher zk, Configuration conf,
Abortable abortable) { Abortable abortable) {
return new ReplicationPeersZKImpl(zk, conf, abortable); return getReplicationPeers(zk, conf, null, abortable);
}
public static ReplicationPeers getReplicationPeers(final ZooKeeperWatcher zk, Configuration conf,
final ReplicationQueuesClient queuesClient, Abortable abortable) {
return new ReplicationPeersZKImpl(zk, conf, queuesClient, abortable);
} }
public static ReplicationTracker getReplicationTracker(ZooKeeperWatcher zookeeper, public static ReplicationTracker getReplicationTracker(ZooKeeperWatcher zookeeper,

View File

@ -81,14 +81,16 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re
// Map of peer clusters keyed by their id // Map of peer clusters keyed by their id
private Map<String, ReplicationPeerZKImpl> peerClusters; private Map<String, ReplicationPeerZKImpl> peerClusters;
private final String tableCFsNodeName; private final String tableCFsNodeName;
private final ReplicationQueuesClient queuesClient;
private static final Log LOG = LogFactory.getLog(ReplicationPeersZKImpl.class); private static final Log LOG = LogFactory.getLog(ReplicationPeersZKImpl.class);
public ReplicationPeersZKImpl(final ZooKeeperWatcher zk, final Configuration conf, public ReplicationPeersZKImpl(final ZooKeeperWatcher zk, final Configuration conf,
Abortable abortable) { final ReplicationQueuesClient queuesClient, Abortable abortable) {
super(zk, conf, abortable); super(zk, conf, abortable);
this.tableCFsNodeName = conf.get("zookeeper.znode.replication.peers.tableCFs", "tableCFs"); this.tableCFsNodeName = conf.get("zookeeper.znode.replication.peers.tableCFs", "tableCFs");
this.peerClusters = new ConcurrentHashMap<String, ReplicationPeerZKImpl>(); this.peerClusters = new ConcurrentHashMap<String, ReplicationPeerZKImpl>();
this.queuesClient = queuesClient;
} }
@Override @Override
@ -116,6 +118,8 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re
throw new IllegalArgumentException("Found invalid peer name:" + id); throw new IllegalArgumentException("Found invalid peer name:" + id);
} }
checkQueuesDeleted(id);
ZKUtil.createWithParents(this.zookeeper, this.peersZNode); ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
List<ZKUtilOp> listOfOps = new ArrayList<ZKUtil.ZKUtilOp>(); List<ZKUtilOp> listOfOps = new ArrayList<ZKUtil.ZKUtilOp>();
ZKUtilOp op1 = ZKUtilOp.createAndFailSilent(ZKUtil.joinZNode(this.peersZNode, id), ZKUtilOp op1 = ZKUtilOp.createAndFailSilent(ZKUtil.joinZNode(this.peersZNode, id),
@ -561,5 +565,22 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re
return ProtobufUtil.prependPBMagic(bytes); return ProtobufUtil.prependPBMagic(bytes);
} }
private void checkQueuesDeleted(String peerId) throws ReplicationException {
if (queuesClient == null) return;
try {
List<String> replicators = queuesClient.getListOfReplicators();
for (String replicator : replicators) {
List<String> queueIds = queuesClient.getAllQueues(replicator);
for (String queueId : queueIds) {
ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
if (queueInfo.getPeerId().equals(peerId)) {
throw new ReplicationException("undeleted queue for peerId: " + peerId
+ ", replicator: " + replicator + ", queueId: " + queueId);
}
}
}
} catch (KeeperException e) {
throw new ReplicationException("Could not check queues deleted with id=" + peerId, e);
}
}
} }

View File

@ -129,6 +129,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator; import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler; import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl; import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker; import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
@ -251,6 +252,7 @@ public class HBaseFsck extends Configured implements Closeable {
private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
private boolean fixTableLocks = false; // fix table locks which are expired private boolean fixTableLocks = false; // fix table locks which are expired
private boolean fixTableZNodes = false; // fix table Znodes which are orphaned private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
private boolean fixReplication = false; // fix undeleted replication queues for removed peer
private boolean fixAny = false; // Set to true if any of the fix is required. private boolean fixAny = false; // Set to true if any of the fix is required.
// limit checking/fixes to listed tables, if empty attempt to check/fix all // limit checking/fixes to listed tables, if empty attempt to check/fix all
@ -715,6 +717,8 @@ public class HBaseFsck extends Configured implements Closeable {
// Check (and fix if requested) orphaned table ZNodes // Check (and fix if requested) orphaned table ZNodes
checkAndFixOrphanedTableZNodes(); checkAndFixOrphanedTableZNodes();
checkAndFixReplication();
// Remove the hbck lock // Remove the hbck lock
unlockHbck(); unlockHbck();
@ -3231,6 +3235,21 @@ public class HBaseFsck extends Configured implements Closeable {
} }
} }
private void checkAndFixReplication() throws IOException {
ZooKeeperWatcher zkw = createZooKeeperWatcher();
try {
ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
checker.checkUnDeletedQueues();
if (checker.hasUnDeletedQueues() && this.fixReplication) {
checker.fixUnDeletedQueues();
setShouldRerun();
}
} finally {
zkw.close();
}
}
/** /**
* Check whether a orphaned table ZNode exists and fix it if requested. * Check whether a orphaned table ZNode exists and fix it if requested.
* @throws IOException * @throws IOException
@ -3817,7 +3836,8 @@ public class HBaseFsck extends Configured implements Closeable {
FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE, ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR,
UNDELETED_REPLICATION_QUEUE
} }
void clear(); void clear();
void report(String message); void report(String message);
@ -4219,6 +4239,14 @@ public class HBaseFsck extends Configured implements Closeable {
fixAny |= shouldFix; fixAny |= shouldFix;
} }
/**
* Set replication fix mode.
*/
public void setFixReplication(boolean shouldFix) {
fixReplication = shouldFix;
fixAny |= shouldFix;
}
/** /**
* Set orphaned table ZNodes fix mode. * Set orphaned table ZNodes fix mode.
* Set the table state to disable in the orphaned table ZNode. * Set the table state to disable in the orphaned table ZNode.
@ -4492,6 +4520,10 @@ public class HBaseFsck extends Configured implements Closeable {
out.println(" Table Znode options"); out.println(" Table Znode options");
out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exists"); out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exists");
out.println("");
out.println(" Replication options");
out.println(" -fixReplication Deletes replication queues for removed peers");
out.flush(); out.flush();
errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString()); errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
@ -4678,6 +4710,8 @@ public class HBaseFsck extends Configured implements Closeable {
setRegionBoundariesCheck(); setRegionBoundariesCheck();
} else if (cmd.equals("-fixTableLocks")) { } else if (cmd.equals("-fixTableLocks")) {
setFixTableLocks(true); setFixTableLocks(true);
} else if (cmd.equals("-fixReplication")) {
setFixReplication(true);
} else if (cmd.equals("-fixOrphanedTableZnodes")) { } else if (cmd.equals("-fixOrphanedTableZnodes")) {
setFixTableZNodes(true); setFixTableZNodes(true);
} else if (cmd.startsWith("-")) { } else if (cmd.startsWith("-")) {

View File

@ -0,0 +1,134 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.hbck;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationFactory;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.ReplicationQueuesClient;
import org.apache.hadoop.hbase.replication.ReplicationStateZKBase;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
/*
* Check and fix undeleted replication queues for removed peerId.
*/
@InterfaceAudience.Private
public class ReplicationChecker {
private static final Log LOG = LogFactory.getLog(ReplicationChecker.class);
private ErrorReporter errorReporter;
private ReplicationQueuesClient queuesClient;
private ReplicationPeers replicationPeers;
private ReplicationQueueDeletor queueDeletor;
// replicator with its queueIds for removed peers
private Map<String, List<String>> undeletedQueueIds = new HashMap<String, List<String>>();
public ReplicationChecker(Configuration conf, ZooKeeperWatcher zkw, HConnection connection,
ErrorReporter errorReporter) throws IOException {
try {
this.errorReporter = errorReporter;
this.queuesClient = ReplicationFactory.getReplicationQueuesClient(zkw, conf, connection);
this.queuesClient.init();
this.replicationPeers = ReplicationFactory.getReplicationPeers(zkw, conf, this.queuesClient,
connection);
this.replicationPeers.init();
this.queueDeletor = new ReplicationQueueDeletor(zkw, conf, connection);
} catch (ReplicationException e) {
throw new IOException("failed to construct ReplicationChecker", e);
}
}
public boolean hasUnDeletedQueues() {
return errorReporter.getErrorList()
.contains(HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE);
}
public void checkUnDeletedQueues() throws IOException {
Set<String> peerIds = new HashSet<String>(this.replicationPeers.getAllPeerIds());
try {
List<String> replicators = this.queuesClient.getListOfReplicators();
for (String replicator : replicators) {
List<String> queueIds = this.queuesClient.getAllQueues(replicator);
for (String queueId : queueIds) {
ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
if (!peerIds.contains(queueInfo.getPeerId())) {
if (!undeletedQueueIds.containsKey(replicator)) {
undeletedQueueIds.put(replicator, new ArrayList<String>());
}
undeletedQueueIds.get(replicator).add(queueId);
String msg = "Undeleted replication queue for removed peer found: "
+ String.format("[removedPeerId=%s, replicator=%s, queueId=%s]",
queueInfo.getPeerId(), replicator, queueId);
errorReporter.reportError(
HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE, msg);
}
}
}
} catch (KeeperException ke) {
throw new IOException(ke);
}
}
private static class ReplicationQueueDeletor extends ReplicationStateZKBase {
public ReplicationQueueDeletor(ZooKeeperWatcher zk, Configuration conf, Abortable abortable) {
super(zk, conf, abortable);
}
public void removeQueue(String replicator, String queueId) throws IOException {
String queueZnodePath = ZKUtil.joinZNode(ZKUtil.joinZNode(this.queuesZNode, replicator),
queueId);
try {
ZKUtil.deleteNodeRecursively(this.zookeeper, queueZnodePath);
LOG.info("remove replication queue, replicator: " + replicator + ", queueId: " + queueId);
} catch (KeeperException e) {
throw new IOException("failed to delete queue, replicator: " + replicator + ", queueId: "
+ queueId);
}
}
}
public void fixUnDeletedQueues() throws IOException {
for (Entry<String, List<String>> replicatorAndQueueIds : undeletedQueueIds.entrySet()) {
String replicator = replicatorAndQueueIds.getKey();
for (String queueId : replicatorAndQueueIds.getValue()) {
queueDeletor.removeQueue(replicator, queueId);
}
}
}
}

View File

@ -26,7 +26,10 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.replication.ReplicationException; import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationFactory;
import org.apache.hadoop.hbase.replication.ReplicationQueues;
import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -114,6 +117,37 @@ public class TestReplicationAdmin {
assertEquals(0, admin.getPeersCount()); assertEquals(0, admin.getPeersCount());
} }
@Test
public void testAddPeerWithUnDeletedQueues() throws Exception {
Configuration conf = TEST_UTIL.getConfiguration();
ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "Test HBaseAdmin", null);
ReplicationQueues repQueues =
ReplicationFactory.getReplicationQueues(zkw, conf, null);
repQueues.init("server1");
// add queue for ID_ONE
repQueues.addLog(ID_ONE, "file1");
try {
admin.addPeer(ID_ONE, KEY_ONE);
fail();
} catch (ReplicationException e) {
// OK!
}
repQueues.removeQueue(ID_ONE);
assertEquals(0, repQueues.getAllQueues().size());
// add recovered queue for ID_ONE
repQueues.addLog(ID_ONE + "-server2", "file1");
try {
admin.addPeer(ID_ONE, KEY_ONE);
fail();
} catch (ReplicationException e) {
// OK!
}
repQueues.removeAllQueues();
zkw.close();
}
/** /**
* basic checks that when we add a peer that it is enabled, and that we can disable * basic checks that when we add a peer that it is enabled, and that we can disable
* @throws Exception * @throws Exception

View File

@ -92,6 +92,7 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver; import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
@ -111,6 +112,8 @@ import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl; import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction; import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
import org.apache.hadoop.hbase.replication.ReplicationFactory;
import org.apache.hadoop.hbase.replication.ReplicationQueues;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
@ -120,6 +123,7 @@ import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil; import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass; import org.junit.AfterClass;
@ -1690,7 +1694,7 @@ public class TestHBaseFsck {
// for some time until children references are deleted. HBCK erroneously sees this as // for some time until children references are deleted. HBCK erroneously sees this as
// overlapping regions // overlapping regions
HBaseFsck hbck = doFsck( HBaseFsck hbck = doFsck(
conf, true, true, false, false, false, true, true, true, false, false, false, null); conf, true, true, false, false, false, true, true, true, false, false, false, false, null);
assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
// assert that the split hbase:meta entry is still there. // assert that the split hbase:meta entry is still there.
@ -1759,7 +1763,7 @@ public class TestHBaseFsck {
// now fix it. The fix should not revert the region split, but add daughters to META // now fix it. The fix should not revert the region split, but add daughters to META
hbck = doFsck( hbck = doFsck(
conf, true, true, false, false, false, false, false, false, false, false, false, null); conf, true, true, false, false, false, false, false, false, false, false, false,false,null);
assertErrors(hbck, assertErrors(hbck,
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.HOLE_IN_REGION_CHAIN }); ERROR_CODE.HOLE_IN_REGION_CHAIN });
@ -2202,6 +2206,55 @@ public class TestHBaseFsck {
hbck.close(); hbck.close();
} }
@Test(timeout=60000)
public void testCheckReplication() throws Exception {
// check no errors
HBaseFsck hbck = doFsck(conf, false);
assertNoErrors(hbck);
// create peer
ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf);
Assert.assertEquals(0, replicationAdmin.getPeersCount());
String zkPort = conf.get(HConstants.ZOOKEEPER_CLIENT_PORT);
replicationAdmin.addPeer("1", "127.0.0.1:2181" + zkPort + ":/hbase");
replicationAdmin.getPeersCount();
Assert.assertEquals(1, replicationAdmin.getPeersCount());
// create replicator
ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "Test Hbase Fsck", connection);
ReplicationQueues repQueues =
ReplicationFactory.getReplicationQueues(zkw, conf, connection);
repQueues.init("server1");
// queues for current peer, no errors
repQueues.addLog("1", "file1");
repQueues.addLog("1-server2", "file1");
Assert.assertEquals(2, repQueues.getAllQueues().size());
hbck = doFsck(conf, false);
assertNoErrors(hbck);
// queues for removed peer
repQueues.addLog("2", "file1");
repQueues.addLog("2-server2", "file1");
Assert.assertEquals(4, repQueues.getAllQueues().size());
hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
ERROR_CODE.UNDELETED_REPLICATION_QUEUE });
// fix the case
hbck = doFsck(conf, true);
hbck = doFsck(conf, false);
assertNoErrors(hbck);
// ensure only "2" is deleted
Assert.assertEquals(2, repQueues.getAllQueues().size());
Assert.assertNull(repQueues.getLogsInQueue("2"));
Assert.assertNull(repQueues.getLogsInQueue("2-sever2"));
replicationAdmin.removePeer("1");
repQueues.removeAllQueues();
zkw.close();
replicationAdmin.close();
}
/** /**
* This creates a table and simulates the race situation where a concurrent compaction or split * This creates a table and simulates the race situation where a concurrent compaction or split
* has removed a region dir before the corruption checker got to it. * has removed a region dir before the corruption checker got to it.
@ -2737,7 +2790,8 @@ public class TestHBaseFsck {
// fix hole // fix hole
assertErrors( assertErrors(
doFsck( doFsck(
conf, false, true, false, false, false, false, false, false, false, false, false, null), conf, false, true, false, false, false, false, false, false, false, false, false,
false, null),
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

View File

@ -40,14 +40,14 @@ public class HbckTestingUtil {
public static HBaseFsck doFsck( public static HBaseFsck doFsck(
Configuration conf, boolean fix, TableName table) throws Exception { Configuration conf, boolean fix, TableName table) throws Exception {
return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table); return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
} }
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks,
boolean fixTableZnodes, boolean fixTableZnodes, Boolean fixReplication,
TableName table) throws Exception { TableName table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf, exec); HBaseFsck fsck = new HBaseFsck(conf, exec);
fsck.setDisplayFullReport(); // i.e. -details fsck.setDisplayFullReport(); // i.e. -details
@ -62,6 +62,7 @@ public class HbckTestingUtil {
fsck.setFixReferenceFiles(fixReferenceFiles); fsck.setFixReferenceFiles(fixReferenceFiles);
fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
fsck.setFixTableLocks(fixTableLocks); fsck.setFixTableLocks(fixTableLocks);
fsck.setFixReplication(fixReplication);
fsck.setFixTableZNodes(fixTableZnodes); fsck.setFixTableZNodes(fixTableZnodes);
fsck.connect(); fsck.connect();
if (table != null) { if (table != null) {