HBASE-12686 Failures in split before PONR not clearing the daughter regions from regions in transition during rollback (Vandana Ayyalasomayajula)

This commit is contained in:
Andrew Purtell 2014-12-15 17:31:33 -08:00
parent 677153ad61
commit 871444cb0a
2 changed files with 68 additions and 8 deletions

View File

@ -29,6 +29,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
@ -165,6 +166,9 @@ public class AssignmentManager extends ZooKeeperListener {
// Maps a String key to a pair of HRegionInfo. Presumably tracks regions taking part in a merge;
// NOTE(review): what the String key identifies is not visible in this hunk -- confirm.
private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
= new HashMap<String, PairOfSameType<HRegionInfo>>();
// Splits currently being tracked: maps the parent region to the pair of its daughter regions
// (first, second). An entry is put when the daughters are moved to SPLITTING_NEW and the parent
// to SPLITTING, and removed either once the parent is offlined as SPLIT and the daughters are
// onlined, or when the daughters are offlined again because the split was rolled back.
// This is a plain HashMap; the put and the rollback-path accesses visible here run while holding
// the regionStates monitor. NOTE(review): confirm every access is made under that monitor.
private final Map<HRegionInfo, PairOfSameType<HRegionInfo>> splitRegions
= new HashMap<HRegionInfo, PairOfSameType<HRegionInfo>>();
/**
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
* failure due to lack of availability of region plan or bad region plan
@ -1321,14 +1325,30 @@ public class AssignmentManager extends ZooKeeperListener {
ServerName serverName = rs.getServerName();
if (serverManager.isServerOnline(serverName)) {
if (rs.isOnServer(serverName)
&& (rs.isOpened() || rs.isSplitting())) {
regionOnline(regionInfo, serverName);
if (disabled) {
// if server is offline, no hurt to unassign again
LOG.info("Opened " + regionNameStr
+ "but this table is disabled, triggering close of region");
unassign(regionInfo);
if (rs.isOnServer(serverName) && (rs.isOpened() || rs.isSplitting())) {
synchronized (regionStates) {
regionOnline(regionInfo, serverName);
if (rs.isSplitting() && splitRegions.containsKey(regionInfo)) {
// Check if the daughter regions are still there; if they are present, offline them,
// as this is the case of a rollback.
HRegionInfo hri_a = splitRegions.get(regionInfo).getFirst();
HRegionInfo hri_b = splitRegions.get(regionInfo).getSecond();
if (!regionStates.isRegionInTransition(hri_a.getEncodedName())) {
LOG.warn("Split daughter region not in transition " + hri_a);
}
if (!regionStates.isRegionInTransition(hri_b.getEncodedName())) {
LOG.warn("Split daughter region not in transition" + hri_b);
}
regionOffline(hri_a);
regionOffline(hri_b);
splitRegions.remove(regionInfo);
}
if (disabled) {
// if server is offline, no hurt to unassign again
LOG.info("Opened " + regionNameStr
+ "but this table is disabled, triggering close of region");
unassign(regionInfo);
}
}
} else if (rs.isMergingNew()) {
synchronized (regionStates) {
@ -3798,6 +3818,7 @@ public class AssignmentManager extends ZooKeeperListener {
}
synchronized (regionStates) {
splitRegions.put(p, new PairOfSameType<HRegionInfo>(hri_a, hri_b));
regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
regionStates.updateRegionState(rt, State.SPLITTING);
@ -3813,6 +3834,7 @@ public class AssignmentManager extends ZooKeeperListener {
regionOffline(p, State.SPLIT);
regionOnline(hri_a, sn);
regionOnline(hri_b, sn);
splitRegions.remove(p);
}
}

View File

@ -58,6 +58,8 @@ import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
@ -1161,6 +1163,42 @@ public class TestSplitTransactionOnCluster {
TESTING_UTIL.deleteTable(tableName);
}
}
/**
 * Checks that a split which is made to fail leaves no region stuck in transition on the master
 * and does not change the number of regions of the table.
 * <p>
 * The test creates and loads a single-region table, loads {@code FailingSplitRegionObserver} on
 * that region, requests a split and blocks on the observer's latch. It then waits (up to 60
 * seconds, polling every second) for the master's regions-in-transition map to become empty and
 * asserts that the table still has exactly one region.
 * <p>
 * Cleanup is nested so that closing the table, closing the connection and deleting the table
 * are each attempted even if an earlier cleanup step throws.
 *
 * @throws Exception if table setup, the split request or the wait fails
 */
@Test
public void testFailedSplit() throws Exception {
  TableName tableName = TableName.valueOf("testFailedSplit");
  byte[] colFamily = Bytes.toBytes("info");
  TESTING_UTIL.createTable(tableName, colFamily);
  try {
    Connection connection = ConnectionFactory.createConnection(TESTING_UTIL.getConfiguration());
    try {
      HTable table = (HTable) connection.getTable(tableName);
      try {
        TESTING_UTIL.loadTable(table, colFamily);
        List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
        assertTrue("Expected exactly one region before the split but found " + regions.size(),
            regions.size() == 1);
        final HRegion actualRegion = cluster.getRegions(tableName).get(0);
        actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
            Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());

        // The following split would fail.
        admin.split(tableName);
        FailingSplitRegionObserver.latch.await();

        LOG.info("Waiting for region to come out of RIT");
        TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
          @Override
          public boolean evaluate() throws Exception {
            RegionStates regionStates =
                cluster.getMaster().getAssignmentManager().getRegionStates();
            Map<String, RegionState> rit = regionStates.getRegionsInTransition();
            // A rolled-back split has to clear the daughter regions as well as the parent, so
            // wait for the whole map to drain instead of checking only the parent's entry.
            return rit.isEmpty();
          }
        });
        regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
        assertTrue("Expected exactly one region after the failed split but found "
            + regions.size(), regions.size() == 1);
      } finally {
        table.close();
      }
    } finally {
      connection.close();
    }
  } finally {
    TESTING_UTIL.deleteTable(tableName);
  }
}
public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {