HBASE-12686 Failures in split before PONR not clearing the daughter regions from regions in transition during rollback (Vandana Ayyalasomayajula)
This commit is contained in:
parent
677153ad61
commit
871444cb0a
@ -29,6 +29,7 @@ import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.NavigableMap;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
@ -165,6 +166,9 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
|
||||
= new HashMap<String, PairOfSameType<HRegionInfo>>();
|
||||
|
||||
private final Map<HRegionInfo, PairOfSameType<HRegionInfo>> splitRegions
|
||||
= new HashMap<HRegionInfo, PairOfSameType<HRegionInfo>>();
|
||||
|
||||
/**
|
||||
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
|
||||
* failure due to lack of availability of region plan or bad region plan
|
||||
@ -1321,14 +1325,30 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
|
||||
ServerName serverName = rs.getServerName();
|
||||
if (serverManager.isServerOnline(serverName)) {
|
||||
if (rs.isOnServer(serverName)
|
||||
&& (rs.isOpened() || rs.isSplitting())) {
|
||||
regionOnline(regionInfo, serverName);
|
||||
if (disabled) {
|
||||
// if server is offline, no hurt to unassign again
|
||||
LOG.info("Opened " + regionNameStr
|
||||
+ "but this table is disabled, triggering close of region");
|
||||
unassign(regionInfo);
|
||||
if (rs.isOnServer(serverName) && (rs.isOpened() || rs.isSplitting())) {
|
||||
synchronized (regionStates) {
|
||||
regionOnline(regionInfo, serverName);
|
||||
if (rs.isSplitting() && splitRegions.containsKey(regionInfo)) {
|
||||
// Check if the daughter regions are still there; if they are present, offline them,
|
||||
// as it's the case of a rollback.
|
||||
HRegionInfo hri_a = splitRegions.get(regionInfo).getFirst();
|
||||
HRegionInfo hri_b = splitRegions.get(regionInfo).getSecond();
|
||||
if (!regionStates.isRegionInTransition(hri_a.getEncodedName())) {
|
||||
LOG.warn("Split daughter region not in transition " + hri_a);
|
||||
}
|
||||
if (!regionStates.isRegionInTransition(hri_b.getEncodedName())) {
|
||||
LOG.warn("Split daughter region not in transition" + hri_b);
|
||||
}
|
||||
regionOffline(hri_a);
|
||||
regionOffline(hri_b);
|
||||
splitRegions.remove(regionInfo);
|
||||
}
|
||||
if (disabled) {
|
||||
// if server is offline, no hurt to unassign again
|
||||
LOG.info("Opened " + regionNameStr
|
||||
+ "but this table is disabled, triggering close of region");
|
||||
unassign(regionInfo);
|
||||
}
|
||||
}
|
||||
} else if (rs.isMergingNew()) {
|
||||
synchronized (regionStates) {
|
||||
@ -3798,6 +3818,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
}
|
||||
|
||||
synchronized (regionStates) {
|
||||
splitRegions.put(p, new PairOfSameType<HRegionInfo>(hri_a, hri_b));
|
||||
regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
|
||||
regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
|
||||
regionStates.updateRegionState(rt, State.SPLITTING);
|
||||
@ -3813,6 +3834,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||
regionOffline(p, State.SPLIT);
|
||||
regionOnline(hri_a, sn);
|
||||
regionOnline(hri_b, sn);
|
||||
splitRegions.remove(p);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,6 +58,8 @@ import org.apache.hadoop.hbase.Waiter;
|
||||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||
import org.apache.hadoop.hbase.MetaTableAccessor;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.Delete;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
@ -1161,6 +1163,42 @@ public class TestSplitTransactionOnCluster {
|
||||
TESTING_UTIL.deleteTable(tableName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that a split which does not complete leaves the table with its original region.
 *
 * Creates a table, asserts it has exactly one region, loads
 * FailingSplitRegionObserver on that region, requests a split, then waits until the
 * region is no longer listed in the master's regions in transition and asserts the
 * table still reports exactly one region. The table handle and connection are closed
 * and the table is deleted in the finally block.
 *
 * @throws Exception if any cluster operation or the wait fails
 */
@Test
|
||||
public void testFailedSplit() throws Exception {
|
||||
TableName tableName = TableName.valueOf("testFailedSplit");
|
||||
byte[] colFamily = Bytes.toBytes("info");
|
||||
TESTING_UTIL.createTable(tableName, colFamily);
|
||||
Connection connection = ConnectionFactory.createConnection(TESTING_UTIL.getConfiguration());
|
||||
HTable table = (HTable) connection.getTable(tableName);
|
||||
try {
|
||||
TESTING_UTIL.loadTable(table, colFamily);
|
||||
List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
|
||||
assertTrue(regions.size() == 1);
|
||||
final HRegion actualRegion = cluster.getRegions(tableName).get(0);
|
||||
// Attach the observer to the table's only region.
// NOTE(review): presumably the observer forces the split to fail - confirm against
// FailingSplitRegionObserver's definition, which is not visible here.
actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
|
||||
Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
|
||||
|
||||
// The following split would fail.
|
||||
admin.split(tableName);
|
||||
// Block until the observer's latch is released.
// NOTE(review): presumably released once the failing hook has run - confirm.
FailingSplitRegionObserver.latch.await();
|
||||
LOG.info("Waiting for region to come out of RIT");
|
||||
// Poll until the original region's encoded name is absent from the master's
// regions-in-transition map.
TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
|
||||
@Override
|
||||
public boolean evaluate() throws Exception {
|
||||
RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
|
||||
Map<String, RegionState> rit = regionStates.getRegionsInTransition();
|
||||
return !rit.containsKey(actualRegion.getRegionInfo().getEncodedName());
|
||||
}
|
||||
});
|
||||
// Re-read the region list: the table should still report exactly one region.
regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
|
||||
assertTrue(regions.size() == 1);
|
||||
} finally {
|
||||
table.close();
|
||||
connection.close();
|
||||
TESTING_UTIL.deleteTable(tableName);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user