HBASE-9724 Failed region split is not handled correctly by AM
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1530826 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
066638e6cc
commit
341256ae5b
|
@ -146,7 +146,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
* See below in {@link #assign()} and {@link #unassign()}.
|
* See below in {@link #assign()} and {@link #unassign()}.
|
||||||
*/
|
*/
|
||||||
private final int maximumAttempts;
|
private final int maximumAttempts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
|
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
|
||||||
* failure due to lack of availability of region plan
|
* failure due to lack of availability of region plan
|
||||||
|
@ -1324,7 +1324,15 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
+ "but this table is disabled, triggering close of region");
|
+ "but this table is disabled, triggering close of region");
|
||||||
unassign(regionInfo);
|
unassign(regionInfo);
|
||||||
}
|
}
|
||||||
|
} else if (rs.isSplitting()) {
|
||||||
|
LOG.debug("Ephemeral node deleted. Found in SPLITTING state. " + "Removing from RIT "
|
||||||
|
+ rs.getRegion());
|
||||||
|
// This can be caused either by a failed SPLIT or by a dead RS.
|
||||||
|
regionStates.regionOnline(rs.getRegion(), rs.getServerName());
|
||||||
}
|
}
|
||||||
|
// RS does not delete the znode in case SPLIT, it only means RS died which
|
||||||
|
// will be handled by SSH
|
||||||
|
// in region merge we do not put merging regions to MERGING state
|
||||||
} finally {
|
} finally {
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hbase.Abortable;
|
import org.apache.hadoop.hbase.Abortable;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.Coprocessor;
|
||||||
import org.apache.hadoop.hbase.HBaseIOException;
|
import org.apache.hadoop.hbase.HBaseIOException;
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||||
|
@ -50,7 +50,9 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
import org.apache.hadoop.hbase.RegionTransition;
|
import org.apache.hadoop.hbase.RegionTransition;
|
||||||
import org.apache.hadoop.hbase.Server;
|
import org.apache.hadoop.hbase.Server;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.UnknownRegionException;
|
import org.apache.hadoop.hbase.UnknownRegionException;
|
||||||
|
import org.apache.hadoop.hbase.Waiter;
|
||||||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaReader;
|
import org.apache.hadoop.hbase.catalog.MetaReader;
|
||||||
|
@ -192,6 +194,7 @@ public class TestSplitTransactionOnCluster {
|
||||||
assertTrue("not able to find a splittable region", region != null);
|
assertTrue("not able to find a splittable region", region != null);
|
||||||
|
|
||||||
new Thread() {
|
new Thread() {
|
||||||
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
SplitTransaction st = null;
|
SplitTransaction st = null;
|
||||||
st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
|
st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
|
||||||
|
@ -250,6 +253,65 @@ public class TestSplitTransactionOnCluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 60000)
|
||||||
|
public void testRITStateForRollback() throws Exception {
|
||||||
|
final TableName tableName =
|
||||||
|
TableName.valueOf("testRITStateForRollback");
|
||||||
|
try {
|
||||||
|
// Create table then get the single region for our new table.
|
||||||
|
HTable t = createTableAndWait(tableName.getName(), Bytes.toBytes("cf"));
|
||||||
|
final List<HRegion> regions = cluster.getRegions(tableName);
|
||||||
|
final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
|
||||||
|
int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
|
||||||
|
final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
|
||||||
|
insertData(tableName.getName(), admin, t);
|
||||||
|
t.close();
|
||||||
|
|
||||||
|
// Turn off balancer so it doesn't cut in and mess up our placements.
|
||||||
|
this.admin.setBalancerRunning(false, true);
|
||||||
|
// Turn off the meta scanner so it doesn't remove the parent on us.
|
||||||
|
cluster.getMaster().setCatalogJanitorEnabled(false);
|
||||||
|
|
||||||
|
// find a splittable region
|
||||||
|
final HRegion region = findSplittableRegion(regions);
|
||||||
|
assertTrue("not able to find a splittable region", region != null);
|
||||||
|
|
||||||
|
// install region co-processor to fail splits
|
||||||
|
region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
|
||||||
|
Coprocessor.PRIORITY_USER, region.getBaseConf());
|
||||||
|
|
||||||
|
// split async
|
||||||
|
this.admin.split(region.getRegionName(), new byte[] {42});
|
||||||
|
|
||||||
|
// we have to wait until the SPLITTING state is seen by the master
|
||||||
|
FailingSplitRegionObserver.latch.await();
|
||||||
|
|
||||||
|
LOG.info("Waiting for region to come out of RIT");
|
||||||
|
TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
|
||||||
|
Map<String, RegionState> rit = regionStates.getRegionsInTransition();
|
||||||
|
return !rit.containsKey(hri.getEncodedName());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
admin.setBalancerRunning(true, false);
|
||||||
|
cluster.getMaster().setCatalogJanitorEnabled(true);
|
||||||
|
TESTING_UTIL.deleteTable(tableName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class FailingSplitRegionObserver extends BaseRegionObserver {
|
||||||
|
static volatile CountDownLatch latch = new CountDownLatch(1);
|
||||||
|
@Override
|
||||||
|
public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
|
||||||
|
byte[] splitKey, List<Mutation> metaEntries) throws IOException {
|
||||||
|
latch.countDown();
|
||||||
|
throw new IOException("Causing rollback of region split");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A test that intentionally has master fail the processing of the split message.
|
* A test that intentionally has master fail the processing of the split message.
|
||||||
* Tests that the regionserver split ephemeral node gets cleaned up if it
|
* Tests that the regionserver split ephemeral node gets cleaned up if it
|
||||||
|
@ -1192,6 +1254,7 @@ public class TestSplitTransactionOnCluster {
|
||||||
super(conf);
|
super(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
protected void startCatalogJanitorChore() {
|
protected void startCatalogJanitorChore() {
|
||||||
LOG.debug("Customised master executed.");
|
LOG.debug("Customised master executed.");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue