HBASE-22456 Polish TestSplitTransitionOnCluster

This commit is contained in:
Duo Zhang 2019-05-23 09:35:57 +08:00
parent 8e47c8e739
commit f1a8aa4fd7
1 changed files with 36 additions and 77 deletions

View File

@ -31,7 +31,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -53,7 +55,6 @@ import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.CompactionState;
import org.apache.hadoop.hbase.client.Consistency; import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
@ -91,7 +92,6 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HBaseFsck; import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException; import org.apache.zookeeper.KeeperException.NodeExistsException;
@ -298,7 +298,7 @@ public class TestSplitTransactionOnCluster {
} }
@Test @Test
public void testSplitRollbackOnRegionClosing() throws IOException, InterruptedException { public void testSplitRollbackOnRegionClosing() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName()); final TableName tableName = TableName.valueOf(name.getMethodName());
// Create table then get the single region for our new table. // Create table then get the single region for our new table.
@ -340,7 +340,7 @@ public class TestSplitTransactionOnCluster {
} }
regionStates.updateRegionState(hri, State.OPEN); regionStates.updateRegionState(hri, State.OPEN);
// Now try splitting and it should work. // Now try splitting and it should work.
split(hri, server, regionCount); admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
// Get daughters // Get daughters
checkAndGetDaughters(tableName); checkAndGetDaughters(tableName);
// OK, so split happened after we cleared the blocking node. // OK, so split happened after we cleared the blocking node.
@ -352,13 +352,11 @@ public class TestSplitTransactionOnCluster {
} }
/** /**
* Test that if daughter split on us, we won't do the shutdown handler fixup * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't
* just because we can't find the immediate daughter of an offlined parent. * find the immediate daughter of an offlined parent.
* @throws IOException
* @throws InterruptedException
*/ */
@Test @Test
public void testShutdownFixupWhenDaughterHasSplit()throws IOException, InterruptedException { public void testShutdownFixupWhenDaughterHasSplit() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName()); final TableName tableName = TableName.valueOf(name.getMethodName());
// Create table then get the single region for our new table. // Create table then get the single region for our new table.
@ -378,42 +376,27 @@ public class TestSplitTransactionOnCluster {
// Get region pre-split. // Get region pre-split.
HRegionServer server = cluster.getRegionServer(tableRegionIndex); HRegionServer server = cluster.getRegionServer(tableRegionIndex);
printOutRegions(server, "Initial regions: "); printOutRegions(server, "Initial regions: ");
int regionCount = cluster.getRegions(hri.getTable()).size();
// Now split. // Now split.
split(hri, server, regionCount); admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
// Get daughters // Get daughters
List<HRegion> daughters = checkAndGetDaughters(tableName); List<HRegion> daughters = checkAndGetDaughters(tableName);
HRegion daughterRegion = daughters.get(0);
// Now split one of the daughters. // Now split one of the daughters.
regionCount = cluster.getRegions(hri.getTable()).size(); RegionInfo daughter = daughterRegion.getRegionInfo();
RegionInfo daughter = daughters.get(0).getRegionInfo();
LOG.info("Daughter we are going to split: " + daughter); LOG.info("Daughter we are going to split: " + daughter);
// Compact first to ensure we have cleaned up references -- else the split // Compact first to ensure we have cleaned up references -- else the split
// will fail. // will fail.
this.admin.compactRegion(daughter.getRegionName()); daughterRegion.compact(true);
RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS); daughterRegion.getStores().get(0).closeAndArchiveCompactedFiles();
while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName()) for (int i = 0; i < 100; i++) {
&& retrier.shouldRetry()) { if (!daughterRegion.hasReferences()) {
retrier.sleepUntilNextRetry();
}
daughters = cluster.getRegions(tableName);
HRegion daughterRegion = null;
for (HRegion r : daughters) {
if (RegionInfo.COMPARATOR.compare(r.getRegionInfo(), daughter) == 0) {
daughterRegion = r;
// Archiving the compacted references file
r.getStores().get(0).closeAndArchiveCompactedFiles();
LOG.info("Found matching HRI: " + daughterRegion);
break; break;
} }
}
assertTrue(daughterRegion != null);
for (int i = 0; i < 100; i++) {
if (!daughterRegion.hasReferences()) break;
Threads.sleep(100); Threads.sleep(100);
} }
assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences()); assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
LOG.info("Daughter hri before split (has been compacted): " + daughter); LOG.info("Daughter hri before split (has been compacted): " + daughter);
split(daughter, server, regionCount); admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES);
// Get list of daughters // Get list of daughters
daughters = cluster.getRegions(tableName); daughters = cluster.getRegions(tableName);
for (HRegion d: daughters) { for (HRegion d: daughters) {
@ -510,38 +493,32 @@ public class TestSplitTransactionOnCluster {
} }
/** /**
* Verifies HBASE-5806. Here the case is that splitting is completed but before the * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could
* CJ could remove the parent region the master is killed and restarted. * remove the parent region the master is killed and restarted.
* @throws IOException
* @throws InterruptedException
* @throws NodeExistsException
* @throws KeeperException
*/ */
@Test @Test
public void testMasterRestartAtRegionSplitPendingCatalogJanitor() public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
throws IOException, InterruptedException, NodeExistsException, throws IOException, InterruptedException, NodeExistsException, KeeperException,
KeeperException, ServiceException { ServiceException, ExecutionException, TimeoutException {
final TableName tableName = TableName.valueOf(name.getMethodName()); final TableName tableName = TableName.valueOf(name.getMethodName());
// Create table then get the single region for our new table. // Create table then get the single region for our new table.
Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) {
List<HRegion> regions = cluster.getRegions(tableName); List<HRegion> regions = cluster.getRegions(tableName);
RegionInfo hri = getAndCheckSingleTableRegion(regions); RegionInfo hri = getAndCheckSingleTableRegion(regions);
int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
// Turn off balancer so it doesn't cut in and mess up our placements. // Turn off balancer so it doesn't cut in and mess up our placements.
this.admin.balancerSwitch(false, true); this.admin.balancerSwitch(false, true);
// Turn off the meta scanner so it doesn't remove parent on us. // Turn off the meta scanner so it doesn't remove parent on us.
cluster.getMaster().setCatalogJanitorEnabled(false); cluster.getMaster().setCatalogJanitorEnabled(false);
try {
// Add a bit of load up into the table so splittable. // Add a bit of load up into the table so splittable.
TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
// Get region pre-split. // Get region pre-split.
HRegionServer server = cluster.getRegionServer(tableRegionIndex); HRegionServer server = cluster.getRegionServer(tableRegionIndex);
printOutRegions(server, "Initial regions: "); printOutRegions(server, "Initial regions: ");
// Call split. // Call split.
this.admin.splitRegionAsync(hri.getRegionName()); this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
List<HRegion> daughters = checkAndGetDaughters(tableName); List<HRegion> daughters = checkAndGetDaughters(tableName);
// Before cleanup, get a new master. // Before cleanup, get a new master.
@ -549,12 +526,6 @@ public class TestSplitTransactionOnCluster {
// Now call compact on the daughters and clean up any references. // Now call compact on the daughters and clean up any references.
for (HRegion daughter : daughters) { for (HRegion daughter : daughters) {
daughter.compact(true); daughter.compact(true);
RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
while (CompactionState.NONE != admin
.getCompactionStateForRegion(daughter.getRegionInfo().getRegionName())
&& retrier.shouldRetry()) {
retrier.sleepUntilNextRetry();
}
daughter.getStores().get(0).closeAndArchiveCompactedFiles(); daughter.getStores().get(0).closeAndArchiveCompactedFiles();
assertFalse(daughter.hasReferences()); assertFalse(daughter.hasReferences());
} }
@ -563,7 +534,7 @@ public class TestSplitTransactionOnCluster {
// is reading from. Compacted-away files are picked up later by the compacted // is reading from. Compacted-away files are picked up later by the compacted
// file discharger process. It runs infrequently. Make it run so CatalogJanitor // file discharger process. It runs infrequently. Make it run so CatalogJanitor
// doesn't find any references. // doesn't find any references.
for (RegionServerThread rst: cluster.getRegionServerThreads()) { for (RegionServerThread rst : cluster.getRegionServerThreads()) {
boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false); boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false);
rst.getRegionServer().compactedFileDischarger.run(); rst.getRegionServer().compactedFileDischarger.run();
rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting); rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting);
@ -578,7 +549,6 @@ public class TestSplitTransactionOnCluster {
} finally { } finally {
TESTING_UTIL.getAdmin().balancerSwitch(true, false); TESTING_UTIL.getAdmin().balancerSwitch(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true); cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
} }
} }
@ -810,24 +780,24 @@ public class TestSplitTransactionOnCluster {
} }
Thread.sleep(100); Thread.sleep(100);
} }
return(null); return null;
} }
private List<HRegion> checkAndGetDaughters(TableName tableName) private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException {
throws InterruptedException {
List<HRegion> daughters = null; List<HRegion> daughters = null;
// try up to 10s // try up to 10s
for (int i=0; i<100; i++) { for (int i = 0; i < 100; i++) {
daughters = cluster.getRegions(tableName); daughters = cluster.getRegions(tableName);
if (daughters.size() >= 2) break; if (daughters.size() >= 2) {
break;
}
Thread.sleep(100); Thread.sleep(100);
} }
assertTrue(daughters.size() >= 2); assertTrue(daughters.size() >= 2);
return daughters; return daughters;
} }
private HMaster abortAndWaitForMaster() private HMaster abortAndWaitForMaster() throws IOException, InterruptedException {
throws IOException, InterruptedException {
cluster.abortMaster(0); cluster.abortMaster(0);
cluster.waitOnMaster(0); cluster.waitOnMaster(0);
HMaster master = cluster.startMaster().getMaster(); HMaster master = cluster.startMaster().getMaster();
@ -835,17 +805,6 @@ public class TestSplitTransactionOnCluster {
return master; return master;
} }
/**
 * Requests an asynchronous split of the given region and then polls the cluster until the
 * region's table reports more regions than {@code regionCount}. Fails the test if the region
 * count has not grown once polling gives up.
 * @param hri the region to split
 * @param server not referenced by this helper
 * @param regionCount the region count the table must exceed for the split to count as done
 * @throws IOException declared for the admin split request
 * @throws InterruptedException if the thread is interrupted while sleeping between polls
 */
private void split(final RegionInfo hri, final HRegionServer server, final int regionCount)
    throws IOException, InterruptedException {
  admin.splitRegionAsync(hri.getRegionName());
  // Poll at 2 second intervals, sleeping at most 60 times. The region count is checked before
  // the attempt limit on every pass, exactly as the loop header did.
  int attempts = 0;
  while (cluster.getRegions(hri.getTable()).size() <= regionCount && attempts < 60) {
    LOG.debug("Waiting on region " + hri.getRegionNameAsString() + " to split");
    Thread.sleep(2000);
    attempts++;
  }
  // Re-read the region count for the final verdict rather than reusing the last poll result.
  final boolean stillUnsplit = cluster.getRegions(hri.getTable()).size() <= regionCount;
  assertFalse("Waited too long for split", stillUnsplit);
}
/** /**
* Ensure single table region is not on same server as the single hbase:meta table * Ensure single table region is not on same server as the single hbase:meta table
* region. * region.