HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)

This commit is contained in:
Andrew Purtell 2018-08-17 15:01:44 -07:00
parent 8716ac2568
commit 56c59f11e3
6 changed files with 168 additions and 15 deletions

View File

@ -1636,7 +1636,17 @@ public class HStore implements Store {
@Override
public boolean hasReferences() {
return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
List<StoreFile> reloadedStoreFiles = null;
try {
// Reloading the store files from file system due to HBASE-20940. As split can happen with an
// region which has references
reloadedStoreFiles = loadStoreFiles();
} catch (IOException ioe) {
LOG.error("Error trying to determine if store has referenes, " + "assuming references exists",
ioe);
return true;
}
return StoreUtils.hasReferences(reloadedStoreFiles);
}
@Override

View File

@ -105,6 +105,10 @@ public class TestChangingEncoding {
conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
// ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE);
// ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE);
// Disabling split to make sure split does not cause modify column to wait which timesout test
// sometime
conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
"org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
conf.setBoolean("hbase.online.schema.update.enable", true);
TEST_UTIL.startMiniCluster();
}

View File

@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
@ -33,19 +34,21 @@ import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@ -256,6 +259,20 @@ public class TestAssignmentListener {
while (mergeable < 2) {
Thread.sleep(100);
admin.majorCompact(TABLE_NAME);
// Waiting for compaction to finish
RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
while (CompactionState.NONE != admin.getCompactionState(TABLE_NAME)
&& retrier.shouldRetry()) {
retrier.sleepUntilNextRetry();
}
// Making sure references are getting cleaned after compaction.
// This is taken care by discharger
for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) {
region.getStores().get(0).closeAndArchiveCompactedFiles();
}
mergeable = 0;
for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) {
for (Region region: regionThread.getRegionServer().getOnlineRegions(TABLE_NAME)) {

View File

@ -22,17 +22,15 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import com.google.common.collect.Iterators;
import com.google.common.collect.Sets;
import com.google.protobuf.ServiceException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
@ -40,9 +38,11 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ScheduledChore;
@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
import org.apache.hadoop.hbase.ipc.HBaseRpcControllerImpl;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
@ -70,13 +71,20 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.StoppableImplementation;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import com.google.common.collect.Iterators;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ServiceException;
@Category(LargeTests.class)
public class TestEndToEndSplitTransaction {
private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class);
@ -94,6 +102,79 @@ public class TestEndToEndSplitTransaction {
TEST_UTIL.shutdownMiniCluster();
}
/*
* This is the test for : HBASE-20940 This test will split the region and try to open an reference
* over store file. Once store file has any reference, it makes sure that region can't be split
* @throws Exception
*/
@Test
public void testCanSplitJustAfterASplit() throws Exception {
LOG.info("Starting testCanSplitJustAfterASplit");
byte[] fam = Bytes.toBytes("cf_split");
TableName tableName = TableName.valueOf("CanSplitTable");
Table source = TEST_UTIL.getConnection().getTable(tableName);
Admin admin = TEST_UTIL.getHBaseAdmin();
Map<String, StoreFile.Reader> scanner = Maps.newHashMap();
try {
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(fam));
admin.createTable(htd);
TEST_UTIL.loadTable(source, fam);
List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
regions.get(0).forceSplit(null);
admin.split(tableName);
while (regions.size() <= 1) {
regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
// Trying to get reference of the file before region split completes
// this would try to get handle on storefile or it can not be cleaned
for (HRegion region : regions) {
for (Store store : region.getStores()) {
for (StoreFile file : store.getStorefiles()) {
StoreFile.Reader reader = file.getReader();
reader.getStoreFileScanner(false, false, false, 0, 0, false);
scanner.put(region.getRegionInfo().getEncodedName(), reader);
LOG.info("Got reference to file = " + file.getPath() + ",for region = "
+ region.getRegionInfo().getEncodedName());
}
}
}
}
Assert.assertTrue("Regions did not split properly", regions.size() > 1);
Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1);
RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) {
retrier.sleepUntilNextRetry();
}
Assert.assertEquals("Compaction did not complete in 30 secs", CompactionState.NONE,
admin.getCompactionState(tableName));
for (HRegion region : regions) {
for (Store store : region.getStores()) {
Assert.assertTrue("Contains an open file reference which can be split",
!scanner.containsKey(region.getRegionInfo().getEncodedName()) || !store.canSplit());
}
}
} finally {
for (StoreFile.Reader s : scanner.values()) {
try {
s.close(true);
} catch (IOException e) {
e.printStackTrace();
}
}
scanner.clear();
if (source != null) source.close();
TEST_UTIL.deleteTableIfAny(tableName);
}
}
@Test
public void testMasterOpsWhileSplitting() throws Exception {
TableName tableName = TableName.valueOf("TestSplit");
@ -130,10 +211,10 @@ public class TestEndToEndSplitTransaction {
if (split.useZKForAssignment) {
server.postOpenDeployTasks(regions.getSecond());
} else {
server.reportRegionStateTransition(
RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
region.getRegionInfo(), regions.getFirst().getRegionInfo(),
regions.getSecond().getRegionInfo());
server.reportRegionStateTransition(
RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
region.getRegionInfo(), regions.getFirst().getRegionInfo(),
regions.getSecond().getRegionInfo());
}
// first daughter second
@ -157,8 +238,8 @@ public class TestEndToEndSplitTransaction {
if (split.useZKForAssignment) {
// 4. phase III
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
regions.getSecond(), split.std, region);
.getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
regions.getSecond(), split.std, region);
}
assertTrue(test(conn, tableName, firstRow, server));
@ -552,6 +633,28 @@ public class TestEndToEndSplitTransaction {
rem = timeout - (System.currentTimeMillis() - start);
blockUntilRegionIsOpened(conf, rem, daughterB);
// Compacting the new region to make sure references can be cleaned up
compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(),
TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());
removeCompactedfiles(conn, timeout, daughterA);
removeCompactedfiles(conn, timeout, daughterB);
}
}
}
public static void removeCompactedfiles(Connection conn, long timeout, HRegionInfo hri)
throws IOException, InterruptedException {
log("removeCompactedfiles for : " + hri.getRegionNameAsString());
List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
for (HRegion region : regions) {
try {
region.getStores().get(0).closeAndArchiveCompactedFiles();
} catch (IOException e) {
e.printStackTrace();
}
}
}

View File

@ -2686,6 +2686,10 @@ public class TestHRegion {
LOG.info("" + HBaseTestCase.addContent(region, fam3));
region.flush(true);
region.compactStores();
region.waitForFlushesAndCompactions();
for(Store s:region.getStores()) {
s.closeAndArchiveCompactedFiles();
}
byte[] splitRow = region.checkSplit();
assertNotNull(splitRow);
LOG.info("SplitRow: " + Bytes.toString(splitRow));
@ -2695,6 +2699,10 @@ public class TestHRegion {
for (int i = 0; i < subregions.length; i++) {
HRegion.openHRegion(subregions[i], null);
subregions[i].compactStores();
subregions[i].waitForFlushesAndCompactions();
for(Store s:subregions[i].getStores()) {
s.closeAndArchiveCompactedFiles();
}
}
Path oldRegionPath = region.getRegionFileSystem().getRegionDir();
Path oldRegion1 = subregions[0].getRegionFileSystem().getRegionDir();

View File

@ -27,12 +27,12 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
import org.apache.hadoop.hbase.testclassification.LargeTests;
@ -97,6 +98,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@ -605,11 +607,20 @@ public class TestSplitTransactionOnCluster {
// Compact first to ensure we have cleaned up references -- else the split
// will fail.
this.admin.compact(daughter.getRegionName());
RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
&& retrier.shouldRetry()) {
retrier.sleepUntilNextRetry();
}
daughters = cluster.getRegions(tableName);
HRegion daughterRegion = null;
for (HRegion r: daughters) {
for (HRegion r : daughters) {
if (r.getRegionInfo().equals(daughter)) {
daughterRegion = r;
// Archiving the compacted references file
r.getStores().get(0).closeAndArchiveCompactedFiles();
LOG.info("Found matching HRI: " + daughterRegion);
break;
}