HBASE-12791 HBase does not attempt to clean up an aborted split when the regionserver is shutting down (Rajeshbabu)
This commit is contained in:
parent
9b8f59cdf9
commit
f4e0cbc26c
|
@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.MetaTableAccessor;
|
|||
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
|
@ -610,10 +611,6 @@ public class RegionStates {
|
|||
}
|
||||
}
|
||||
|
||||
for (HRegionInfo hri : regionsToOffline) {
|
||||
regionOffline(hri);
|
||||
}
|
||||
|
||||
for (RegionState state : regionsInTransition.values()) {
|
||||
HRegionInfo hri = state.getRegion();
|
||||
if (assignedRegions.contains(hri)) {
|
||||
|
@ -632,12 +629,27 @@ public class RegionStates {
|
|||
if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
|
||||
LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
|
||||
rits.add(hri);
|
||||
} else if(state.isSplittingNew()) {
|
||||
try {
|
||||
if (MetaTableAccessor.getRegion(server.getConnection(), state.getRegion()
|
||||
.getEncodedNameAsBytes()) == null) {
|
||||
regionsToOffline.add(state.getRegion());
|
||||
FSUtils.deleteRegionDir(server.getConfiguration(), state.getRegion());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Got exception while deleting " + state.getRegion()
|
||||
+ " directories from file system.", e);
|
||||
}
|
||||
} else {
|
||||
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (HRegionInfo hri : regionsToOffline) {
|
||||
regionOffline(hri);
|
||||
}
|
||||
|
||||
this.notifyAll();
|
||||
return rits;
|
||||
}
|
||||
|
|
|
@ -180,6 +180,21 @@ public abstract class FSUtils {
|
|||
return fs.exists(dir) && fs.delete(dir, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the region directory if exists.
|
||||
* @param conf
|
||||
* @param hri
|
||||
* @return True if deleted the region directory.
|
||||
* @throws IOException
|
||||
*/
|
||||
public static boolean deleteRegionDir(final Configuration conf, final HRegionInfo hri)
|
||||
throws IOException {
|
||||
Path rootDir = getRootDir(conf);
|
||||
FileSystem fs = rootDir.getFileSystem(conf);
|
||||
return deleteDirectory(fs,
|
||||
new Path(getTableDir(rootDir, hri.getTable()), hri.getEncodedName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of bytes that large input files should be optimally
|
||||
* be split into to minimize i/o time.
|
||||
|
|
|
@ -1935,6 +1935,44 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
return;
|
||||
}
|
||||
|
||||
HRegionInfo hri = hbi.getHdfsHRI();
|
||||
TableInfo tableInfo = tablesInfo.get(hri.getTable());
|
||||
if (tableInfo.regionsFromMeta.isEmpty()) {
|
||||
for (HbckInfo h : regionInfoMap.values()) {
|
||||
if (h.getTableName().equals(hri.getTable())) {
|
||||
if (h.metaEntry != null) tableInfo.regionsFromMeta
|
||||
.add((HRegionInfo) h.metaEntry);
|
||||
}
|
||||
}
|
||||
Collections.sort(tableInfo.regionsFromMeta);
|
||||
}
|
||||
for (HRegionInfo region : tableInfo.regionsFromMeta) {
|
||||
if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
|
||||
&& (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
|
||||
hri.getEndKey()) >= 0)
|
||||
&& Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
|
||||
if(region.isSplit() || region.isOffline()) continue;
|
||||
Path regionDir = hbi.getHdfsRegionDir();
|
||||
FileSystem fs = regionDir.getFileSystem(getConf());
|
||||
List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
|
||||
for (Path familyDir : familyDirs) {
|
||||
List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
|
||||
for (Path referenceFilePath : referenceFilePaths) {
|
||||
Path parentRegionDir =
|
||||
StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
|
||||
if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
|
||||
LOG.warn(hri + " start and stop keys are in the range of " + region
|
||||
+ ". The region might not be cleaned up from hdfs when region " + region
|
||||
+ " split failed. Hence deleting from hdfs.");
|
||||
HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
|
||||
regionDir.getParent(), hri);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
|
||||
HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
|
||||
|
||||
|
@ -2252,6 +2290,9 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
final Multimap<byte[], HbckInfo> overlapGroups =
|
||||
TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
|
||||
|
||||
// list of regions derived from meta entries.
|
||||
final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
|
||||
|
||||
TableInfo(TableName name) {
|
||||
this.tableName = name;
|
||||
deployedOn = new TreeSet <ServerName>();
|
||||
|
|
|
@ -1247,6 +1247,46 @@ public class TestSplitTransactionOnCluster {
|
|||
}
|
||||
}
|
||||
|
||||
@Test (timeout=300000)
|
||||
public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
|
||||
TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
|
||||
try {
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
|
||||
admin.createTable(desc);
|
||||
HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
|
||||
for(int i = 1; i < 5; i++) {
|
||||
Put p1 = new Put(("r"+i).getBytes());
|
||||
p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
|
||||
hTable.put(p1);
|
||||
}
|
||||
admin.flush(desc.getTableName());
|
||||
List<HRegion> regions = cluster.getRegions(desc.getTableName());
|
||||
int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
|
||||
HRegionServer regionServer = cluster.getRegionServer(serverWith);
|
||||
cluster.getServerWith(regions.get(0).getRegionName());
|
||||
SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
|
||||
st.prepare();
|
||||
st.stepsBeforePONR(regionServer, regionServer, false);
|
||||
Path tableDir =
|
||||
FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
|
||||
desc.getTableName());
|
||||
tableDir.getFileSystem(cluster.getConfiguration());
|
||||
List<Path> regionDirs =
|
||||
FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
|
||||
assertEquals(3,regionDirs.size());
|
||||
AssignmentManager am = cluster.getMaster().getAssignmentManager();
|
||||
am.processServerShutdown(regionServer.getServerName());
|
||||
assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
|
||||
.getRegionStates().getRegionsInTransition().size());
|
||||
regionDirs =
|
||||
FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
|
||||
assertEquals(1,regionDirs.size());
|
||||
} finally {
|
||||
TESTING_UTIL.deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
|
||||
|
||||
public void initialize(Server server, HRegion region) {
|
||||
|
|
|
@ -35,9 +35,6 @@ import java.util.HashMap;
|
|||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.NavigableMap;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
|
@ -87,6 +84,7 @@ import org.apache.hadoop.hbase.client.Table;
|
|||
import org.apache.hadoop.hbase.io.hfile.TestHFile;
|
||||
import org.apache.hadoop.hbase.master.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.RegionStates;
|
||||
import org.apache.hadoop.hbase.master.TableLockManager;
|
||||
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
|
||||
|
@ -95,6 +93,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
|
|||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.regionserver.SplitTransaction;
|
||||
import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
|
||||
|
@ -104,6 +103,7 @@ import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
|
|||
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
|
||||
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
|
@ -2396,4 +2396,61 @@ public class TestHBaseFsck {
|
|||
Assert.assertEquals("shouldIgnorePreCheckPermission", true,
|
||||
hbck.shouldIgnorePreCheckPermission());
|
||||
}
|
||||
|
||||
@Test (timeout=180000)
|
||||
public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
|
||||
TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
try {
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
|
||||
admin.createTable(desc);
|
||||
tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
Put p1 = new Put(("r" + i).getBytes());
|
||||
p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
|
||||
tbl.put(p1);
|
||||
}
|
||||
admin.flush(desc.getTableName());
|
||||
List<HRegion> regions = cluster.getRegions(desc.getTableName());
|
||||
int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
|
||||
HRegionServer regionServer = cluster.getRegionServer(serverWith);
|
||||
cluster.getServerWith(regions.get(0).getRegionName());
|
||||
SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
|
||||
st.prepare();
|
||||
st.stepsBeforePONR(regionServer, regionServer, false);
|
||||
AssignmentManager am = cluster.getMaster().getAssignmentManager();
|
||||
Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
|
||||
for (RegionState state : regionsInTransition.values()) {
|
||||
am.regionOffline(state.getRegion());
|
||||
}
|
||||
ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
|
||||
Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
|
||||
regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
|
||||
am.assign(regionsMap);
|
||||
am.waitForAssignment(regions.get(0).getRegionInfo());
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
assertErrors(
|
||||
doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
|
||||
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf, false));
|
||||
assertEquals(5, countRows());
|
||||
} finally {
|
||||
if (tbl != null) {
|
||||
tbl.close();
|
||||
tbl = null;
|
||||
}
|
||||
cleanupTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue