HBASE-12791 HBase does not attempt to clean up an aborted split when the regionserver is shutting down (Rajeshbabu)

This commit is contained in:
Rajeshbabu Chintaguntla 2015-01-12 07:04:26 +05:30
parent 9b8f59cdf9
commit f4e0cbc26c
5 changed files with 172 additions and 7 deletions

View File

@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
@ -610,10 +611,6 @@ public class RegionStates {
} }
} }
for (HRegionInfo hri : regionsToOffline) {
regionOffline(hri);
}
for (RegionState state : regionsInTransition.values()) { for (RegionState state : regionsInTransition.values()) {
HRegionInfo hri = state.getRegion(); HRegionInfo hri = state.getRegion();
if (assignedRegions.contains(hri)) { if (assignedRegions.contains(hri)) {
@ -632,12 +629,27 @@ public class RegionStates {
if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) { if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn); LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
rits.add(hri); rits.add(hri);
} else if(state.isSplittingNew()) {
try {
if (MetaTableAccessor.getRegion(server.getConnection(), state.getRegion()
.getEncodedNameAsBytes()) == null) {
regionsToOffline.add(state.getRegion());
FSUtils.deleteRegionDir(server.getConfiguration(), state.getRegion());
}
} catch (IOException e) {
LOG.warn("Got exception while deleting " + state.getRegion()
+ " directories from file system.", e);
}
} else { } else {
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state); LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
} }
} }
} }
for (HRegionInfo hri : regionsToOffline) {
regionOffline(hri);
}
this.notifyAll(); this.notifyAll();
return rits; return rits;
} }

View File

@ -180,6 +180,21 @@ public abstract class FSUtils {
return fs.exists(dir) && fs.delete(dir, true); return fs.exists(dir) && fs.delete(dir, true);
} }
/**
 * Removes the on-disk directory of a region, if there is one.
 * <p>
 * The directory is addressed as {@code <table dir>/<encoded region name>}, where the
 * table directory is resolved from the root directory obtained via
 * {@link #getRootDir(Configuration)} and the region's table.
 *
 * @param conf configuration used to resolve the root directory and its filesystem
 * @param hri descriptor of the region whose directory should be removed
 * @return the result of the underlying directory delete; {@code true} means the
 *         region directory was deleted
 * @throws IOException if resolving the root directory or filesystem, or the delete
 *         itself, fails
 */
public static boolean deleteRegionDir(final Configuration conf, final HRegionInfo hri)
    throws IOException {
  final Path rootDir = getRootDir(conf);
  final FileSystem fs = rootDir.getFileSystem(conf);
  // Region directories are named after the region's encoded name, under the table dir.
  final Path tableDir = getTableDir(rootDir, hri.getTable());
  final Path regionDir = new Path(tableDir, hri.getEncodedName());
  return deleteDirectory(fs, regionDir);
}
/** /**
* Return the number of bytes that large input files should be optimally * Return the number of bytes that large input files should be optimally
* be split into to minimize i/o time. * be split into to minimize i/o time.

View File

@ -1935,6 +1935,44 @@ public class HBaseFsck extends Configured implements Closeable {
return; return;
} }
HRegionInfo hri = hbi.getHdfsHRI();
TableInfo tableInfo = tablesInfo.get(hri.getTable());
if (tableInfo.regionsFromMeta.isEmpty()) {
for (HbckInfo h : regionInfoMap.values()) {
if (h.getTableName().equals(hri.getTable())) {
if (h.metaEntry != null) tableInfo.regionsFromMeta
.add((HRegionInfo) h.metaEntry);
}
}
Collections.sort(tableInfo.regionsFromMeta);
}
for (HRegionInfo region : tableInfo.regionsFromMeta) {
if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
&& (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
hri.getEndKey()) >= 0)
&& Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
if(region.isSplit() || region.isOffline()) continue;
Path regionDir = hbi.getHdfsRegionDir();
FileSystem fs = regionDir.getFileSystem(getConf());
List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
for (Path familyDir : familyDirs) {
List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
for (Path referenceFilePath : referenceFilePaths) {
Path parentRegionDir =
StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
LOG.warn(hri + " start and stop keys are in the range of " + region
+ ". The region might not be cleaned up from hdfs when region " + region
+ " split failed. Hence deleting from hdfs.");
HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
regionDir.getParent(), hri);
return;
}
}
}
}
}
LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI()); LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI()); HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI());
@ -2252,6 +2290,9 @@ public class HBaseFsck extends Configured implements Closeable {
final Multimap<byte[], HbckInfo> overlapGroups = final Multimap<byte[], HbckInfo> overlapGroups =
TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp); TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
// list of regions derived from meta entries.
final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
TableInfo(TableName name) { TableInfo(TableName name) {
this.tableName = name; this.tableName = name;
deployedOn = new TreeSet <ServerName>(); deployedOn = new TreeSet <ServerName>();

View File

@ -1247,6 +1247,46 @@ public class TestSplitTransactionOnCluster {
} }
} }
/**
 * Checks that processing a region server shutdown cleans up after a split that was
 * started on that server but only ran the steps before the point of no return.
 * <p>
 * Flow: create a one-family table, write rows {@code r1}..{@code r4}, flush, run
 * {@code SplitTransaction#stepsBeforePONR} for a split at {@code r3}, then ask the
 * assignment manager to process the shutdown of the hosting server. Afterwards no
 * region may remain in transition and only one region directory may remain under
 * the table directory.
 */
@Test (timeout=300000)
public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
  TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
  try {
    HTableDescriptor desc = new HTableDescriptor(table);
    desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
    admin.createTable(desc);
    HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
    try {
      for (int i = 1; i < 5; i++) {
        // Bytes.toBytes keeps the encoding independent of the platform default charset.
        Put p1 = new Put(Bytes.toBytes("r" + i));
        p1.add(Bytes.toBytes("f"), Bytes.toBytes("q1"), Bytes.toBytes("v"));
        hTable.put(p1);
      }
    } finally {
      // The table handle is only needed for loading data; release it right away.
      hTable.close();
    }
    admin.flush(desc.getTableName());
    List<HRegion> regions = cluster.getRegions(desc.getTableName());
    int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
    HRegionServer regionServer = cluster.getRegionServer(serverWith);
    // Run only the pre-PONR part of the split so that it is left incomplete.
    SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
    st.prepare();
    st.stepsBeforePONR(regionServer, regionServer, false);
    Path tableDir =
        FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
          desc.getTableName());
    List<Path> regionDirs =
        FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
    // Presumably the parent region plus the two daughter directories.
    assertEquals(3, regionDirs.size());
    AssignmentManager am = cluster.getMaster().getAssignmentManager();
    am.processServerShutdown(regionServer.getServerName());
    // On failure, the message lists whatever is still in transition.
    assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
        .getRegionStates().getRegionsInTransition().size());
    regionDirs =
        FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
    // Only one region directory may survive the cleanup.
    assertEquals(1, regionDirs.size());
  } finally {
    TESTING_UTIL.deleteTable(table);
  }
}
public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager { public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
public void initialize(Server server, HRegion region) { public void initialize(Server server, HRegion region) {

View File

@ -35,9 +35,6 @@ import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Set;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
@ -87,6 +84,7 @@ import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.hfile.TestHFile; import org.apache.hadoop.hbase.io.hfile.TestHFile;
import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionStates; import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.master.TableLockManager;
import org.apache.hadoop.hbase.master.TableLockManager.TableLock; import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
@ -95,6 +93,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.SplitTransaction;
import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction; import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
@ -104,6 +103,7 @@ import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil; import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.Assert; import org.junit.Assert;
@ -2396,4 +2396,61 @@ public class TestHBaseFsck {
Assert.assertEquals("shouldIgnorePreCheckPermission", true, Assert.assertEquals("shouldIgnorePreCheckPermission", true,
hbck.shouldIgnorePreCheckPermission()); hbck.shouldIgnorePreCheckPermission());
} }
/**
 * Checks that hbck can repair a table after a split that only ran the steps before
 * the point of no return.
 * <p>
 * Flow: create a one-family table, write rows {@code r0}..{@code r4}, flush, and run
 * {@code SplitTransaction#stepsBeforePONR} for a split at {@code r3}. Every region in
 * transition is then marked offline, the ZooKeeper assignment node of the original
 * region is removed, and that region is assigned back to the same server. A
 * report-only hbck run must yield two {@code NOT_IN_META_OR_DEPLOYED} errors and no
 * overlap groups; after a fixing run, hbck must report no errors and all five rows
 * must still be readable.
 */
@Test (timeout=180000)
public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
  TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  try {
    HTableDescriptor desc = new HTableDescriptor(table);
    desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
    admin.createTable(desc);
    tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
    for (int i = 0; i < 5; i++) {
      // Bytes.toBytes keeps the encoding independent of the platform default charset.
      Put p1 = new Put(Bytes.toBytes("r" + i));
      p1.add(Bytes.toBytes("f"), Bytes.toBytes("q1"), Bytes.toBytes("v"));
      tbl.put(p1);
    }
    admin.flush(desc.getTableName());
    List<HRegion> regions = cluster.getRegions(desc.getTableName());
    int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
    HRegionServer regionServer = cluster.getRegionServer(serverWith);
    // Run only the pre-PONR part of the split so that it is left incomplete.
    SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
    st.prepare();
    st.stepsBeforePONR(regionServer, regionServer, false);
    AssignmentManager am = cluster.getMaster().getAssignmentManager();
    Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
    for (RegionState state : regionsInTransition.values()) {
      am.regionOffline(state.getRegion());
    }
    // Drop the assignment znode of the original region before assigning it again.
    ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
    Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
    regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
    am.assign(regionsMap);
    am.waitForAssignment(regions.get(0).getRegionInfo());
    // Report-only run; presumably one error per leftover daughter region.
    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
    // holes are separate from overlap groups
    assertEquals(0, hbck.getOverlapGroups(table).size());

    // fix hole
    // NOTE(review): the single enabled flag presumably selects the meta fix; confirm
    // against the doFsck overload.
    assertErrors(
      doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
      new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

    // check that hole fixed
    assertNoErrors(doFsck(conf, false));
    assertEquals(5, countRows());
  } finally {
    if (tbl != null) {
      tbl.close();
      tbl = null;
    }
    cleanupTable(table);
  }
}
} }