HBASE-16621 HBCK should have -fixHFileLinks (Janos Gub)

This commit is contained in:
tedyu 2017-02-02 07:31:35 -08:00
parent fab0b2e603
commit f59cf6f02e
4 changed files with 203 additions and 9 deletions

View File

@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.security.AccessDeniedException;
@ -1530,6 +1531,18 @@ public abstract class FSUtils {
}
}
/**
 * Path filter that accepts HFileLinks only; StoreFiles and HFiles are not accepted.
 * The decision is delegated to {@link HFileLink#isHFileLink(Path)} on the path itself,
 * so this filter does not consider whether the link is a file or not.
 */
public static class HFileLinkFilter implements PathFilter {
  @Override
  public boolean accept(Path p) {
    final boolean looksLikeLink = HFileLink.isHFileLink(p);
    return looksLikeLink;
  }
}
public static class ReferenceFileFilter extends AbstractFileStatusFilter {
private final FileSystem fs;

View File

@ -105,6 +105,8 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
@ -252,6 +254,7 @@ public class HBaseFsck extends Configured implements Closeable {
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
private boolean fixSplitParents = false; // fix lingering split parents
private boolean fixReferenceFiles = false; // fix lingering reference store file
private boolean fixHFileLinks = false; // fix lingering HFileLinks
private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
private boolean fixTableLocks = false; // fix table locks which are expired
private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
@ -762,6 +765,7 @@ public class HBaseFsck extends Configured implements Closeable {
// Do offline check and repair first
offlineHdfsIntegrityRepair();
offlineReferenceFileRepair();
offlineHLinkFileRepair();
// If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
// hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
// is better to set Master into maintenance mode during online hbck.
@ -1127,6 +1131,73 @@ public class HBaseFsck extends Configured implements Closeable {
}
}
/**
 * Scan all the store file names to find any lingering HFileLink files,
 * which refer to some non-existing files. If "fix" option is enabled,
 * any lingering HFileLink file will be sidelined if found, and a sideline
 * of its back-reference under the archive directory is attempted as well.
 *
 * @throws IOException if one of the filesystem calls made here fails
 * @throws InterruptedException declared by this method; presumably raised while the
 *         store-file map is computed on the executor -- TODO confirm
 */
private void offlineHLinkFileRepair() throws IOException, InterruptedException {
  Configuration conf = getConf();
  Path hbaseRoot = FSUtils.getRootDir(conf);
  FileSystem fs = hbaseRoot.getFileSystem(conf);
  LOG.info("Computing mapping of all link files");
  Map<String, Path> allFiles = FSUtils
      .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
  errors.print("");

  LOG.info("Validating mapping using HDFS state");
  for (Path path : allFiles.values()) {
    // building HFileLink object to gather locations
    HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
    if (actualLink.exists(fs)) {
      continue; // good, expected
    }

    // Found a lingering HFileLink
    errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
    if (!shouldFixHFileLinks()) {
      continue;
    }

    // Now, trying to fix it since requested
    setShouldRerun();

    // An HFileLink path should be like
    // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/
    //     linkedtable=linkedregionname-linkedhfilename
    // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the
    // same folder structure.
    boolean success = sidelineFile(fs, hbaseRoot, path);
    if (!success) {
      LOG.error("Failed to sideline HFileLink file " + path);
    }

    // An HFileLink backreference path should be like
    // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/
    //     .links-linkedhfilename
    // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the
    // same folder structure.
    String linkName = path.getName();
    Path backRefPath = FileLink.getBackReferencesDir(
        HFileArchiveUtil.getStoreArchivePath(conf,
            HFileLink.getReferencedTableName(linkName),
            HFileLink.getReferencedRegionName(linkName),
            path.getParent().getName()),
        HFileLink.getReferencedHFileName(linkName));
    success = sidelineFile(fs, hbaseRoot, backRefPath);
    if (!success) {
      // Report the back-reference path that failed, not the link path handled above.
      LOG.error("Failed to sideline HFileLink backreference file " + backRefPath);
    }
  }
}
/**
 * Moves {@code path} under the sideline directory, keeping the location it had
 * relative to {@code hbaseRoot}.
 *
 * @param fs filesystem used to create the destination directory and to rename
 * @param hbaseRoot root directory against which {@code path} is relativized
 * @param path file to sideline
 * @return {@code false} if the relativized URI is still absolute or the destination
 *         parent directory could not be created; otherwise the result of the rename
 * @throws IOException propagated from the filesystem calls
 */
private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
  URI relativeUri = hbaseRoot.toUri().relativize(path.toUri());
  if (relativeUri.isAbsolute()) {
    return false;
  }

  String relativeLocation = relativeUri.getPath();
  Path target = new Path(getSidelineDir(), relativeLocation);
  Path targetParent = target.getParent();
  if (!fs.mkdirs(targetParent)) {
    LOG.error("Failed to create path: " + targetParent);
    return false;
  }

  LOG.info("Trying to sideline file " + path + " to " + target);
  return fs.rename(path, target);
}
/**
* TODO -- need to add tests for this.
*/
@ -3892,8 +3963,8 @@ public class HBaseFsck extends Configured implements Closeable {
FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR,
UNDELETED_REPLICATION_QUEUE
LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK,
ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR, UNDELETED_REPLICATION_QUEUE
}
void clear();
void report(String message);
@ -4471,6 +4542,15 @@ public class HBaseFsck extends Configured implements Closeable {
return fixReferenceFiles;
}
/**
 * Turns fixing of lingering HFileLinks on or off. Turning it on also records
 * that at least one fix option has been requested.
 *
 * @param shouldFix whether lingering HFileLinks should be fixed
 */
public void setFixHFileLinks(boolean shouldFix) {
  this.fixHFileLinks = shouldFix;
  if (shouldFix) {
    fixAny = true;
  }
}
/**
 * @return the value last given to {@code setFixHFileLinks}, i.e. whether lingering
 *         HFileLinks should be fixed
 */
boolean shouldFixHFileLinks() {
  return this.fixHFileLinks;
}
/**
 * @return {@code true} when no fix option has been requested, otherwise the value of
 *         the ignorePreCheckPermission flag
 */
public boolean shouldIgnorePreCheckPermission() {
  if (!fixAny) {
    return true;
  }
  return ignorePreCheckPermission;
}
@ -4587,6 +4667,7 @@ public class HBaseFsck extends Configured implements Closeable {
out.println(" -fixSplitParents Try to force offline split parents to be online.");
out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
out.println(" -fixReferenceFiles Try to offline lingering reference store files");
out.println(" -fixHFileLinks Try to offline lingering HFileLinks");
out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
+ " (empty REGIONINFO_QUALIFIER rows)");
@ -4599,7 +4680,8 @@ public class HBaseFsck extends Configured implements Closeable {
out.println(" Metadata Repair shortcuts");
out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
"-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
"-fixReferenceFiles -fixHFileLinks -fixTableLocks -fixOrphanedTableZnodes");
out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
out.println("");
@ -4733,6 +4815,8 @@ public class HBaseFsck extends Configured implements Closeable {
sidelineCorruptHFiles = true;
} else if (cmd.equals("-fixReferenceFiles")) {
setFixReferenceFiles(true);
} else if (cmd.equals("-fixHFileLinks")) {
setFixHFileLinks(true);
} else if (cmd.equals("-fixEmptyMetaCells")) {
setFixEmptyMetaCells(true);
} else if (cmd.equals("-repair")) {
@ -4748,6 +4832,7 @@ public class HBaseFsck extends Configured implements Closeable {
setFixSplitParents(false);
setCheckHdfs(true);
setFixReferenceFiles(true);
setFixHFileLinks(true);
setFixTableLocks(true);
setFixTableZNodes(true);
} else if (cmd.equals("-repairHoles")) {

View File

@ -65,6 +65,10 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
@ -1736,7 +1740,7 @@ public class TestHBaseFsck {
// for some time until children references are deleted. HBCK erroneously sees this as
// overlapping regions
HBaseFsck hbck = doFsck(
conf, true, true, false, false, false, true, true, true, false, false, false, false, null);
conf, true, true, false, false, false, true, true, true, false, false, false, false, false, null);
assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
// assert that the split hbase:meta entry is still there.
@ -1809,7 +1813,7 @@ public class TestHBaseFsck {
// now fix it. The fix should not revert the region split, but add daughters to META
hbck = doFsck(
conf, true, true, false, false, false, false, false, false, false, false, false,false,null);
conf, true, true, false, false, false, false, false, false, false, false, false, false,false,null);
assertErrors(hbck,
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.HOLE_IN_REGION_CHAIN });
@ -2361,6 +2365,97 @@ public class TestHBaseFsck {
}
}
/**
 * Verifies that hbck reports an HFileLink whose linked HFile has been deleted,
 * and that a fixing run clears the finding on the following check.
 */
@Test(timeout = 180000)
public void testLingeringHFileLinks() throws Exception {
  TableName table = TableName.valueOf("testLingeringHFileLinks");
  try {
    setupTable(table);

    FileSystem fs = FileSystem.get(conf);
    Path rootDir = FSUtils.getRootDir(conf);
    Path firstRegionDir = FSUtils.getRegionDirs(fs, FSUtils.getTableDir(rootDir, table)).get(0);
    String encodedRegionName = firstRegionDir.getName();
    Path familyDir = new Path(firstRegionDir, FAM_STR);
    String hfileName = "01234567abcd";
    Path linkedHFile = new Path(familyDir, hfileName);

    // write an HFile with no content so the link has a target
    HFileContext fileContext = new HFileContextBuilder().withIncludesTags(false).build();
    HFile.getWriterFactoryNoCache(conf)
        .withPath(fs, linkedHFile)
        .withFileContext(fileContext)
        .create()
        .close();

    HFileLink.create(conf, fs, familyDir, table, encodedRegionName, hfileName);

    // link target present: nothing to report
    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    // remove the link target
    fs.delete(linkedHFile, true);

    // a check-only run must flag the link
    hbck = doFsck(conf, false);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // the fixing run still reports what it found
    hbck = doFsck(conf, true);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // after the fix nothing should be reported any more
    hbck = doFsck(conf, false);
    assertNoErrors(hbck);
  } finally {
    cleanupTable(table);
  }
}
/**
 * Test that a directory named like an HFileLink (instead of a link file) is
 * reported as a lingering HFileLink, and that a fixing run clears the finding.
 */
@Test(timeout = 180000)
public void testCorruptLinkDirectory() throws Exception {
  // Use this test's own table name rather than the one copied from testLingeringHFileLinks.
  TableName table = TableName.valueOf("testCorruptLinkDirectory");
  try {
    setupTable(table);

    FileSystem fs = FileSystem.get(conf);
    Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
    Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
    Path famDir = new Path(regionDir, FAM_STR);
    String regionName = regionDir.getName();
    String hfileName = "01234567abcd";
    String link = HFileLink.createHFileLinkName(table, regionName, hfileName);

    // should report no error
    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    // creating a directory with file instead of the HFileLink file
    Path linkDir = new Path(famDir, link);
    fs.mkdirs(linkDir);
    // close the returned stream right away so the output stream is not leaked
    fs.create(new Path(linkDir, "somefile")).close();

    // Check without fix should show the error
    hbck = doFsck(conf, false);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // Fixing the error
    hbck = doFsck(conf, true);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // Fix should sideline these files, thus preventing the error
    hbck = doFsck(conf, false);
    assertNoErrors(hbck);
  } finally {
    cleanupTable(table);
  }
}
/**
* Test missing REGIONINFO_QUALIFIER in hbase:meta
*/
@ -2843,7 +2938,7 @@ public class TestHBaseFsck {
// fix hole
assertErrors(
doFsck(
conf, false, true, false, false, false, false, false, false, false, false, false,
conf, false, true, false, false, false, false, false, false, false, false, false, false,
false, null),
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

View File

@ -40,14 +40,14 @@ public class HbckTestingUtil {
public static HBaseFsck doFsck(
Configuration conf, boolean fix, TableName table) throws Exception {
return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks,
boolean fixTableZnodes, Boolean fixReplication,
boolean fixReferenceFiles, boolean fixHFileLinks, boolean fixEmptyMetaRegionInfo,
boolean fixTableLocks, boolean fixTableZnodes, Boolean fixReplication,
TableName table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf, exec);
fsck.setDisplayFullReport(); // i.e. -details
@ -60,6 +60,7 @@ public class HbckTestingUtil {
fsck.setFixTableOrphans(fixTableOrphans);
fsck.setFixVersionFile(fixVersionFile);
fsck.setFixReferenceFiles(fixReferenceFiles);
fsck.setFixHFileLinks(fixHFileLinks);
fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
fsck.setFixTableLocks(fixTableLocks);
fsck.setFixReplication(fixReplication);