HBASE-16621 HBCK should have -fixHFileLinks (Janos Gub)

This commit is contained in:
tedyu 2017-01-31 14:32:45 -08:00
parent 5ebaadf1a6
commit 34ffca1357
5 changed files with 204 additions and 10 deletions

View File

@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
@ -1612,6 +1613,18 @@ public abstract class FSUtils {
} }
} }
/**
 * {@link PathFilter} that accepts only paths recognized as HFileLinks by
 * {@link HFileLink#isHFileLink(Path)}; StoreFiles and HFiles are not included.
 * The filter itself does not consider whether the accepted path is a file or not.
 */
public static class HFileLinkFilter implements PathFilter {

  /**
   * @param p the path to test
   * @return the result of {@link HFileLink#isHFileLink(Path)} for {@code p}
   */
  @Override
  public boolean accept(Path p) {
    final boolean looksLikeLink = HFileLink.isHFileLink(p);
    return looksLikeLink;
  }
}
public static class ReferenceFileFilter extends AbstractFileStatusFilter { public static class ReferenceFileFilter extends AbstractFileStatusFilter {
private final FileSystem fs; private final FileSystem fs;

View File

@ -110,6 +110,8 @@ import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterFileSystem;
@ -247,6 +249,7 @@ public class HBaseFsck extends Configured implements Closeable {
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
private boolean fixSplitParents = false; // fix lingering split parents private boolean fixSplitParents = false; // fix lingering split parents
private boolean fixReferenceFiles = false; // fix lingering reference store file private boolean fixReferenceFiles = false; // fix lingering reference store file
private boolean fixHFileLinks = false; // fix lingering HFileLinks
private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
private boolean fixReplication = false; // fix undeleted replication queues for removed peer private boolean fixReplication = false; // fix undeleted replication queues for removed peer
private boolean fixAny = false; // Set to true if any of the fix is required. private boolean fixAny = false; // Set to true if any of the fix is required.
@ -751,6 +754,7 @@ public class HBaseFsck extends Configured implements Closeable {
// Do offline check and repair first // Do offline check and repair first
offlineHdfsIntegrityRepair(); offlineHdfsIntegrityRepair();
offlineReferenceFileRepair(); offlineReferenceFileRepair();
offlineHLinkFileRepair();
// If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
// hbck, it is likely that hbck would be misled and report transient errors. Therefore, it // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
// is better to set Master into maintenance mode during online hbck. // is better to set Master into maintenance mode during online hbck.
@ -1111,6 +1115,73 @@ public class HBaseFsck extends Configured implements Closeable {
} }
} }
/**
 * Scan all the store file names to find any lingering HFileLink files,
 * i.e. links which refer to non-existing files. Every lingering link is
 * reported as {@code LINGERING_HFILELINK}. If the "fix" option is enabled,
 * the lingering HFileLink and its back-reference are sidelined and a rerun
 * is requested.
 *
 * @throws IOException if one of the filesystem or link helper calls made here fails
 * @throws InterruptedException declared by this method; NOTE(review): presumably raised by
 *         the executor-backed store file scan - confirm against FSUtils
 */
private void offlineHLinkFileRepair() throws IOException, InterruptedException {
  Configuration conf = getConf();
  Path hbaseRoot = FSUtils.getRootDir(conf);
  FileSystem fs = hbaseRoot.getFileSystem(conf);
  LOG.info("Computing mapping of all link files");
  Map<String, Path> allFiles = FSUtils
      .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
  errors.print("");

  LOG.info("Validating mapping using HDFS state");
  for (Path path : allFiles.values()) {
    // building HFileLink object to gather locations
    HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
    if (actualLink.exists(fs)) {
      continue; // good, expected
    }

    // Found a lingering HFileLink
    errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
    if (!shouldFixHFileLinks()) {
      continue;
    }

    // Now, trying to fix it since requested
    setShouldRerun();

    // An HFileLink path should be like
    // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
    // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
    boolean success = sidelineFile(fs, hbaseRoot, path);
    if (!success) {
      LOG.error("Failed to sideline HFileLink file " + path);
    }

    // An HFileLink backreference path should be like
    // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
    // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
    // The link's file name encodes the referenced table, region and hfile; read it once.
    String linkName = path.getName();
    Path backRefPath = FileLink.getBackReferencesDir(
        HFileArchiveUtil.getStoreArchivePath(conf,
            HFileLink.getReferencedTableName(linkName),
            HFileLink.getReferencedRegionName(linkName),
            path.getParent().getName()),
        HFileLink.getReferencedHFileName(linkName));
    success = sidelineFile(fs, hbaseRoot, backRefPath);
    if (!success) {
      // Report the back-reference path itself, not the link it belongs to.
      LOG.error("Failed to sideline HFileLink backreference file " + backRefPath);
    }
  }
}
/**
 * Moves {@code path} into the sideline directory, keeping the same folder
 * structure it has relative to {@code hbaseRoot}.
 *
 * @param fs filesystem used to create the destination directory and to rename
 * @param hbaseRoot root directory that {@code path} is relativized against
 * @param path the file to sideline
 * @return {@code false} if the relativized URI is still absolute, or if the
 *         destination's parent directory could not be created; otherwise the
 *         result of the rename
 * @throws IOException if a filesystem call or the sideline directory lookup fails
 */
private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
  final URI relative = hbaseRoot.toUri().relativize(path.toUri());
  // A result that is still absolute was not expressed relative to hbaseRoot.
  if (relative.isAbsolute()) {
    return false;
  }

  final Path target = new Path(getSidelineDir(), relative.getPath());
  final Path targetParent = target.getParent();
  if (fs.mkdirs(targetParent)) {
    LOG.info("Trying to sideline file " + path + " to " + target);
    return fs.rename(path, target);
  }

  LOG.error("Failed to create path: " + targetParent);
  return false;
}
/** /**
* TODO -- need to add tests for this. * TODO -- need to add tests for this.
*/ */
@ -3877,8 +3948,8 @@ public class HBaseFsck extends Configured implements Closeable {
FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE, ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE, LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE
} }
void clear(); void clear();
void report(String message); void report(String message);
@ -4434,6 +4505,15 @@ public class HBaseFsck extends Configured implements Closeable {
return fixReferenceFiles; return fixReferenceFiles;
} }
/**
 * Turns the lingering-HFileLink fix on or off. Turning it on also raises
 * {@code fixAny}; turning it off leaves {@code fixAny} as it was.
 *
 * @param shouldFix whether lingering HFileLinks should be fixed
 */
public void setFixHFileLinks(boolean shouldFix) {
  this.fixHFileLinks = shouldFix;
  if (shouldFix) {
    this.fixAny = true;
  }
}
/**
 * @return the current value of the {@code fixHFileLinks} flag
 */
boolean shouldFixHFileLinks() {
  return this.fixHFileLinks;
}
public boolean shouldIgnorePreCheckPermission() { public boolean shouldIgnorePreCheckPermission() {
return !fixAny || ignorePreCheckPermission; return !fixAny || ignorePreCheckPermission;
} }
@ -4550,6 +4630,7 @@ public class HBaseFsck extends Configured implements Closeable {
out.println(" -fixSplitParents Try to force offline split parents to be online."); out.println(" -fixSplitParents Try to force offline split parents to be online.");
out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check"); out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
out.println(" -fixReferenceFiles Try to offline lingering reference store files"); out.println(" -fixReferenceFiles Try to offline lingering reference store files");
out.println(" -fixHFileLinks Try to offline lingering HFileLinks");
out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region" out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
+ " (empty REGIONINFO_QUALIFIER rows)"); + " (empty REGIONINFO_QUALIFIER rows)");
@ -4561,7 +4642,8 @@ public class HBaseFsck extends Configured implements Closeable {
out.println(""); out.println("");
out.println(" Metadata Repair shortcuts"); out.println(" Metadata Repair shortcuts");
out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " + out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles"); "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
// Leading space keeps this flag separate from the preceding "-fixReferenceFiles" in the output.
" -fixHFileLinks");
out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles"); out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
out.println(""); out.println("");
@ -4687,6 +4769,8 @@ public class HBaseFsck extends Configured implements Closeable {
sidelineCorruptHFiles = true; sidelineCorruptHFiles = true;
} else if (cmd.equals("-fixReferenceFiles")) { } else if (cmd.equals("-fixReferenceFiles")) {
setFixReferenceFiles(true); setFixReferenceFiles(true);
} else if (cmd.equals("-fixHFileLinks")) {
setFixHFileLinks(true);
} else if (cmd.equals("-fixEmptyMetaCells")) { } else if (cmd.equals("-fixEmptyMetaCells")) {
setFixEmptyMetaCells(true); setFixEmptyMetaCells(true);
} else if (cmd.equals("-repair")) { } else if (cmd.equals("-repair")) {
@ -4702,6 +4786,7 @@ public class HBaseFsck extends Configured implements Closeable {
setFixSplitParents(false); setFixSplitParents(false);
setCheckHdfs(true); setCheckHdfs(true);
setFixReferenceFiles(true); setFixReferenceFiles(true);
setFixHFileLinks(true);
} else if (cmd.equals("-repairHoles")) { } else if (cmd.equals("-repairHoles")) {
// this will make all missing hdfs regions available but may lose data // this will make all missing hdfs regions available but may lose data
setFixHdfsHoles(true); setFixHdfsHoles(true);

View File

@ -912,10 +912,10 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
// TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
// for some time until children references are deleted. HBCK erroneously sees this as // for some time until children references are deleted. HBCK erroneously sees this as
// overlapping regions // overlapping regions
HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, true,
false, false, null); false, false, false, null);
// no LINGERING_SPLIT_PARENT reported // no LINGERING_SPLIT_PARENT reported
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
// assert that the split hbase:meta entry is still there. // assert that the split hbase:meta entry is still there.
Get get = new Get(hri.getRegionName()); Get get = new Get(hri.getRegionName());
@ -997,7 +997,7 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
// now fix it. The fix should not revert the region split, but add daughters to META // now fix it. The fix should not revert the region split, but add daughters to META
hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
false, false, null); false, false, false, null);
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
@ -1657,7 +1657,7 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
// fix hole // fix hole
assertErrors( assertErrors(
doFsck(conf, false, true, false, false, false, false, false, false, false, false, false, doFsck(conf, false, true, false, false, false, false, false, false, false, false, false,
null), false, null),
new HBaseFsck.ErrorReporter.ERROR_CODE[] { new HBaseFsck.ErrorReporter.ERROR_CODE[] {
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

View File

@ -44,6 +44,10 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.HRegionServer;
@ -279,6 +283,97 @@ public class TestHBaseFsckTwoRS extends BaseTestHBaseFsck {
} }
} }
/**
 * Verifies that hbck reports a lingering HFileLink once the linked HFile is
 * deleted, and that running with fix enabled makes the following check clean.
 */
@Test(timeout = 180000)
public void testLingeringHFileLinks() throws Exception {
  final TableName table = TableName.valueOf("testLingeringHFileLinks");
  try {
    setupTable(table);

    final FileSystem fs = FileSystem.get(conf);
    final Path rootDir = FSUtils.getRootDir(conf);
    final Path firstRegionDir =
        FSUtils.getRegionDirs(fs, FSUtils.getTableDir(rootDir, table)).get(0);
    final String regionName = firstRegionDir.getName();
    final Path familyDir = new Path(firstRegionDir, FAM_STR);
    final String hfileName = "01234567abcd";
    final Path linkedHFile = new Path(familyDir, hfileName);

    // Write an empty HFile and create a link pointing at it.
    final HFileContext fileContext = new HFileContextBuilder().withIncludesTags(false).build();
    final HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
        .withPath(fs, linkedHFile)
        .withFileContext(fileContext)
        .create();
    writer.close();
    HFileLink.create(conf, fs, familyDir, table, regionName, hfileName);

    // Link target exists: nothing to report.
    assertNoErrors(doFsck(conf, false));

    // Remove the link target so the link becomes lingering.
    fs.delete(linkedHFile, true);

    // A check-only run reports the lingering link.
    assertErrors(doFsck(conf, false), new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // The fixing run still reports the error it is repairing.
    assertErrors(doFsck(conf, true), new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // The fix sidelined the files, so the next check is clean.
    assertNoErrors(doFsck(conf, false));
  } finally {
    cleanupTable(table);
  }
}
/**
 * Verifies hbck's handling of a directory that carries an HFileLink name in
 * place of a link file: it is reported as a lingering HFileLink, and running
 * with fix enabled makes the following check clean.
 */
@Test(timeout = 180000)
public void testCorruptLinkDirectory() throws Exception {
  // Use this test's own name so it cannot collide with testLingeringHFileLinks' table.
  TableName table = TableName.valueOf("testCorruptLinkDirectory");
  try {
    setupTable(table);

    FileSystem fs = FileSystem.get(conf);
    Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
    Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
    Path famDir = new Path(regionDir, FAM_STR);
    String regionName = regionDir.getName();
    String hfileName = "01234567abcd";
    String link = HFileLink.createHFileLinkName(table, regionName, hfileName);

    // should report no error
    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    // creating a directory with file instead of the HFileLink file
    Path linkDir = new Path(famDir, link);
    fs.mkdirs(linkDir);
    // Close the returned stream right away; only the file's existence matters here.
    fs.create(new Path(linkDir, "somefile")).close();

    // Check without fix should show the error
    hbck = doFsck(conf, false);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // Fixing the error
    hbck = doFsck(conf, true);
    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
        HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

    // Fix should sideline these files, thus preventing the error
    hbck = doFsck(conf, false);
    assertNoErrors(hbck);
  } finally {
    cleanupTable(table);
  }
}
@Test (timeout=180000) @Test (timeout=180000)
public void testMetaOffline() throws Exception { public void testMetaOffline() throws Exception {
// check no errors // check no errors

View File

@ -40,12 +40,12 @@ public class HbckTestingUtil {
public static HBaseFsck doFsck( public static HBaseFsck doFsck(
Configuration conf, boolean fix, TableName table) throws Exception { Configuration conf, boolean fix, TableName table) throws Exception {
return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table); return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
} }
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta,
boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans,
boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixHFileLinks,
boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, Boolean fixReplication, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, Boolean fixReplication,
TableName table) throws Exception { TableName table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf, exec); HBaseFsck fsck = new HBaseFsck(conf, exec);
@ -60,6 +60,7 @@ public class HbckTestingUtil {
fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixTableOrphans(fixTableOrphans);
fsck.setFixVersionFile(fixVersionFile); fsck.setFixVersionFile(fixVersionFile);
fsck.setFixReferenceFiles(fixReferenceFiles); fsck.setFixReferenceFiles(fixReferenceFiles);
fsck.setFixHFileLinks(fixHFileLinks);
fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
fsck.setFixReplication(fixReplication); fsck.setFixReplication(fixReplication);
if (table != null) { if (table != null) {