HBASE-16621 HBCK should have -fixHFileLinks (Janos Gub)
This commit is contained in:
parent
5ebaadf1a6
commit
34ffca1357
|
@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.TableName;
|
|||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.exceptions.DeserializationException;
|
||||
import org.apache.hadoop.hbase.fs.HFileSystem;
|
||||
import org.apache.hadoop.hbase.io.HFileLink;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
|
||||
|
@ -1612,6 +1613,18 @@ public abstract class FSUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter for HFileLinks (StoreFiles and HFiles not included).
|
||||
* the filter itself does not consider if a link is file or not.
|
||||
*/
|
||||
public static class HFileLinkFilter implements PathFilter {
|
||||
|
||||
@Override
|
||||
public boolean accept(Path p) {
|
||||
return HFileLink.isHFileLink(p);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ReferenceFileFilter extends AbstractFileStatusFilter {
|
||||
|
||||
private final FileSystem fs;
|
||||
|
|
|
@ -110,6 +110,8 @@ import org.apache.hadoop.hbase.client.Result;
|
|||
import org.apache.hadoop.hbase.client.RowMutations;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.client.TableState;
|
||||
import org.apache.hadoop.hbase.io.FileLink;
|
||||
import org.apache.hadoop.hbase.io.HFileLink;
|
||||
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFile;
|
||||
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||
|
@ -247,6 +249,7 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
|
||||
private boolean fixSplitParents = false; // fix lingering split parents
|
||||
private boolean fixReferenceFiles = false; // fix lingering reference store file
|
||||
private boolean fixHFileLinks = false; // fix lingering HFileLinks
|
||||
private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
|
||||
private boolean fixReplication = false; // fix undeleted replication queues for removed peer
|
||||
private boolean fixAny = false; // Set to true if any of the fix is required.
|
||||
|
@ -751,6 +754,7 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
// Do offline check and repair first
|
||||
offlineHdfsIntegrityRepair();
|
||||
offlineReferenceFileRepair();
|
||||
offlineHLinkFileRepair();
|
||||
// If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
|
||||
// hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
|
||||
// is better to set Master into maintenance mode during online hbck.
|
||||
|
@ -1111,6 +1115,73 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan all the store file names to find any lingering HFileLink files,
|
||||
* which refer to some none-exiting files. If "fix" option is enabled,
|
||||
* any lingering HFileLink file will be sidelined if found.
|
||||
*/
|
||||
private void offlineHLinkFileRepair() throws IOException, InterruptedException {
|
||||
Configuration conf = getConf();
|
||||
Path hbaseRoot = FSUtils.getRootDir(conf);
|
||||
FileSystem fs = hbaseRoot.getFileSystem(conf);
|
||||
LOG.info("Computing mapping of all link files");
|
||||
Map<String, Path> allFiles = FSUtils
|
||||
.getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
|
||||
errors.print("");
|
||||
|
||||
LOG.info("Validating mapping using HDFS state");
|
||||
for (Path path : allFiles.values()) {
|
||||
// building HFileLink object to gather locations
|
||||
HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
|
||||
if (actualLink.exists(fs)) continue; // good, expected
|
||||
|
||||
// Found a lingering HFileLink
|
||||
errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
|
||||
if (!shouldFixHFileLinks()) continue;
|
||||
|
||||
// Now, trying to fix it since requested
|
||||
setShouldRerun();
|
||||
|
||||
// An HFileLink path should be like
|
||||
// ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
|
||||
// sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
|
||||
boolean success = sidelineFile(fs, hbaseRoot, path);
|
||||
|
||||
if (!success) {
|
||||
LOG.error("Failed to sideline HFileLink file " + path);
|
||||
}
|
||||
|
||||
// An HFileLink backreference path should be like
|
||||
// ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
|
||||
// sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
|
||||
Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
|
||||
.getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
|
||||
HFileLink.getReferencedRegionName(path.getName().toString()),
|
||||
path.getParent().getName()),
|
||||
HFileLink.getReferencedHFileName(path.getName().toString()));
|
||||
success = sidelineFile(fs, hbaseRoot, backRefPath);
|
||||
|
||||
if (!success) {
|
||||
LOG.error("Failed to sideline HFileLink backreference file " + path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
|
||||
URI uri = hbaseRoot.toUri().relativize(path.toUri());
|
||||
if (uri.isAbsolute()) return false;
|
||||
String relativePath = uri.getPath();
|
||||
Path rootDir = getSidelineDir();
|
||||
Path dst = new Path(rootDir, relativePath);
|
||||
boolean pathCreated = fs.mkdirs(dst.getParent());
|
||||
if (!pathCreated) {
|
||||
LOG.error("Failed to create path: " + dst.getParent());
|
||||
return false;
|
||||
}
|
||||
LOG.info("Trying to sideline file " + path + " to " + dst);
|
||||
return fs.rename(path, dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO -- need to add tests for this.
|
||||
*/
|
||||
|
@ -3877,8 +3948,8 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
|
||||
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
|
||||
ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
|
||||
WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE,
|
||||
NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE
|
||||
LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
|
||||
ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE
|
||||
}
|
||||
void clear();
|
||||
void report(String message);
|
||||
|
@ -4434,6 +4505,15 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
return fixReferenceFiles;
|
||||
}
|
||||
|
||||
public void setFixHFileLinks(boolean shouldFix) {
|
||||
fixHFileLinks = shouldFix;
|
||||
fixAny |= shouldFix;
|
||||
}
|
||||
|
||||
boolean shouldFixHFileLinks() {
|
||||
return fixHFileLinks;
|
||||
}
|
||||
|
||||
public boolean shouldIgnorePreCheckPermission() {
|
||||
return !fixAny || ignorePreCheckPermission;
|
||||
}
|
||||
|
@ -4550,6 +4630,7 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
out.println(" -fixSplitParents Try to force offline split parents to be online.");
|
||||
out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
|
||||
out.println(" -fixReferenceFiles Try to offline lingering reference store files");
|
||||
out.println(" -fixHFileLinks Try to offline lingering HFileLinks");
|
||||
out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
|
||||
+ " (empty REGIONINFO_QUALIFIER rows)");
|
||||
|
||||
|
@ -4561,7 +4642,8 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
out.println("");
|
||||
out.println(" Metadata Repair shortcuts");
|
||||
out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
|
||||
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles");
|
||||
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
|
||||
"-fixHFileLinks");
|
||||
out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
|
||||
|
||||
out.println("");
|
||||
|
@ -4687,6 +4769,8 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
sidelineCorruptHFiles = true;
|
||||
} else if (cmd.equals("-fixReferenceFiles")) {
|
||||
setFixReferenceFiles(true);
|
||||
} else if (cmd.equals("-fixHFileLinks")) {
|
||||
setFixHFileLinks(true);
|
||||
} else if (cmd.equals("-fixEmptyMetaCells")) {
|
||||
setFixEmptyMetaCells(true);
|
||||
} else if (cmd.equals("-repair")) {
|
||||
|
@ -4702,6 +4786,7 @@ public class HBaseFsck extends Configured implements Closeable {
|
|||
setFixSplitParents(false);
|
||||
setCheckHdfs(true);
|
||||
setFixReferenceFiles(true);
|
||||
setFixHFileLinks(true);
|
||||
} else if (cmd.equals("-repairHoles")) {
|
||||
// this will make all missing hdfs regions available but may lose data
|
||||
setFixHdfsHoles(true);
|
||||
|
|
|
@ -912,10 +912,10 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
|
|||
// TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
|
||||
// for some time until children references are deleted. HBCK erroneously sees this as
|
||||
// overlapping regions
|
||||
HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false,
|
||||
false, false, null);
|
||||
HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, true,
|
||||
false, false, false, null);
|
||||
// no LINGERING_SPLIT_PARENT reported
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {});
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
|
||||
|
||||
// assert that the split hbase:meta entry is still there.
|
||||
Get get = new Get(hri.getRegionName());
|
||||
|
@ -997,7 +997,7 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
|
|||
|
||||
// now fix it. The fix should not revert the region split, but add daughters to META
|
||||
hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
|
||||
false, false, null);
|
||||
false, false, false, null);
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
|
@ -1657,7 +1657,7 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
|
|||
// fix hole
|
||||
assertErrors(
|
||||
doFsck(conf, false, true, false, false, false, false, false, false, false, false, false,
|
||||
null),
|
||||
false, null),
|
||||
new HBaseFsck.ErrorReporter.ERROR_CODE[] {
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
|
||||
|
|
|
@ -44,6 +44,10 @@ import org.apache.hadoop.hbase.client.ResultScanner;
|
|||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
|
||||
import org.apache.hadoop.hbase.io.HFileLink;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFile;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFileContext;
|
||||
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
|
||||
import org.apache.hadoop.hbase.master.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
|
@ -279,6 +283,97 @@ public class TestHBaseFsckTwoRS extends BaseTestHBaseFsck {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test fixing lingering HFileLinks.
|
||||
*/
|
||||
@Test(timeout = 180000)
|
||||
public void testLingeringHFileLinks() throws Exception {
|
||||
TableName table = TableName.valueOf("testLingeringHFileLinks");
|
||||
try {
|
||||
setupTable(table);
|
||||
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
|
||||
Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
|
||||
String regionName = regionDir.getName();
|
||||
Path famDir = new Path(regionDir, FAM_STR);
|
||||
String HFILE_NAME = "01234567abcd";
|
||||
Path hFilePath = new Path(famDir, HFILE_NAME);
|
||||
|
||||
// creating HFile
|
||||
HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
|
||||
HFile.Writer w =
|
||||
HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context)
|
||||
.create();
|
||||
w.close();
|
||||
|
||||
HFileLink.create(conf, fs, famDir, table, regionName, HFILE_NAME);
|
||||
|
||||
// should report no error
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertNoErrors(hbck);
|
||||
|
||||
// Delete linked file
|
||||
fs.delete(hFilePath, true);
|
||||
|
||||
// Check without fix should show the error
|
||||
hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
|
||||
|
||||
// Fixing the error
|
||||
hbck = doFsck(conf, true);
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
|
||||
|
||||
// Fix should sideline these files, thus preventing the error
|
||||
hbck = doFsck(conf, false);
|
||||
assertNoErrors(hbck);
|
||||
} finally {
|
||||
cleanupTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 180000)
|
||||
public void testCorruptLinkDirectory() throws Exception {
|
||||
TableName table = TableName.valueOf("testLingeringHFileLinks");
|
||||
try {
|
||||
setupTable(table);
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
|
||||
Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
|
||||
Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
|
||||
Path famDir = new Path(regionDir, FAM_STR);
|
||||
String regionName = regionDir.getName();
|
||||
String HFILE_NAME = "01234567abcd";
|
||||
String link = HFileLink.createHFileLinkName(table, regionName, HFILE_NAME);
|
||||
|
||||
// should report no error
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertNoErrors(hbck);
|
||||
|
||||
// creating a directory with file instead of the HFileLink file
|
||||
fs.mkdirs(new Path(famDir, link));
|
||||
fs.create(new Path(new Path(famDir, link), "somefile"));
|
||||
|
||||
// Check without fix should show the error
|
||||
hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
|
||||
|
||||
// Fixing the error
|
||||
hbck = doFsck(conf, true);
|
||||
assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
|
||||
HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
|
||||
|
||||
// Fix should sideline these files, thus preventing the error
|
||||
hbck = doFsck(conf, false);
|
||||
assertNoErrors(hbck);
|
||||
} finally {
|
||||
cleanupTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
@Test (timeout=180000)
|
||||
public void testMetaOffline() throws Exception {
|
||||
// check no errors
|
||||
|
|
|
@ -40,12 +40,12 @@ public class HbckTestingUtil {
|
|||
|
||||
public static HBaseFsck doFsck(
|
||||
Configuration conf, boolean fix, TableName table) throws Exception {
|
||||
return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
|
||||
return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
|
||||
}
|
||||
|
||||
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta,
|
||||
boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans,
|
||||
boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles,
|
||||
boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixHFileLinks,
|
||||
boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, Boolean fixReplication,
|
||||
TableName table) throws Exception {
|
||||
HBaseFsck fsck = new HBaseFsck(conf, exec);
|
||||
|
@ -60,6 +60,7 @@ public class HbckTestingUtil {
|
|||
fsck.setFixTableOrphans(fixTableOrphans);
|
||||
fsck.setFixVersionFile(fixVersionFile);
|
||||
fsck.setFixReferenceFiles(fixReferenceFiles);
|
||||
fsck.setFixHFileLinks(fixHFileLinks);
|
||||
fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
|
||||
fsck.setFixReplication(fixReplication);
|
||||
if (table != null) {
|
||||
|
|
Loading…
Reference in New Issue