HBASE-13806 Check the mob files when there are mob-enabled columns in HFileCorruptionChecker. (Jingcheng)
This commit is contained in:
parent
efbef296d6
commit
13fe542bcc
|
@ -39,9 +39,11 @@ import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
||||||
import org.apache.hadoop.hbase.io.hfile.CorruptHFileException;
|
import org.apache.hadoop.hbase.io.hfile.CorruptHFileException;
|
||||||
import org.apache.hadoop.hbase.io.hfile.HFile;
|
import org.apache.hadoop.hbase.io.hfile.HFile;
|
||||||
|
import org.apache.hadoop.hbase.mob.MobUtils;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils.FamilyDirFilter;
|
import org.apache.hadoop.hbase.util.FSUtils.FamilyDirFilter;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils.HFileFilter;
|
import org.apache.hadoop.hbase.util.FSUtils.HFileFilter;
|
||||||
|
@ -68,8 +70,13 @@ public class HFileCorruptionChecker {
|
||||||
final Set<Path> failures = new ConcurrentSkipListSet<Path>();
|
final Set<Path> failures = new ConcurrentSkipListSet<Path>();
|
||||||
final Set<Path> quarantined = new ConcurrentSkipListSet<Path>();
|
final Set<Path> quarantined = new ConcurrentSkipListSet<Path>();
|
||||||
final Set<Path> missing = new ConcurrentSkipListSet<Path>();
|
final Set<Path> missing = new ConcurrentSkipListSet<Path>();
|
||||||
|
final Set<Path> corruptedMobFiles = new ConcurrentSkipListSet<Path>();
|
||||||
|
final Set<Path> failureMobFiles = new ConcurrentSkipListSet<Path>();
|
||||||
|
final Set<Path> missedMobFiles = new ConcurrentSkipListSet<Path>();
|
||||||
|
final Set<Path> quarantinedMobFiles = new ConcurrentSkipListSet<Path>();
|
||||||
final boolean inQuarantineMode;
|
final boolean inQuarantineMode;
|
||||||
final AtomicInteger hfilesChecked = new AtomicInteger();
|
final AtomicInteger hfilesChecked = new AtomicInteger();
|
||||||
|
final AtomicInteger mobFilesChecked = new AtomicInteger();
|
||||||
|
|
||||||
public HFileCorruptionChecker(Configuration conf, ExecutorService executor,
|
public HFileCorruptionChecker(Configuration conf, ExecutorService executor,
|
||||||
boolean quarantine) throws IOException {
|
boolean quarantine) throws IOException {
|
||||||
|
@ -176,6 +183,109 @@ public class HFileCorruptionChecker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check all files in a mob column family dir.
|
||||||
|
*
|
||||||
|
* @param cfDir
|
||||||
|
* mob column family directory
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected void checkMobColFamDir(Path cfDir) throws IOException {
|
||||||
|
FileStatus[] hfs = null;
|
||||||
|
try {
|
||||||
|
hfs = fs.listStatus(cfDir, new HFileFilter(fs)); // use same filter as scanner.
|
||||||
|
} catch (FileNotFoundException fnfe) {
|
||||||
|
// Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist.
|
||||||
|
LOG.warn("Mob colfam Directory " + cfDir +
|
||||||
|
" does not exist. Likely the table is deleted. Skipping.");
|
||||||
|
missedMobFiles.add(cfDir);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hadoop 1.0 listStatus does not throw an exception if the path does not exist.
|
||||||
|
if (hfs.length == 0 && !fs.exists(cfDir)) {
|
||||||
|
LOG.warn("Mob colfam Directory " + cfDir +
|
||||||
|
" does not exist. Likely the table is deleted. Skipping.");
|
||||||
|
missedMobFiles.add(cfDir);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (FileStatus hfFs : hfs) {
|
||||||
|
Path hf = hfFs.getPath();
|
||||||
|
checkMobFile(hf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks a path to see if it is a valid mob file.
|
||||||
|
*
|
||||||
|
* @param p
|
||||||
|
* full Path to a mob file.
|
||||||
|
* @throws IOException
|
||||||
|
* This is a connectivity related exception
|
||||||
|
*/
|
||||||
|
protected void checkMobFile(Path p) throws IOException {
|
||||||
|
HFile.Reader r = null;
|
||||||
|
try {
|
||||||
|
r = HFile.createReader(fs, p, cacheConf, conf);
|
||||||
|
} catch (CorruptHFileException che) {
|
||||||
|
LOG.warn("Found corrupt mob file " + p, che);
|
||||||
|
corruptedMobFiles.add(p);
|
||||||
|
if (inQuarantineMode) {
|
||||||
|
Path dest = createQuarantinePath(p);
|
||||||
|
LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
|
||||||
|
boolean success = fs.mkdirs(dest.getParent());
|
||||||
|
success = success ? fs.rename(p, dest): false;
|
||||||
|
if (!success) {
|
||||||
|
failureMobFiles.add(p);
|
||||||
|
} else {
|
||||||
|
quarantinedMobFiles.add(dest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} catch (FileNotFoundException fnfe) {
|
||||||
|
LOG.warn("Mob file " + p + " was missing. Likely removed due to compaction?");
|
||||||
|
missedMobFiles.add(p);
|
||||||
|
} finally {
|
||||||
|
mobFilesChecked.addAndGet(1);
|
||||||
|
if (r != null) {
|
||||||
|
r.close(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks all the mob files of a table.
|
||||||
|
* @param regionDir The mob region directory
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private void checkMobRegionDir(Path regionDir) throws IOException {
|
||||||
|
if (!fs.exists(regionDir)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FileStatus[] hfs = null;
|
||||||
|
try {
|
||||||
|
hfs = fs.listStatus(regionDir, new FamilyDirFilter(fs));
|
||||||
|
} catch (FileNotFoundException fnfe) {
|
||||||
|
// Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist.
|
||||||
|
LOG.warn("Mob directory " + regionDir
|
||||||
|
+ " does not exist. Likely the table is deleted. Skipping.");
|
||||||
|
missedMobFiles.add(regionDir);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hadoop 1.0 listStatus does not throw an exception if the path does not exist.
|
||||||
|
if (hfs.length == 0 && !fs.exists(regionDir)) {
|
||||||
|
LOG.warn("Mob directory " + regionDir
|
||||||
|
+ " does not exist. Likely the table is deleted. Skipping.");
|
||||||
|
missedMobFiles.add(regionDir);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (FileStatus hfFs : hfs) {
|
||||||
|
Path hf = hfFs.getPath();
|
||||||
|
checkMobColFamDir(hf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check all column families in a region dir.
|
* Check all column families in a region dir.
|
||||||
*
|
*
|
||||||
|
@ -236,6 +346,8 @@ public class HFileCorruptionChecker {
|
||||||
rdcs.add(work);
|
rdcs.add(work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add mob region
|
||||||
|
rdcs.add(createMobRegionDirChecker(tableDir));
|
||||||
// Submit and wait for completion
|
// Submit and wait for completion
|
||||||
try {
|
try {
|
||||||
rdFutures = executor.invokeAll(rdcs);
|
rdFutures = executor.invokeAll(rdcs);
|
||||||
|
@ -292,6 +404,34 @@ public class HFileCorruptionChecker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An individual work item for parallelized mob dir processing. This is
|
||||||
|
* intentionally an inner class so it can use the shared error sets and fs.
|
||||||
|
*/
|
||||||
|
private class MobRegionDirChecker extends RegionDirChecker {
|
||||||
|
|
||||||
|
MobRegionDirChecker(Path regionDir) {
|
||||||
|
super(regionDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Void call() throws IOException {
|
||||||
|
checkMobRegionDir(regionDir);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an instance of MobRegionDirChecker.
|
||||||
|
* @param tableDir The current table directory.
|
||||||
|
* @return An instance of MobRegionDirChecker.
|
||||||
|
*/
|
||||||
|
private MobRegionDirChecker createMobRegionDirChecker(Path tableDir) {
|
||||||
|
TableName tableName = FSUtils.getTableName(tableDir);
|
||||||
|
Path mobDir = MobUtils.getMobRegionPath(conf, tableName);
|
||||||
|
return new MobRegionDirChecker(mobDir);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check the specified table dirs for bad hfiles.
|
* Check the specified table dirs for bad hfiles.
|
||||||
*/
|
*/
|
||||||
|
@ -337,6 +477,42 @@ public class HFileCorruptionChecker {
|
||||||
return new HashSet<Path>(missing);
|
return new HashSet<Path>(missing);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the set of check failure mob file paths after checkTables is called.
|
||||||
|
*/
|
||||||
|
public Collection<Path> getFailureMobFiles() {
|
||||||
|
return new HashSet<Path>(failureMobFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the set of corrupted mob file paths after checkTables is called.
|
||||||
|
*/
|
||||||
|
public Collection<Path> getCorruptedMobFiles() {
|
||||||
|
return new HashSet<Path>(corruptedMobFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return number of mob files checked in the last HfileCorruptionChecker run
|
||||||
|
*/
|
||||||
|
public int getMobFilesChecked() {
|
||||||
|
return mobFilesChecked.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the set of successfully quarantined paths after checkTables is called.
|
||||||
|
*/
|
||||||
|
public Collection<Path> getQuarantinedMobFiles() {
|
||||||
|
return new HashSet<Path>(quarantinedMobFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the set of paths that were missing. Likely due to table deletion or
|
||||||
|
* deletion/moves from compaction.
|
||||||
|
*/
|
||||||
|
public Collection<Path> getMissedMobFiles() {
|
||||||
|
return new HashSet<Path>(missedMobFiles);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Print a human readable summary of hfile quarantining operations.
|
* Print a human readable summary of hfile quarantining operations.
|
||||||
* @param out
|
* @param out
|
||||||
|
@ -363,10 +539,31 @@ public class HFileCorruptionChecker {
|
||||||
String fixedState = (corrupted.size() == quarantined.size()) ? "OK"
|
String fixedState = (corrupted.size() == quarantined.size()) ? "OK"
|
||||||
: "CORRUPTED";
|
: "CORRUPTED";
|
||||||
|
|
||||||
|
// print mob-related report
|
||||||
|
if (inQuarantineMode) {
|
||||||
|
out.print(" Mob files successfully quarantined: " + quarantinedMobFiles.size());
|
||||||
|
for (Path sq : quarantinedMobFiles) {
|
||||||
|
out.print(" " + sq);
|
||||||
|
}
|
||||||
|
out.print(" Mob files failed quarantine: " + failureMobFiles.size());
|
||||||
|
for (Path fq : failureMobFiles) {
|
||||||
|
out.print(" " + fq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.print(" Mob files moved while checking: " + missedMobFiles.size());
|
||||||
|
for (Path mq : missedMobFiles) {
|
||||||
|
out.print(" " + mq);
|
||||||
|
}
|
||||||
|
String initialMobState = (corruptedMobFiles.size() == 0) ? "OK" : "CORRUPTED";
|
||||||
|
String fixedMobState = (corruptedMobFiles.size() == quarantinedMobFiles.size()) ? "OK"
|
||||||
|
: "CORRUPTED";
|
||||||
|
|
||||||
if (inQuarantineMode) {
|
if (inQuarantineMode) {
|
||||||
out.print("Summary: " + initialState + " => " + fixedState);
|
out.print("Summary: " + initialState + " => " + fixedState);
|
||||||
|
out.print("Mob summary: " + initialMobState + " => " + fixedMobState);
|
||||||
} else {
|
} else {
|
||||||
out.print("Summary: " + initialState);
|
out.print("Summary: " + initialState);
|
||||||
|
out.print("Mob summary: " + initialMobState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NavigableMap;
|
import java.util.NavigableMap;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.UUID;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.CountDownLatch;
|
import java.util.concurrent.CountDownLatch;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
|
@ -95,6 +96,8 @@ import org.apache.hadoop.hbase.master.RegionState;
|
||||||
import org.apache.hadoop.hbase.master.RegionStates;
|
import org.apache.hadoop.hbase.master.RegionStates;
|
||||||
import org.apache.hadoop.hbase.master.TableLockManager;
|
import org.apache.hadoop.hbase.master.TableLockManager;
|
||||||
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
|
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
|
||||||
|
import org.apache.hadoop.hbase.mob.MobFileName;
|
||||||
|
import org.apache.hadoop.hbase.mob.MobUtils;
|
||||||
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||||
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
|
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||||
|
@ -437,6 +440,31 @@ public class TestHBaseFsck {
|
||||||
tbl.flushCommits();
|
tbl.flushCommits();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setup a clean table with a mob-enabled column.
|
||||||
|
*
|
||||||
|
* @param tableName The name of a table to be created.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
void setupMobTable(TableName tablename) throws Exception {
|
||||||
|
HTableDescriptor desc = new HTableDescriptor(tablename);
|
||||||
|
HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
|
||||||
|
hcd.setMobEnabled(true);
|
||||||
|
hcd.setMobThreshold(0);
|
||||||
|
desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
|
||||||
|
createTable(TEST_UTIL, desc, SPLITS);
|
||||||
|
|
||||||
|
tbl = (HTable) connection.getTable(tablename, tableExecutorService);
|
||||||
|
List<Put> puts = new ArrayList<Put>();
|
||||||
|
for (byte[] row : ROWKEYS) {
|
||||||
|
Put p = new Put(row);
|
||||||
|
p.add(FAM, Bytes.toBytes("val"), row);
|
||||||
|
puts.add(p);
|
||||||
|
}
|
||||||
|
tbl.put(puts);
|
||||||
|
tbl.flushCommits();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Counts the number of row to verify data loss or non-dataloss.
|
* Counts the number of row to verify data loss or non-dataloss.
|
||||||
*/
|
*/
|
||||||
|
@ -2120,6 +2148,44 @@ public class TestHBaseFsck {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets flushed mob files.
|
||||||
|
* @param fs The current file system.
|
||||||
|
* @param table The current table name.
|
||||||
|
* @return Path of a flushed hfile.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
|
||||||
|
Path regionDir = MobUtils.getMobRegionPath(conf, table);
|
||||||
|
Path famDir = new Path(regionDir, FAM_STR);
|
||||||
|
|
||||||
|
// keep doing this until we get a legit hfile
|
||||||
|
while (true) {
|
||||||
|
FileStatus[] hfFss = fs.listStatus(famDir);
|
||||||
|
if (hfFss.length == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (FileStatus hfs : hfFss) {
|
||||||
|
if (!hfs.isDirectory()) {
|
||||||
|
return hfs.getPath();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new mob file name by the old one.
|
||||||
|
* @param oldFileName The old mob file name.
|
||||||
|
* @return The new mob file name.
|
||||||
|
*/
|
||||||
|
String createMobFileName(String oldFileName) {
|
||||||
|
MobFileName mobFileName = MobFileName.create(oldFileName);
|
||||||
|
String startKey = mobFileName.getStartKey();
|
||||||
|
String date = mobFileName.getDate();
|
||||||
|
return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
|
||||||
|
.getFileName();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This creates a table and then corrupts an hfile. Hbck should quarantine the file.
|
* This creates a table and then corrupts an hfile. Hbck should quarantine the file.
|
||||||
*/
|
*/
|
||||||
|
@ -2160,6 +2226,50 @@ public class TestHBaseFsck {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This creates a table and then corrupts a mob file. Hbck should quarantine the file.
|
||||||
|
*/
|
||||||
|
@Test(timeout=180000)
|
||||||
|
public void testQuarantineCorruptMobFile() throws Exception {
|
||||||
|
TableName table = TableName.valueOf(name.getMethodName());
|
||||||
|
try {
|
||||||
|
setupMobTable(table);
|
||||||
|
assertEquals(ROWKEYS.length, countRows());
|
||||||
|
admin.flush(table);
|
||||||
|
|
||||||
|
FileSystem fs = FileSystem.get(conf);
|
||||||
|
Path mobFile = getFlushedMobFile(fs, table);
|
||||||
|
admin.disableTable(table);
|
||||||
|
// create new corrupt mob file.
|
||||||
|
String corruptMobFile = createMobFileName(mobFile.getName());
|
||||||
|
Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
|
||||||
|
TestHFile.truncateFile(fs, mobFile, corrupt);
|
||||||
|
LOG.info("Created corrupted mob file " + corrupt);
|
||||||
|
HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
|
||||||
|
HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
|
||||||
|
|
||||||
|
// A corrupt mob file doesn't abort the start of regions, so we can enable the table.
|
||||||
|
admin.enableTable(table);
|
||||||
|
HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
|
||||||
|
assertEquals(res.getRetCode(), 0);
|
||||||
|
HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
|
||||||
|
assertEquals(hfcc.getHFilesChecked(), 4);
|
||||||
|
assertEquals(hfcc.getCorrupted().size(), 0);
|
||||||
|
assertEquals(hfcc.getFailures().size(), 0);
|
||||||
|
assertEquals(hfcc.getQuarantined().size(), 0);
|
||||||
|
assertEquals(hfcc.getMissing().size(), 0);
|
||||||
|
assertEquals(hfcc.getMobFilesChecked(), 5);
|
||||||
|
assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
|
||||||
|
assertEquals(hfcc.getFailureMobFiles().size(), 0);
|
||||||
|
assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
|
||||||
|
assertEquals(hfcc.getMissedMobFiles().size(), 0);
|
||||||
|
String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
|
||||||
|
assertEquals(corruptMobFile, quarantinedMobFile);
|
||||||
|
} finally {
|
||||||
|
cleanupTable(table);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that use this should have a timeout, because this method could potentially wait forever.
|
* Test that use this should have a timeout, because this method could potentially wait forever.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue