HBASE-13932 - Add mob integrity check in HFilePrettyPrinter (Jingcheng du)
This commit is contained in:
parent
c4437e2516
commit
ba4ba32b0d
|
@ -24,9 +24,13 @@ import java.io.DataInput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.SortedMap;
|
import java.util.SortedMap;
|
||||||
|
|
||||||
import org.apache.commons.cli.CommandLine;
|
import org.apache.commons.cli.CommandLine;
|
||||||
|
@ -57,12 +61,14 @@ import org.apache.hadoop.hbase.KeyValueUtil;
|
||||||
import org.apache.hadoop.hbase.Tag;
|
import org.apache.hadoop.hbase.Tag;
|
||||||
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
|
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
|
||||||
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
|
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
|
||||||
|
import org.apache.hadoop.hbase.mob.MobUtils;
|
||||||
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
|
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
|
||||||
import org.apache.hadoop.hbase.util.BloomFilter;
|
import org.apache.hadoop.hbase.util.BloomFilter;
|
||||||
import org.apache.hadoop.hbase.util.BloomFilterUtil;
|
import org.apache.hadoop.hbase.util.BloomFilterUtil;
|
||||||
import org.apache.hadoop.hbase.util.BloomFilterFactory;
|
import org.apache.hadoop.hbase.util.BloomFilterFactory;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
|
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||||
import org.apache.hadoop.hbase.util.Writables;
|
import org.apache.hadoop.hbase.util.Writables;
|
||||||
import org.apache.hadoop.util.Tool;
|
import org.apache.hadoop.util.Tool;
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
import org.apache.hadoop.util.ToolRunner;
|
||||||
|
@ -95,6 +101,10 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
private boolean checkRow;
|
private boolean checkRow;
|
||||||
private boolean checkFamily;
|
private boolean checkFamily;
|
||||||
private boolean isSeekToRow = false;
|
private boolean isSeekToRow = false;
|
||||||
|
private boolean checkMobIntegrity = false;
|
||||||
|
private Map<String, List<Path>> mobFileLocations;
|
||||||
|
private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50;
|
||||||
|
private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The row which the user wants to specify and print all the KeyValues for.
|
* The row which the user wants to specify and print all the KeyValues for.
|
||||||
|
@ -130,6 +140,8 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
options.addOption("w", "seekToRow", true,
|
options.addOption("w", "seekToRow", true,
|
||||||
"Seek to this row and print all the kvs for this row only");
|
"Seek to this row and print all the kvs for this row only");
|
||||||
options.addOption("s", "stats", false, "Print statistics");
|
options.addOption("s", "stats", false, "Print statistics");
|
||||||
|
options.addOption("i", "checkMobIntegrity", false,
|
||||||
|
"Print all cells whose mob files are missing");
|
||||||
|
|
||||||
OptionGroup files = new OptionGroup();
|
OptionGroup files = new OptionGroup();
|
||||||
files.addOption(new Option("f", "file", true,
|
files.addOption(new Option("f", "file", true,
|
||||||
|
@ -158,6 +170,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
printStats = cmd.hasOption("s");
|
printStats = cmd.hasOption("s");
|
||||||
checkRow = cmd.hasOption("k");
|
checkRow = cmd.hasOption("k");
|
||||||
checkFamily = cmd.hasOption("a");
|
checkFamily = cmd.hasOption("a");
|
||||||
|
checkMobIntegrity = cmd.hasOption("i");
|
||||||
|
|
||||||
if (cmd.hasOption("f")) {
|
if (cmd.hasOption("f")) {
|
||||||
files.add(new Path(cmd.getOptionValue("f")));
|
files.add(new Path(cmd.getOptionValue("f")));
|
||||||
|
@ -199,6 +212,12 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
files.addAll(regionFiles);
|
files.addAll(regionFiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(checkMobIntegrity) {
|
||||||
|
if (verbose) {
|
||||||
|
System.out.println("checkMobIntegrity is enabled");
|
||||||
|
}
|
||||||
|
mobFileLocations = new HashMap<String, List<Path>>();
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,7 +274,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
|
|
||||||
KeyValueStatsCollector fileStats = null;
|
KeyValueStatsCollector fileStats = null;
|
||||||
|
|
||||||
if (verbose || printKey || checkRow || checkFamily || printStats) {
|
if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) {
|
||||||
// scan over file and read key/value's and check if requested
|
// scan over file and read key/value's and check if requested
|
||||||
HFileScanner scanner = reader.getScanner(false, false, false);
|
HFileScanner scanner = reader.getScanner(false, false, false);
|
||||||
fileStats = new KeyValueStatsCollector();
|
fileStats = new KeyValueStatsCollector();
|
||||||
|
@ -313,6 +332,9 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
|
private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
|
||||||
HFileScanner scanner, byte[] row) throws IOException {
|
HFileScanner scanner, byte[] row) throws IOException {
|
||||||
Cell pCell = null;
|
Cell pCell = null;
|
||||||
|
FileSystem fs = FileSystem.get(getConf());
|
||||||
|
Set<String> foundMobFiles = new LinkedHashSet<String>(FOUND_MOB_FILES_CACHE_CAPACITY);
|
||||||
|
Set<String> missingMobFiles = new LinkedHashSet<String>(MISSING_MOB_FILES_CACHE_CAPACITY);
|
||||||
do {
|
do {
|
||||||
Cell cell = scanner.getKeyValue();
|
Cell cell = scanner.getKeyValue();
|
||||||
if (row != null && row.length != 0) {
|
if (row != null && row.length != 0) {
|
||||||
|
@ -369,11 +391,87 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
+ "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell));
|
+ "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// check if mob files are missing.
|
||||||
|
if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) {
|
||||||
|
Tag tnTag = MobUtils.getTableNameTag(cell);
|
||||||
|
if (tnTag == null) {
|
||||||
|
System.err.println("ERROR, wrong tag format in mob reference cell "
|
||||||
|
+ CellUtil.getCellKeyAsString(cell));
|
||||||
|
} else if (!MobUtils.hasValidMobRefCellValue(cell)) {
|
||||||
|
System.err.println("ERROR, wrong value format in mob reference cell "
|
||||||
|
+ CellUtil.getCellKeyAsString(cell));
|
||||||
|
} else {
|
||||||
|
TableName tn = TableName.valueOf(tnTag.getValue());
|
||||||
|
String mobFileName = MobUtils.getMobFileName(cell);
|
||||||
|
boolean exist = mobFileExists(fs, tn, mobFileName,
|
||||||
|
Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles);
|
||||||
|
if (!exist) {
|
||||||
|
// report error
|
||||||
|
System.err.println("ERROR, the mob file [" + mobFileName
|
||||||
|
+ "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
pCell = cell;
|
pCell = cell;
|
||||||
++count;
|
++count;
|
||||||
} while (scanner.next());
|
} while (scanner.next());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether the referenced mob file exists.
|
||||||
|
*/
|
||||||
|
private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family,
|
||||||
|
Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException {
|
||||||
|
if (foundMobFiles.contains(mobFileName)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (missingMobFiles.contains(mobFileName)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
String tableName = tn.getNameAsString();
|
||||||
|
List<Path> locations = mobFileLocations.get(tableName);
|
||||||
|
if (locations == null) {
|
||||||
|
locations = new ArrayList<Path>(2);
|
||||||
|
locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family));
|
||||||
|
locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn,
|
||||||
|
MobUtils.getMobRegionInfo(tn).getEncodedName(), family));
|
||||||
|
mobFileLocations.put(tn.getNameAsString(), locations);
|
||||||
|
}
|
||||||
|
boolean exist = false;
|
||||||
|
for (Path location : locations) {
|
||||||
|
Path mobFilePath = new Path(location, mobFileName);
|
||||||
|
if (fs.exists(mobFilePath)) {
|
||||||
|
exist = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (exist) {
|
||||||
|
evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY);
|
||||||
|
foundMobFiles.add(mobFileName);
|
||||||
|
} else {
|
||||||
|
evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY);
|
||||||
|
missingMobFiles.add(mobFileName);
|
||||||
|
}
|
||||||
|
return exist;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evicts the cached mob files if the set is larger than the limit.
|
||||||
|
*/
|
||||||
|
private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) {
|
||||||
|
if (mobFileNames.size() < limit) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int index = 0;
|
||||||
|
int evict = limit / 2;
|
||||||
|
Iterator<String> fileNamesItr = mobFileNames.iterator();
|
||||||
|
while (index < evict && fileNamesItr.hasNext()) {
|
||||||
|
fileNamesItr.next();
|
||||||
|
fileNamesItr.remove();
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Format a string of the form "k1=v1, k2=v2, ..." into separate lines
|
* Format a string of the form "k1=v1, k2=v2, ..." into separate lines
|
||||||
* with a four-space indentation.
|
* with a four-space indentation.
|
||||||
|
|
Loading…
Reference in New Issue