HBASE-23239 Reporting on status of backing MOB files from client-facing cells (#785)
* Adds a new MapReduce job that builds a report of health of mob files * Also builds background information on mob system use * add a basic mob architecture in the ref guide to explain how mob takes the reference hfile value and finds the actual cell contents * add a troubleshooting mob section to the ref guide to explain how to do a mob reference scan. Signed-off-by: Peter Somogyi <psomogyi@apache.org>
This commit is contained in:
parent
b38446c017
commit
bc2f162749
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.mapreduce;
|
|||
|
||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
|
||||
import org.apache.hadoop.hbase.mob.mapreduce.MobRefReporter;
|
||||
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
|
||||
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
|
||||
import org.apache.hadoop.util.ProgramDriver;
|
||||
|
@ -55,6 +56,8 @@ public class Driver {
|
|||
pgd.addClass(WALPlayer.NAME, WALPlayer.class, "Replay WAL files.");
|
||||
pgd.addClass(ExportSnapshot.NAME, ExportSnapshot.class, "Export" +
|
||||
" the specific snapshot to a given FileSystem.");
|
||||
pgd.addClass(MobRefReporter.NAME, MobRefReporter.class, "Check the mob cells in a particular " +
|
||||
"table and cf and confirm that the files they point to are correct.");
|
||||
|
||||
ProgramDriver.class.getMethod("driver", new Class [] {String[].class}).
|
||||
invoke(pgd, new Object[]{args});
|
||||
|
|
|
@ -0,0 +1,509 @@
|
|||
/**
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mob.mapreduce;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Base64;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||
import org.apache.hadoop.hbase.io.HFileLink;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
|
||||
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
|
||||
import org.apache.hadoop.hbase.mapreduce.TableMapper;
|
||||
import org.apache.hadoop.hbase.mob.MobConstants;
|
||||
import org.apache.hadoop.hbase.mob.MobUtils;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
/**
|
||||
* Scans a given table + CF for all mob reference cells to get the list of backing mob files.
|
||||
* For each referenced file we attempt to verify that said file is on the FileSystem in a place
|
||||
* that the MOB system will look when attempting to resolve the actual value.
|
||||
*
|
||||
* The job includes counters that can help provide a rough sketch of the mob data.
|
||||
*
|
||||
* <pre>
|
||||
* Map-Reduce Framework
|
||||
* Map input records=10000
|
||||
* ...
|
||||
* Reduce output records=99
|
||||
* ...
|
||||
* CELLS PER ROW
|
||||
* Number of rows with 1s of cells per row=10000
|
||||
* MOB
|
||||
* NUM_CELLS=52364
|
||||
* PROBLEM
|
||||
* Affected rows=338
|
||||
* Problem MOB files=2
|
||||
* ROWS WITH PROBLEMS PER FILE
|
||||
* Number of HFiles with 100s of affected rows=2
|
||||
* SIZES OF CELLS
|
||||
* Number of cells with size in the 10,000s of bytes=627
|
||||
* Number of cells with size in the 100,000s of bytes=51392
|
||||
* Number of cells with size in the 1,000,000s of bytes=345
|
||||
* SIZES OF ROWS
|
||||
* Number of rows with total size in the 100,000s of bytes=6838
|
||||
* Number of rows with total size in the 1,000,000s of bytes=3162
|
||||
* </pre>
|
||||
*
|
||||
* * Map-Reduce Framework:Map input records - the number of rows with mob references
|
||||
* * Map-Reduce Framework:Reduce output records - the number of unique hfiles referenced
|
||||
* * MOB:NUM_CELLS - the total number of mob reference cells
|
||||
* * PROBLEM:Affected rows - the number of rows that reference hfiles with an issue
|
||||
* * PROBLEM:Problem MOB files - the number of unique hfiles that have an issue
|
||||
* * CELLS PER ROW: - this counter group gives a histogram of the order of magnitude of the
|
||||
* number of cells in a given row by grouping by the number of digits used in each count.
|
||||
* This allows us to see more about the distribution of cells than what we can determine
|
||||
* with just the cell count and the row count. In this particular example we can see that
|
||||
* all of our rows have somewhere between 1 - 9 cells.
|
||||
* * ROWS WITH PROBLEMS PER FILE: - this counter group gives a histogram of the order of
|
||||
* magnitude of the number of rows in each of the hfiles with a problem. e.g. in the
|
||||
* example there are 2 hfiles and they each have the same order of magnitude number of rows,
|
||||
* specifically between 100 and 999.
|
||||
* * SIZES OF CELLS: - this counter group gives a histogram of the order of magnitude of
|
||||
* the size of mob values according to our reference cells. e.g. in the example above we
|
||||
* have cell sizes that are all between 10,000 bytes and 9,999,999 bytes. From this
|
||||
* histogram we can also see that _most_ cells are 100,000 - 999,999 bytes and the smaller
|
||||
* and bigger ones are outliers making up less than 2% of mob cells.
|
||||
* * SIZES OF ROWS: - this counter group gives a histogram of the order of magnitude of the
|
||||
* size of mob values across each row according to our reference cells. In the example above
|
||||
* we have rows that are between 100,000 bytes and 9,999,999 bytes. We can also see that
|
||||
* about 2/3rd of our rows are 100,000 - 999,999 bytes.
|
||||
*
|
||||
* Generates a report that gives one file status per line, with tabs dividing fields.
|
||||
*
|
||||
* <pre>
|
||||
* RESULT OF LOOKUP FILE REF comma separated, base64 encoded rows when there's a problem
|
||||
* </pre>
|
||||
*
|
||||
* e.g.
|
||||
*
|
||||
* <pre>
|
||||
* MOB DIR 09c576e28a65ed2ead0004d192ffaa382019110184b30a1c7e034573bf8580aef8393402
|
||||
* MISSING FILE 28e252d7f013973174750d483d358fa020191101f73536e7133f4cd3ab1065edf588d509 MmJiMjMyYzBiMTNjNzc0OTY1ZWY4NTU4ZjBmYmQ2MTUtNTIz,MmEzOGE0YTkzMTZjNDllNWE4MzM1MTdjNDVkMzEwNzAtODg=
|
||||
* </pre>
|
||||
*
|
||||
* Possible results are listed; the first three indicate things are working properly.
|
||||
* * MOB DIR - the reference is in the normal MOB area for the given table and CF
|
||||
* * HLINK TO ARCHIVE FOR SAME TABLE - the reference is present in the archive area for this
|
||||
* table and CF
|
||||
* * HLINK TO ARCHIVE FOR OTHER TABLE - the reference is present in a different table and CF,
|
||||
* either in the MOB or archive areas (e.g. from a snapshot restore or clone)
|
||||
* * ARCHIVE WITH HLINK BUT NOT FROM OUR TABLE - the reference is currently present in the archive
|
||||
* area for this table and CF, but it is kept there because a _different_ table has a
|
||||
* reference to it (e.g. from a snapshot clone). If these other tables are removed then
|
||||
* the file will likely be deleted unless there is a snapshot also referencing it.
|
||||
* * ARCHIVE BUT NO HLINKS - the reference is currently present in the archive for this table and
|
||||
* CF, but there are no references present to prevent its removal. Unless it is newer than
|
||||
* the general TTL (default 5 minutes) or referenced in a snapshot it will be subject to
|
||||
* cleaning.
|
||||
* * ARCHIVE BUT FAILURE WHILE CHECKING HLINKS - Check the job logs to see why things failed while
|
||||
* looking for why this file is being kept around.
|
||||
* * MISSING FILE - We couldn't find the reference on the FileSystem. Either there is dataloss due
|
||||
* to a bug in the MOB storage system or the MOB storage is damaged but in an edge case that
|
||||
* allows it to work for now. You can verify which by doing a raw reference scan to get the
|
||||
* referenced hfile and check the underlying filesystem. See the ref guide section on mob
|
||||
* for details.
|
||||
* * HLINK BUT POINTS TO MISSING FILE - There is a pointer in our mob area for this table and CF
|
||||
* to a file elsewhere on the FileSystem, however the file it points to no longer exists.
|
||||
* * MISSING FILE BUT FAILURE WHILE CHECKING HLINKS - We could not find the referenced file,
|
||||
* however you should check the job logs to see why we couldn't check to see if there is a
|
||||
* pointer to the referenced file in our archive or another table's archive or mob area.
|
||||
*
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class MobRefReporter extends Configured implements Tool {
|
||||
private static Logger LOG = LoggerFactory.getLogger(MobRefReporter.class);
|
||||
public static final String NAME = "mobrefs";
|
||||
static final String REPORT_JOB_ID = "mob.report.job.id";
|
||||
static final String REPORT_START_DATETIME = "mob.report.job.start";
|
||||
|
||||
public static class MobRefMapper extends TableMapper<Text, ImmutableBytesWritable> {
|
||||
@Override
|
||||
public void map(ImmutableBytesWritable r, Result columns, Context context) throws IOException,
|
||||
InterruptedException {
|
||||
if (columns == null) {
|
||||
return;
|
||||
}
|
||||
Cell[] cells = columns.rawCells();
|
||||
if (cells == null || cells.length == 0) {
|
||||
return;
|
||||
}
|
||||
Set<String> files = new HashSet<>();
|
||||
long count = 0;
|
||||
long size = 0;
|
||||
for (Cell c : cells) {
|
||||
if (MobUtils.hasValidMobRefCellValue(c)) {
|
||||
// TODO confirm there aren't tags
|
||||
String fileName = MobUtils.getMobFileName(c);
|
||||
if (!files.contains(fileName)) {
|
||||
context.write(new Text(fileName), r);
|
||||
files.add(fileName);
|
||||
}
|
||||
final int cellsize = MobUtils.getMobValueLength(c);
|
||||
context.getCounter("SIZES OF CELLS", "Number of cells with size in the " +
|
||||
log10GroupedString(cellsize) + "s of bytes").increment(1L);
|
||||
size += cellsize;
|
||||
count++;
|
||||
} else {
|
||||
LOG.debug("cell is not a mob ref, even though we asked for only refs. cell={}", c);
|
||||
}
|
||||
}
|
||||
context.getCounter("CELLS PER ROW", "Number of rows with " + log10GroupedString(count) +
|
||||
"s of cells per row").increment(1L);
|
||||
context.getCounter("SIZES OF ROWS", "Number of rows with total size in the " +
|
||||
log10GroupedString(size) + "s of bytes").increment(1L);
|
||||
context.getCounter("MOB","NUM_CELLS").increment(count);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MobRefReducer extends
|
||||
Reducer<Text, ImmutableBytesWritable, Text, Text> {
|
||||
|
||||
TableName table;
|
||||
String mobRegion;
|
||||
Path mob;
|
||||
Path archive;
|
||||
String seperator;
|
||||
|
||||
/* Results that mean things are fine */
|
||||
final Text OK_MOB_DIR = new Text("MOB DIR");
|
||||
final Text OK_HLINK_RESTORE = new Text("HLINK TO ARCHIVE FOR SAME TABLE");
|
||||
final Text OK_HLINK_CLONE = new Text("HLINK TO ARCHIVE FOR OTHER TABLE");
|
||||
/* Results that mean something is incorrect */
|
||||
final Text INCONSISTENT_ARCHIVE_BAD_LINK =
|
||||
new Text("ARCHIVE WITH HLINK BUT NOT FROM OUR TABLE");
|
||||
final Text INCONSISTENT_ARCHIVE_STALE = new Text("ARCHIVE BUT NO HLINKS");
|
||||
final Text INCONSISTENT_ARCHIVE_IOE = new Text("ARCHIVE BUT FAILURE WHILE CHECKING HLINKS");
|
||||
/* Results that mean data is probably already gone */
|
||||
final Text DATALOSS_MISSING = new Text("MISSING FILE");
|
||||
final Text DATALOSS_HLINK_DANGLING = new Text("HLINK BUT POINTS TO MISSING FILE");
|
||||
final Text DATALOSS_MISSING_IOE = new Text("MISSING FILE BUT FAILURE WHILE CHECKING HLINKS");
|
||||
final Base64.Encoder base64 = Base64.getEncoder();
|
||||
|
||||
@Override
|
||||
public void setup(Context context) throws IOException, InterruptedException {
|
||||
final Configuration conf = context.getConfiguration();
|
||||
final String tableName = conf.get(TableInputFormat.INPUT_TABLE);
|
||||
if (null == tableName) {
|
||||
throw new IOException("Job configuration did not include table.");
|
||||
}
|
||||
table = TableName.valueOf(tableName);
|
||||
mobRegion = MobUtils.getMobRegionInfo(table).getEncodedName();
|
||||
final String family = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY);
|
||||
if (null == family) {
|
||||
throw new IOException("Job configuration did not include column family");
|
||||
}
|
||||
mob = MobUtils.getMobFamilyPath(conf, table, family);
|
||||
LOG.info("Using active mob area '{}'", mob);
|
||||
archive = HFileArchiveUtil.getStoreArchivePath(conf, table,
|
||||
MobUtils.getMobRegionInfo(table).getEncodedName(), family);
|
||||
LOG.info("Using archive mob area '{}'", archive);
|
||||
seperator = conf.get(TextOutputFormat.SEPERATOR, "\t");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reduce(Text key, Iterable<ImmutableBytesWritable> rows, Context context)
|
||||
throws IOException, InterruptedException {
|
||||
final Configuration conf = context.getConfiguration();
|
||||
final String file = key.toString();
|
||||
// active mob area
|
||||
if (mob.getFileSystem(conf).exists(new Path(mob, file))) {
|
||||
LOG.debug("Found file '{}' in mob area", file);
|
||||
context.write(OK_MOB_DIR, key);
|
||||
// archive area - is there an hlink back reference (from a snapshot from same table)
|
||||
} else if (archive.getFileSystem(conf).exists(new Path(archive, file))) {
|
||||
|
||||
Path backRefDir = HFileLink.getBackReferencesDir(archive, file);
|
||||
try {
|
||||
FileStatus[] backRefs = FSUtils.listStatus(archive.getFileSystem(conf), backRefDir);
|
||||
if (backRefs != null) {
|
||||
boolean found = false;
|
||||
for (FileStatus backRef : backRefs) {
|
||||
Pair<TableName, String> refParts = HFileLink.parseBackReferenceName(
|
||||
backRef.getPath().getName());
|
||||
if (table.equals(refParts.getFirst()) && mobRegion.equals(refParts.getSecond())) {
|
||||
Path hlinkPath = HFileLink.getHFileFromBackReference(MobUtils.getMobHome(conf),
|
||||
backRef.getPath());
|
||||
if (hlinkPath.getFileSystem(conf).exists(hlinkPath)) {
|
||||
found = true;
|
||||
} else {
|
||||
LOG.warn("Found file '{}' in archive area with a back reference to the mob area "
|
||||
+ "for our table, but the mob area does not have a corresponding hfilelink.",
|
||||
file);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
LOG.debug("Found file '{}' in archive area. has proper hlink back references to "
|
||||
+ "suggest it is from a restored snapshot for this table.", file);
|
||||
context.write(OK_HLINK_RESTORE, key);
|
||||
} else {
|
||||
LOG.warn("Found file '{}' in archive area, but the hlink back references do not "
|
||||
+ "properly point to the mob area for our table.", file);
|
||||
context.write(INCONSISTENT_ARCHIVE_BAD_LINK, encodeRows(context, key, rows));
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Found file '{}' in archive area, but there are no hlinks pointing to it. Not "
|
||||
+ "yet used snapshot or an error.", file);
|
||||
context.write(INCONSISTENT_ARCHIVE_STALE, encodeRows(context, key, rows));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Found file '{}' in archive area, but got an error while checking "
|
||||
+ "on back references.", file, e);
|
||||
context.write(INCONSISTENT_ARCHIVE_IOE, encodeRows(context, key, rows));
|
||||
}
|
||||
|
||||
} else {
|
||||
// check for an hlink in the active mob area (from a snapshot of a different table)
|
||||
try {
|
||||
/**
|
||||
* we are doing this ourselves instead of using FSUtils.getReferenceFilePaths because
|
||||
* we know the mob region never splits, so we can only have HFileLink references
|
||||
* and looking for just them is cheaper then listing everything.
|
||||
*
|
||||
* This glob should match the naming convention for HFileLinks to our referenced hfile.
|
||||
* As simplified explanation those file names look like "table=region-hfile". For details
|
||||
* see the {@link HFileLink#createHFileLinkName HFileLink implementation}.
|
||||
*/
|
||||
FileStatus[] hlinks = mob.getFileSystem(conf).globStatus(new Path(mob + "/*=*-" + file));
|
||||
if (hlinks != null && hlinks.length != 0) {
|
||||
if (hlinks.length != 1) {
|
||||
LOG.warn("Found file '{}' as hfilelinks in the mob area, but there are more than " +
|
||||
"one: {}", file, Arrays.deepToString(hlinks));
|
||||
}
|
||||
HFileLink found = null;
|
||||
for (FileStatus hlink : hlinks) {
|
||||
HFileLink tmp = HFileLink.buildFromHFileLinkPattern(conf, hlink.getPath());
|
||||
if (tmp.exists(archive.getFileSystem(conf))) {
|
||||
found = tmp;
|
||||
break;
|
||||
} else {
|
||||
LOG.debug("Target file does not exist for ref {}", tmp);
|
||||
}
|
||||
}
|
||||
if (found != null) {
|
||||
LOG.debug("Found file '{}' as a ref in the mob area: {}", file, found);
|
||||
context.write(OK_HLINK_CLONE, key);
|
||||
} else {
|
||||
LOG.warn("Found file '{}' as ref(s) in the mob area but they do not point to an hfile"
|
||||
+ " that exists.", file);
|
||||
context.write(DATALOSS_HLINK_DANGLING, encodeRows(context, key, rows));
|
||||
}
|
||||
} else {
|
||||
LOG.error("Could not find referenced file '{}'. See the docs on this tool.", file);
|
||||
LOG.debug("Note that we don't have the server-side tag from the mob cells that says "
|
||||
+ "what table the reference is originally from. So if the HFileLink in this table "
|
||||
+ "is missing but the referenced file is still in the table from that tag, then "
|
||||
+ "lookups of these impacted rows will work. Do a scan of the reference details "
|
||||
+ "of the cell for the hfile name and then check the entire hbase install if this "
|
||||
+ "table was made from a snapshot of another table. see the ref guide section on "
|
||||
+ "mob for details.");
|
||||
context.write(DATALOSS_MISSING, encodeRows(context, key, rows));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error(
|
||||
"Exception while checking mob area of our table for HFileLinks that point to {}",
|
||||
file, e);
|
||||
context.write(DATALOSS_MISSING_IOE, encodeRows(context, key, rows));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* reuses the passed Text key. appends the configured seperator and then a comma seperated list
|
||||
* of base64 encoded row keys
|
||||
*/
|
||||
private Text encodeRows(Context context, Text key, Iterable<ImmutableBytesWritable> rows)
|
||||
throws IOException {
|
||||
StringBuilder sb = new StringBuilder(key.toString());
|
||||
sb.append(seperator);
|
||||
boolean moreThanOne = false;
|
||||
long count = 0;
|
||||
for (ImmutableBytesWritable row : rows) {
|
||||
if (moreThanOne) {
|
||||
sb.append(",");
|
||||
}
|
||||
sb.append(base64.encodeToString(row.copyBytes()));
|
||||
moreThanOne = true;
|
||||
count++;
|
||||
}
|
||||
context.getCounter("PROBLEM", "Problem MOB files").increment(1L);
|
||||
context.getCounter("PROBLEM", "Affected rows").increment(count);
|
||||
context.getCounter("ROWS WITH PROBLEMS PER FILE", "Number of HFiles with " +
|
||||
log10GroupedString(count) + "s of affected rows").increment(1L);
|
||||
key.set(sb.toString());
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string representation of the given number after grouping it
|
||||
* into log10 buckets. e.g. 0-9 -> 1, 10-99 -> 10, ..., 100,000-999,999 -> 100,000, etc.
|
||||
*/
|
||||
static String log10GroupedString(long number) {
|
||||
return String.format("%,d", (long)(Math.pow(10d, Math.floor(Math.log10(number)))));
|
||||
}
|
||||
|
||||
/**
|
||||
* Main method for the tool.
|
||||
* @return 0 if success, 1 for bad args. 2 if job aborted with an exception,
|
||||
* 3 if mr job was unsuccessful
|
||||
*/
|
||||
public int run(String[] args) throws IOException, InterruptedException {
|
||||
// TODO make family and table optional
|
||||
if (args.length != 3) {
|
||||
printUsage();
|
||||
return 1;
|
||||
}
|
||||
final String output = args[0];
|
||||
final String tableName = args[1];
|
||||
final String familyName = args[2];
|
||||
final long reportStartTime = EnvironmentEdgeManager.currentTime();
|
||||
Configuration conf = getConf();
|
||||
try {
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
// check whether the current user is the same one with the owner of hbase root
|
||||
String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName();
|
||||
FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR)));
|
||||
if (hbaseRootFileStat.length > 0) {
|
||||
String owner = hbaseRootFileStat[0].getOwner();
|
||||
if (!owner.equals(currentUserName)) {
|
||||
String errorMsg = "The current user[" + currentUserName
|
||||
+ "] does not have hbase root credentials."
|
||||
+ " If this job fails due to an inability to read HBase's internal directories, "
|
||||
+ "you will need to rerun as a user with sufficient permissions. The HBase superuser "
|
||||
+ "is a safe choice.";
|
||||
LOG.warn(errorMsg);
|
||||
}
|
||||
} else {
|
||||
LOG.error("The passed configs point to an HBase dir does not exist: {}",
|
||||
conf.get(HConstants.HBASE_DIR));
|
||||
throw new IOException("The target HBase does not exist");
|
||||
}
|
||||
|
||||
byte[] family;
|
||||
int maxVersions;
|
||||
TableName tn = TableName.valueOf(tableName);
|
||||
try (Connection connection = ConnectionFactory.createConnection(conf);
|
||||
Admin admin = connection.getAdmin()) {
|
||||
TableDescriptor htd = admin.getDescriptor(tn);
|
||||
ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName));
|
||||
if (hcd == null || !hcd.isMobEnabled()) {
|
||||
throw new IOException("Column family " + familyName + " is not a MOB column family");
|
||||
}
|
||||
family = hcd.getName();
|
||||
maxVersions = hcd.getMaxVersions();
|
||||
}
|
||||
|
||||
|
||||
String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", "");
|
||||
Job job = null;
|
||||
Scan scan = new Scan();
|
||||
scan.addFamily(family);
|
||||
// Do not retrieve the mob data when scanning
|
||||
scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
|
||||
scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
|
||||
// If a scanner caching value isn't set, pick a smaller default since we know we're doing
|
||||
// a full table scan and don't want to impact other clients badly.
|
||||
scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000));
|
||||
scan.setCacheBlocks(false);
|
||||
scan.setMaxVersions(maxVersions);
|
||||
conf.set(REPORT_JOB_ID, id);
|
||||
|
||||
job = Job.getInstance(conf);
|
||||
job.setJarByClass(getClass());
|
||||
TableMapReduceUtil.initTableMapperJob(tn, scan,
|
||||
MobRefMapper.class, Text.class, ImmutableBytesWritable.class, job);
|
||||
|
||||
job.setReducerClass(MobRefReducer.class);
|
||||
job.setOutputFormatClass(TextOutputFormat.class);
|
||||
TextOutputFormat.setOutputPath(job, new Path(output));
|
||||
|
||||
job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName);
|
||||
// for use in the reducer. easier than re-parsing it out of the scan string.
|
||||
job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName);
|
||||
|
||||
// Use when we start this job as the base point for file "recency".
|
||||
job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime);
|
||||
|
||||
if (job.waitForCompletion(true)) {
|
||||
LOG.info("Finished creating report for '{}', family='{}'", tn, familyName);
|
||||
} else {
|
||||
System.err.println("Job was not successful");
|
||||
return 3;
|
||||
}
|
||||
return 0;
|
||||
|
||||
} catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) {
|
||||
System.err.println("Job aborted due to exception " + e);
|
||||
return 2; // job failed
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Configuration conf = HBaseConfiguration.create();
|
||||
int ret = ToolRunner.run(conf, new MobRefReporter(), args);
|
||||
System.exit(ret);
|
||||
}
|
||||
|
||||
private void printUsage() {
|
||||
System.err.println("Usage:\n" + "--------------------------\n" + MobRefReporter.class.getName()
|
||||
+ " output-dir tableName familyName");
|
||||
System.err.println(" output-dir Where to write output report.");
|
||||
System.err.println(" tableName The table name");
|
||||
System.err.println(" familyName The column family name");
|
||||
}
|
||||
|
||||
}
|
|
@ -508,7 +508,7 @@ public class HFileLink extends FileLink {
|
|||
return new Path(new Path(regionDir, familyPath.getName()), linkName);
|
||||
}
|
||||
|
||||
static Pair<TableName, String> parseBackReferenceName(String name) {
|
||||
public static Pair<TableName, String> parseBackReferenceName(String name) {
|
||||
int separatorIndex = name.indexOf('.');
|
||||
String linkRegionName = name.substring(0, separatorIndex);
|
||||
String tableSubstr = name.substring(separatorIndex + 1)
|
||||
|
|
|
@ -198,3 +198,116 @@ hbase> major_compact 't1', 'c1’, ‘MOB’
|
|||
|
||||
These commands are also available via `Admin.compact` and
|
||||
`Admin.majorCompact` methods.
|
||||
|
||||
=== MOB architecture
|
||||
|
||||
This section is derived from information found in
|
||||
link:https://issues.apache.org/jira/browse/HBASE-11339[HBASE-11339]. For more information see
|
||||
the attachment on that issue
|
||||
"link:https://issues.apache.org/jira/secure/attachment/12724468/HBase%20MOB%20Design-v5.pdf[HBase MOB Design-v5.pdf]".
|
||||
|
||||
==== Overview
|
||||
The MOB feature reduces the overall IO load for configured column families by storing values that
|
||||
are larger than the configured threshold outside of the normal regions to avoid splits, merges, and
|
||||
most importantly normal compactions.
|
||||
|
||||
When a cell is first written to a region it is stored in the WAL and memstore regardless of value
|
||||
size. When memstores from a column family configured to use MOB are eventually flushed two hfiles
|
||||
are written simultaneously. Cells with a value smaller than the threshold size are written to a
|
||||
normal region hfile. Cells with a value larger than the threshold are written into a special MOB
|
||||
hfile and also have a MOB reference cell written into the normal region HFile.
|
||||
|
||||
MOB reference cells have the same key as the cell they are based on. The value of the reference cell
|
||||
is made up of two pieces of metadata: the size of the actual value and the MOB hfile that contains
|
||||
the original cell. In addition to any tags originally written to HBase, the reference cell prepends
|
||||
two additional tags. The first is a marker tag that says the cell is a MOB reference. This can be
|
||||
used later to scan specifically just for reference cells. The second stores the namespace and table
|
||||
at the time the MOB hfile is written out. This tag is used to optimize how the MOB system finds
|
||||
the underlying value in MOB hfiles after a series of HBase snapshot operations (ref HBASE-12332).
|
||||
Note that tags are only available within HBase servers and by default are not sent over RPCs.
|
||||
|
||||
All MOB hfiles for a given table are managed within a logical region that does not directly serve
|
||||
requests. When these MOB hfiles are created from a flush or MOB compaction they are placed in a
|
||||
dedicated mob data area under the hbase root directory specific to the namespace, table, mob
|
||||
logical region, and column family. In general that means a path structured like:
|
||||
|
||||
----
|
||||
%HBase Root Dir%/mobdir/data/%namespace%/%table%/%logical region%/%column family%/
|
||||
----
|
||||
|
||||
With default configs, for an example table named 'some_table' in the
|
||||
default namespace with a MOB enabled column family named 'foo' this HDFS directory would be
|
||||
|
||||
----
|
||||
/hbase/mobdir/data/default/some_table/372c1b27e3dc0b56c3a031926e5efbe9/foo/
|
||||
----
|
||||
|
||||
These MOB hfiles are maintained by special chores in the HBase Master rather than by any individual
|
||||
Region Server. Specifically those chores take care of enforcing TTLs and compacting them. Note that
|
||||
this compaction is primarily a matter of controlling the total number of files in HDFS because our
|
||||
operational assumption for MOB data is that it will seldom be updated or deleted.
|
||||
|
||||
When a given MOB hfile is no longer needed as a result of our compaction process it is archived just
|
||||
like any normal hfile. Because the table's mob region is independent of all the normal regions it
|
||||
can coexist with them in the regular archive storage area:
|
||||
|
||||
----
|
||||
/hbase/archive/data/default/some_table/372c1b27e3dc0b56c3a031926e5efbe9/foo/
|
||||
----
|
||||
|
||||
The same hfile cleaning chores that take care of eventually deleting unneeded archived files from
|
||||
normal regions thus also will take care of these MOB hfiles.
|
||||
|
||||
=== MOB Troubleshooting
|
||||
|
||||
==== Retrieving MOB metadata through the HBase Shell
|
||||
|
||||
While working on troubleshooting failures in the MOB system you can retrieve some of the internal
|
||||
information through the HBase shell by specifying special attributes on a scan.
|
||||
|
||||
----
|
||||
hbase(main):112:0> scan 'some_table', {STARTROW => '00012-example-row-key', LIMIT => 1,
|
||||
hbase(main):113:1* CACHE_BLOCKS => false, ATTRIBUTES => { 'hbase.mob.scan.raw' => '1',
|
||||
hbase(main):114:2* 'hbase.mob.scan.ref.only' => '1' } }
|
||||
----
|
||||
|
||||
The MOB internal information is stored as four bytes for the size of the underlying cell value and
|
||||
then a UTF8 string with the name of the MOB HFile that contains the underlying cell value. Note that
|
||||
by default the entirety of this serialized structure will be passed through the HBase shell's binary
|
||||
string converter. That means the bytes that make up the value size will most likely be written as
|
||||
escaped non-printable byte values, e.g. '\x03', unless they happen to correspond to ASCII
|
||||
characters.
|
||||
|
||||
Let's look at a specific example:
|
||||
|
||||
----
|
||||
hbase(main):112:0> scan 'some_table', {STARTROW => '00012-example-row-key', LIMIT => 1,
|
||||
hbase(main):113:1* CACHE_BLOCKS => false, ATTRIBUTES => { 'hbase.mob.scan.raw' => '1',
|
||||
hbase(main):114:2* 'hbase.mob.scan.ref.only' => '1' } }
|
||||
ROW COLUMN+CELL
|
||||
00012-example-row-key column=foo:bar, timestamp=1511179764, value=\x00\x02|\x94d41d8cd98f00b204
|
||||
e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a
|
||||
1 row(s) in 0.0130 seconds
|
||||
----
|
||||
|
||||
In this case the first four bytes are `\x00\x02|\x94` which corresponds to the bytes
|
||||
`[0x00, 0x02, 0x7C, 0x94]`. (Note that the third byte was printed as the ASCII character '|'.)
|
||||
Decoded as an integer this gives us an underlying value size of 162,964 bytes.
|
||||
|
||||
The remaining bytes give us an HFile name,
|
||||
'd41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a'. This HFile will most
|
||||
likely be stored in the designated MOB storage area for this specific table. However, the file could
|
||||
also be in the archive area if this table is from a restored snapshot. Furthermore, if the table is
|
||||
from a cloned snapshot of a different table then the file could be in either the active or archive
|
||||
area of that source table. As mentioned in the explanation of MOB reference cells above, the Region
|
||||
Server will use a server side tag to optimize looking at the mob and archive area of the correct
|
||||
original table when finding the MOB HFile. Since your scan is client side it can't retrieve that tag
|
||||
and you'll either need to already know the lineage of your table or you'll need to search across all
|
||||
tables.
|
||||
|
||||
Assuming you are authenticated as a user with HBase superuser rights, you can search for it:
|
||||
----
|
||||
$> hdfs dfs -find /hbase -name \
|
||||
d41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a
|
||||
/hbase/mobdir/data/default/some_table/372c1b27e3dc0b56c3a031926e5efbe9/foo/d41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a
|
||||
----
|
||||
|
|
Loading…
Reference in New Issue