HBASE-23239 Reporting on status of backing MOB files from client-facing cells (#785)

* Adds a new MapReduce job that builds a report of the health of mob files
* Also builds background information on mob system use
* Adds a basic mob architecture section to the ref guide to explain how mob takes the reference hfile value and finds the actual cell contents
* Adds a troubleshooting mob section to the ref guide to explain how to do a mob reference scan

Signed-off-by: Peter Somogyi <psomogyi@apache.org>
(cherry picked from commit bc2f162749)
parent a3fcc8badb
commit 79134c8a3e
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.mapreduce;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
+import org.apache.hadoop.hbase.mob.mapreduce.MobRefReporter;
 import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
 import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
 import org.apache.hadoop.util.ProgramDriver;
@@ -55,6 +56,8 @@ public class Driver {
     pgd.addClass(WALPlayer.NAME, WALPlayer.class, "Replay WAL files.");
     pgd.addClass(ExportSnapshot.NAME, ExportSnapshot.class, "Export" +
         " the specific snapshot to a given FileSystem.");
+    pgd.addClass(MobRefReporter.NAME, MobRefReporter.class, "Check the mob cells in a particular " +
+        "table and cf and confirm that the files they point to are correct.");

     ProgramDriver.class.getMethod("driver", new Class [] {String[].class}).
         invoke(pgd, new Object[]{args});
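Once registered with the Driver, the reporter runs like the other bundled HBase MapReduce jobs. A hedged example invocation (the exact jar name and classpath setup vary by install; see the ref guide's MapReduce section), where /tmp/mobrefs-report is a hypothetical output directory and some_table/foo name the table and MOB column family to check:

    hadoop jar hbase-mapreduce-<version>.jar mobrefs /tmp/mobrefs-report some_table foo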
@@ -0,0 +1,509 @@
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob.mapreduce;

import java.io.IOException;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Scans a given table + CF for all mob reference cells to get the list of backing mob files.
 * For each referenced file we attempt to verify that said file is on the FileSystem in a place
 * that the MOB system will look when attempting to resolve the actual value.
 *
 * The job includes counters that can help provide a rough sketch of the mob data.
 *
 * <pre>
 * Map-Reduce Framework
 *     Map input records=10000
 * ...
 *     Reduce output records=99
 * ...
 * CELLS PER ROW
 *     Number of rows with 1s of cells per row=10000
 * MOB
 *     NUM_CELLS=52364
 * PROBLEM
 *     Affected rows=338
 *     Problem MOB files=2
 * ROWS WITH PROBLEMS PER FILE
 *     Number of HFiles with 100s of affected rows=2
 * SIZES OF CELLS
 *     Number of cells with size in the 10,000s of bytes=627
 *     Number of cells with size in the 100,000s of bytes=51392
 *     Number of cells with size in the 1,000,000s of bytes=345
 * SIZES OF ROWS
 *     Number of rows with total size in the 100,000s of bytes=6838
 *     Number of rows with total size in the 1,000,000s of bytes=3162
 * </pre>
 *
 *  * Map-Reduce Framework:Map input records - the number of rows with mob references
 *  * Map-Reduce Framework:Reduce output records - the number of unique hfiles referenced
 *  * MOB:NUM_CELLS - the total number of mob reference cells
 *  * PROBLEM:Affected rows - the number of rows that reference hfiles with an issue
 *  * PROBLEM:Problem MOB files - the number of unique hfiles that have an issue
 *  * CELLS PER ROW: - this counter group gives a histogram of the order of magnitude of the
 *    number of cells in a given row by grouping by the number of digits used in each count.
 *    This allows us to see more about the distribution of cells than what we can determine
 *    with just the cell count and the row count. In this particular example we can see that
 *    all of our rows have somewhere between 1 - 9 cells.
 *  * ROWS WITH PROBLEMS PER FILE: - this counter group gives a histogram of the order of
 *    magnitude of the number of rows in each of the hfiles with a problem. e.g. in the
 *    example there are 2 hfiles and they each have the same order of magnitude number of rows,
 *    specifically between 100 and 999.
 *  * SIZES OF CELLS: - this counter group gives a histogram of the order of magnitude of
 *    the size of mob values according to our reference cells. e.g. in the example above we
 *    have cell sizes that are all between 10,000 bytes and 9,999,999 bytes. From this
 *    histogram we can also see that _most_ cells are 100,000 - 999,000 bytes and the smaller
 *    and bigger ones are outliers making up less than 2% of mob cells.
 *  * SIZES OF ROWS: - this counter group gives a histogram of the order of magnitude of the
 *    size of mob values across each row according to our reference cells. In the example above
 *    we have rows that are between 100,000 bytes and 9,999,999 bytes. We can also see that
 *    about 2/3rd of our rows are 100,000 - 999,999 bytes.
 *
 * Generates a report that gives one file status per line, with tabs dividing fields.
 *
 * <pre>
 * RESULT OF LOOKUP   FILE REF   comma separated, base64 encoded rows when there's a problem
 * </pre>
 *
 * e.g.
 *
 * <pre>
 * MOB DIR       09c576e28a65ed2ead0004d192ffaa382019110184b30a1c7e034573bf8580aef8393402
 * MISSING FILE  28e252d7f013973174750d483d358fa020191101f73536e7133f4cd3ab1065edf588d509  MmJiMjMyYzBiMTNjNzc0OTY1ZWY4NTU4ZjBmYmQ2MTUtNTIz,MmEzOGE0YTkzMTZjNDllNWE4MzM1MTdjNDVkMzEwNzAtODg=
 * </pre>
 *
 * Possible results are listed; the first three indicate things are working properly.
 *  * MOB DIR - the reference is in the normal MOB area for the given table and CF
 *  * HLINK TO ARCHIVE FOR SAME TABLE - the reference is present in the archive area for this
 *    table and CF
 *  * HLINK TO ARCHIVE FOR OTHER TABLE - the reference is present in a different table and CF,
 *    either in the MOB or archive areas (e.g. from a snapshot restore or clone)
 *  * ARCHIVE WITH HLINK BUT NOT FROM OUR TABLE - the reference is currently present in the
 *    archive area for this table and CF, but it is kept there because a _different_ table has
 *    a reference to it (e.g. from a snapshot clone). If these other tables are removed then
 *    the file will likely be deleted unless there is a snapshot also referencing it.
 *  * ARCHIVE BUT NO HLINKS - the reference is currently present in the archive for this table
 *    and CF, but there are no references present to prevent its removal. Unless it is newer
 *    than the general TTL (default 5 minutes) or referenced in a snapshot it will be subject
 *    to cleaning.
 *  * ARCHIVE BUT FAILURE WHILE CHECKING HLINKS - Check the job logs to see why things failed
 *    while looking for why this file is being kept around.
 *  * MISSING FILE - We couldn't find the reference on the FileSystem. Either there is dataloss
 *    due to a bug in the MOB storage system or the MOB storage is damaged but in an edge case
 *    that allows it to work for now. You can verify which by doing a raw reference scan to get
 *    the referenced hfile and check the underlying filesystem. See the ref guide section on
 *    mob for details.
 *  * HLINK BUT POINTS TO MISSING FILE - There is a pointer in our mob area for this table and
 *    CF to a file elsewhere on the FileSystem, however the file it points to no longer exists.
 *  * MISSING FILE BUT FAILURE WHILE CHECKING HLINKS - We could not find the referenced file,
 *    however you should check the job logs to see why we couldn't check to see if there is a
 *    pointer to the referenced file in our archive or another table's archive or mob area.
 *
 */
@InterfaceAudience.Private
public class MobRefReporter extends Configured implements Tool {
  private static Logger LOG = LoggerFactory.getLogger(MobRefReporter.class);
  public static final String NAME = "mobrefs";
  static final String REPORT_JOB_ID = "mob.report.job.id";
  static final String REPORT_START_DATETIME = "mob.report.job.start";

  public static class MobRefMapper extends TableMapper<Text, ImmutableBytesWritable> {
    @Override
    public void map(ImmutableBytesWritable r, Result columns, Context context) throws IOException,
        InterruptedException {
      if (columns == null) {
        return;
      }
      Cell[] cells = columns.rawCells();
      if (cells == null || cells.length == 0) {
        return;
      }
      Set<String> files = new HashSet<>();
      long count = 0;
      long size = 0;
      for (Cell c : cells) {
        if (MobUtils.hasValidMobRefCellValue(c)) {
          // TODO confirm there aren't tags
          String fileName = MobUtils.getMobFileName(c);
          if (!files.contains(fileName)) {
            context.write(new Text(fileName), r);
            files.add(fileName);
          }
          final int cellsize = MobUtils.getMobValueLength(c);
          context.getCounter("SIZES OF CELLS", "Number of cells with size in the " +
              log10GroupedString(cellsize) + "s of bytes").increment(1L);
          size += cellsize;
          count++;
        } else {
          LOG.debug("cell is not a mob ref, even though we asked for only refs. cell={}", c);
        }
      }
      context.getCounter("CELLS PER ROW", "Number of rows with " + log10GroupedString(count) +
          "s of cells per row").increment(1L);
      context.getCounter("SIZES OF ROWS", "Number of rows with total size in the " +
          log10GroupedString(size) + "s of bytes").increment(1L);
      context.getCounter("MOB","NUM_CELLS").increment(count);
    }
  }

  public static class MobRefReducer extends
      Reducer<Text, ImmutableBytesWritable, Text, Text> {

    TableName table;
    String mobRegion;
    Path mob;
    Path archive;
    String seperator;

    /* Results that mean things are fine */
    final Text OK_MOB_DIR = new Text("MOB DIR");
    final Text OK_HLINK_RESTORE = new Text("HLINK TO ARCHIVE FOR SAME TABLE");
    final Text OK_HLINK_CLONE = new Text("HLINK TO ARCHIVE FOR OTHER TABLE");
    /* Results that mean something is incorrect */
    final Text INCONSISTENT_ARCHIVE_BAD_LINK =
        new Text("ARCHIVE WITH HLINK BUT NOT FROM OUR TABLE");
    final Text INCONSISTENT_ARCHIVE_STALE = new Text("ARCHIVE BUT NO HLINKS");
    final Text INCONSISTENT_ARCHIVE_IOE = new Text("ARCHIVE BUT FAILURE WHILE CHECKING HLINKS");
    /* Results that mean data is probably already gone */
    final Text DATALOSS_MISSING = new Text("MISSING FILE");
    final Text DATALOSS_HLINK_DANGLING = new Text("HLINK BUT POINTS TO MISSING FILE");
    final Text DATALOSS_MISSING_IOE = new Text("MISSING FILE BUT FAILURE WHILE CHECKING HLINKS");
    final Base64.Encoder base64 = Base64.getEncoder();

    @Override
    public void setup(Context context) throws IOException, InterruptedException {
      final Configuration conf = context.getConfiguration();
      final String tableName = conf.get(TableInputFormat.INPUT_TABLE);
      if (null == tableName) {
        throw new IOException("Job configuration did not include table.");
      }
      table = TableName.valueOf(tableName);
      mobRegion = MobUtils.getMobRegionInfo(table).getEncodedName();
      final String family = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY);
      if (null == family) {
        throw new IOException("Job configuration did not include column family");
      }
      mob = MobUtils.getMobFamilyPath(conf, table, family);
      LOG.info("Using active mob area '{}'", mob);
      archive = HFileArchiveUtil.getStoreArchivePath(conf, table,
          MobUtils.getMobRegionInfo(table).getEncodedName(), family);
      LOG.info("Using archive mob area '{}'", archive);
      seperator = conf.get(TextOutputFormat.SEPERATOR, "\t");
    }

    @Override
    public void reduce(Text key, Iterable<ImmutableBytesWritable> rows, Context context)
        throws IOException, InterruptedException {
      final Configuration conf = context.getConfiguration();
      final String file = key.toString();
      // active mob area
      if (mob.getFileSystem(conf).exists(new Path(mob, file))) {
        LOG.debug("Found file '{}' in mob area", file);
        context.write(OK_MOB_DIR, key);
      // archive area - is there an hlink back reference (from a snapshot from same table)
      } else if (archive.getFileSystem(conf).exists(new Path(archive, file))) {

        Path backRefDir = HFileLink.getBackReferencesDir(archive, file);
        try {
          FileStatus[] backRefs = FSUtils.listStatus(archive.getFileSystem(conf), backRefDir);
          if (backRefs != null) {
            boolean found = false;
            for (FileStatus backRef : backRefs) {
              Pair<TableName, String> refParts = HFileLink.parseBackReferenceName(
                  backRef.getPath().getName());
              if (table.equals(refParts.getFirst()) && mobRegion.equals(refParts.getSecond())) {
                Path hlinkPath = HFileLink.getHFileFromBackReference(MobUtils.getMobHome(conf),
                    backRef.getPath());
                if (hlinkPath.getFileSystem(conf).exists(hlinkPath)) {
                  found = true;
                } else {
                  LOG.warn("Found file '{}' in archive area with a back reference to the mob area "
                      + "for our table, but the mob area does not have a corresponding hfilelink.",
                      file);
                }
              }
            }
            if (found) {
              LOG.debug("Found file '{}' in archive area. Has proper hlink back references to "
                  + "suggest it is from a restored snapshot for this table.", file);
              context.write(OK_HLINK_RESTORE, key);
            } else {
              LOG.warn("Found file '{}' in archive area, but the hlink back references do not "
                  + "properly point to the mob area for our table.", file);
              context.write(INCONSISTENT_ARCHIVE_BAD_LINK, encodeRows(context, key, rows));
            }
          } else {
            LOG.warn("Found file '{}' in archive area, but there are no hlinks pointing to it. Not "
                + "yet used snapshot or an error.", file);
            context.write(INCONSISTENT_ARCHIVE_STALE, encodeRows(context, key, rows));
          }
        } catch (IOException e) {
          LOG.warn("Found file '{}' in archive area, but got an error while checking "
              + "on back references.", file, e);
          context.write(INCONSISTENT_ARCHIVE_IOE, encodeRows(context, key, rows));
        }

      } else {
        // check for an hlink in the active mob area (from a snapshot of a different table)
        try {
          /**
           * we are doing this ourselves instead of using FSUtils.getReferenceFilePaths because
           * we know the mob region never splits, so we can only have HFileLink references
           * and looking for just them is cheaper than listing everything.
           *
           * This glob should match the naming convention for HFileLinks to our referenced hfile.
           * As a simplified explanation those file names look like "table=region-hfile". For
           * details see the {@link HFileLink#createHFileLinkName HFileLink implementation}.
           */
          FileStatus[] hlinks = mob.getFileSystem(conf).globStatus(new Path(mob + "/*=*-" + file));
          if (hlinks != null && hlinks.length != 0) {
            if (hlinks.length != 1) {
              LOG.warn("Found file '{}' as hfilelinks in the mob area, but there are more than " +
                  "one: {}", file, Arrays.deepToString(hlinks));
            }
            HFileLink found = null;
            for (FileStatus hlink : hlinks) {
              HFileLink tmp = HFileLink.buildFromHFileLinkPattern(conf, hlink.getPath());
              if (tmp.exists(archive.getFileSystem(conf))) {
                found = tmp;
                break;
              } else {
                LOG.debug("Target file does not exist for ref {}", tmp);
              }
            }
            if (found != null) {
              LOG.debug("Found file '{}' as a ref in the mob area: {}", file, found);
              context.write(OK_HLINK_CLONE, key);
            } else {
              LOG.warn("Found file '{}' as ref(s) in the mob area but they do not point to an hfile"
                  + " that exists.", file);
              context.write(DATALOSS_HLINK_DANGLING, encodeRows(context, key, rows));
            }
          } else {
            LOG.error("Could not find referenced file '{}'. See the docs on this tool.", file);
            LOG.debug("Note that we don't have the server-side tag from the mob cells that says "
                + "what table the reference is originally from. So if the HFileLink in this table "
                + "is missing but the referenced file is still in the table from that tag, then "
                + "lookups of these impacted rows will work. Do a scan of the reference details "
                + "of the cell for the hfile name and then check the entire hbase install if this "
                + "table was made from a snapshot of another table. See the ref guide section on "
                + "mob for details.");
            context.write(DATALOSS_MISSING, encodeRows(context, key, rows));
          }
        } catch (IOException e) {
          LOG.error(
              "Exception while checking mob area of our table for HFileLinks that point to {}",
              file, e);
          context.write(DATALOSS_MISSING_IOE, encodeRows(context, key, rows));
        }
      }
    }

    /**
     * Reuses the passed Text key. Appends the configured separator and then a comma separated
     * list of base64 encoded row keys.
     */
    private Text encodeRows(Context context, Text key, Iterable<ImmutableBytesWritable> rows)
        throws IOException {
      StringBuilder sb = new StringBuilder(key.toString());
      sb.append(seperator);
      boolean moreThanOne = false;
      long count = 0;
      for (ImmutableBytesWritable row : rows) {
        if (moreThanOne) {
          sb.append(",");
        }
        sb.append(base64.encodeToString(row.copyBytes()));
        moreThanOne = true;
        count++;
      }
      context.getCounter("PROBLEM", "Problem MOB files").increment(1L);
      context.getCounter("PROBLEM", "Affected rows").increment(count);
      context.getCounter("ROWS WITH PROBLEMS PER FILE", "Number of HFiles with " +
          log10GroupedString(count) + "s of affected rows").increment(1L);
      key.set(sb.toString());
      return key;
    }
  }

  /**
   * Returns the string representation of the given number after grouping it
   * into log10 buckets. e.g. 0-9 -> 1, 10-99 -> 10, ..., 100,000-999,999 -> 100,000, etc.
   */
  static String log10GroupedString(long number) {
    return String.format("%,d", (long)(Math.pow(10d, Math.floor(Math.log10(number)))));
  }

  /**
   * Main method for the tool.
   * @return 0 on success, 1 for bad args, 2 if the job aborted with an exception,
   *   3 if the mr job was unsuccessful
   */
  public int run(String[] args) throws IOException, InterruptedException {
    // TODO make family and table optional
    if (args.length != 3) {
      printUsage();
      return 1;
    }
    final String output = args[0];
    final String tableName = args[1];
    final String familyName = args[2];
    final long reportStartTime = EnvironmentEdgeManager.currentTime();
    Configuration conf = getConf();
    try {
      FileSystem fs = FileSystem.get(conf);
      // check whether the current user is the same one with the owner of hbase root
      String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName();
      FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR)));
      if (hbaseRootFileStat.length > 0) {
        String owner = hbaseRootFileStat[0].getOwner();
        if (!owner.equals(currentUserName)) {
          String errorMsg = "The current user[" + currentUserName
              + "] does not have hbase root credentials."
              + " If this job fails due to an inability to read HBase's internal directories, "
              + "you will need to rerun as a user with sufficient permissions. The HBase superuser "
              + "is a safe choice.";
          LOG.warn(errorMsg);
        }
      } else {
        LOG.error("The passed configs point to an HBase dir that does not exist: {}",
            conf.get(HConstants.HBASE_DIR));
        throw new IOException("The target HBase does not exist");
      }

      byte[] family;
      int maxVersions;
      TableName tn = TableName.valueOf(tableName);
      try (Connection connection = ConnectionFactory.createConnection(conf);
          Admin admin = connection.getAdmin()) {
        TableDescriptor htd = admin.getDescriptor(tn);
        ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName));
        if (hcd == null || !hcd.isMobEnabled()) {
          throw new IOException("Column family " + familyName + " is not a MOB column family");
        }
        family = hcd.getName();
        maxVersions = hcd.getMaxVersions();
      }

      String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", "");
      Job job = null;
      Scan scan = new Scan();
      scan.addFamily(family);
      // Do not retrieve the mob data when scanning
      scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
      scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
      // If a scanner caching value isn't set, pick a smaller default since we know we're doing
      // a full table scan and don't want to impact other clients badly.
      scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000));
      scan.setCacheBlocks(false);
      scan.setMaxVersions(maxVersions);
      conf.set(REPORT_JOB_ID, id);

      job = Job.getInstance(conf);
      job.setJarByClass(getClass());
      TableMapReduceUtil.initTableMapperJob(tn, scan,
          MobRefMapper.class, Text.class, ImmutableBytesWritable.class, job);

      job.setReducerClass(MobRefReducer.class);
      job.setOutputFormatClass(TextOutputFormat.class);
      TextOutputFormat.setOutputPath(job, new Path(output));

      job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName);
      // for use in the reducer. easier than re-parsing it out of the scan string.
      job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName);

      // Use the time when we start this job as the base point for file "recency".
      job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime);

      if (job.waitForCompletion(true)) {
        LOG.info("Finished creating report for '{}', family='{}'", tn, familyName);
      } else {
        System.err.println("Job was not successful");
        return 3;
      }
      return 0;

    } catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) {
      System.err.println("Job aborted due to exception " + e);
      return 2; // job failed
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int ret = ToolRunner.run(conf, new MobRefReporter(), args);
    System.exit(ret);
  }

  private void printUsage() {
    System.err.println("Usage:\n" + "--------------------------\n" + MobRefReporter.class.getName()
        + " output-dir tableName familyName");
    System.err.println(" output-dir  Where to write output report.");
    System.err.println(" tableName   The table name");
    System.err.println(" familyName  The column family name");
  }

}
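The report's problem lines end with comma separated, base64 encoded row keys (see encodeRows above). A minimal, hedged sketch for recovering the affected row keys from the example report line in the class javadoc; DecodeReportRows is a hypothetical helper, not part of this change:

    import java.util.Base64;

    public class DecodeReportRows {
      public static void main(String[] args) {
        // Report layout: RESULT OF LOOKUP \t FILE REF \t comma separated base64 row keys
        String line = "MISSING FILE\t"
            + "28e252d7f013973174750d483d358fa020191101f73536e7133f4cd3ab1065edf588d509\t"
            + "MmJiMjMyYzBiMTNjNzc0OTY1ZWY4NTU4ZjBmYmQ2MTUtNTIz,"
            + "MmEzOGE0YTkzMTZjNDllNWE4MzM1MTdjNDVkMzEwNzAtODg=";
        String[] fields = line.split("\t");
        for (String encoded : fields[2].split(",")) {
          // Row keys are arbitrary bytes; these example keys happen to be printable.
          System.out.println(new String(Base64.getDecoder().decode(encoded)));
        }
      }
    }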
@@ -508,7 +508,7 @@ public class HFileLink extends FileLink {
     return new Path(new Path(regionDir, familyPath.getName()), linkName);
   }

-  static Pair<TableName, String> parseBackReferenceName(String name) {
+  public static Pair<TableName, String> parseBackReferenceName(String name) {
     int separatorIndex = name.indexOf('.');
     String linkRegionName = name.substring(0, separatorIndex);
     String tableSubstr = name.substring(separatorIndex + 1)
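The visibility change lets MobRefReporter's reducer parse archive back reference names. As a hedged illustration of the naming this method expects (the encoded region name, a '.' separator, then the table name with the namespace ':' replaced by '='; the values below reuse the example region and table from the ref guide section in this change):

    Pair<TableName, String> parts =
        HFileLink.parseBackReferenceName("372c1b27e3dc0b56c3a031926e5efbe9.default=some_table");
    // parts.getFirst()  -> TableName for namespace "default", table "some_table"
    // parts.getSecond() -> "372c1b27e3dc0b56c3a031926e5efbe9"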
@@ -198,3 +198,116 @@ hbase> major_compact 't1', 'c1', 'MOB'

These commands are also available via `Admin.compact` and
`Admin.majorCompact` methods.

=== MOB architecture

This section is derived from information found in
link:https://issues.apache.org/jira/browse/HBASE-11339[HBASE-11339]. For more information see
the attachment on that issue
"link:https://issues.apache.org/jira/secure/attachment/12724468/HBase%20MOB%20Design-v5.pdf[HBase MOB Design-v5.pdf]".

==== Overview
The MOB feature reduces the overall IO load for configured column families by storing values that
are larger than the configured threshold outside of the normal regions to avoid splits, merges, and
most importantly normal compactions.
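
For example, a column family opts into MOB handling when it is declared, by setting the IS_MOB
and MOB_THRESHOLD attributes (a minimal illustration reusing the 'some_table'/'foo' names from
the examples below; the threshold is in bytes):

----
hbase> create 'some_table', {NAME => 'foo', IS_MOB => true, MOB_THRESHOLD => 102400}
----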

When a cell is first written to a region it is stored in the WAL and memstore regardless of value
size. When memstores from a column family configured to use MOB are eventually flushed, two hfiles
are written simultaneously. Cells with a value smaller than the threshold size are written to a
normal region hfile. Cells with a value larger than the threshold are written into a special MOB
hfile and also have a MOB reference cell written into the normal region HFile.

MOB reference cells have the same key as the cell they are based on. The value of the reference
cell is made up of two pieces of metadata: the size of the actual value and the MOB hfile that
contains the original cell. In addition to any tags originally written to HBase, the reference
cell prepends two additional tags. The first is a marker tag that says the cell is a MOB
reference. This can be used later to scan specifically just for reference cells. The second
stores the namespace and table at the time the MOB hfile is written out. This tag is used to
optimize how the MOB system finds the underlying value in MOB hfiles after a series of HBase
snapshot operations (ref HBASE-12332). Note that tags are only available within HBase servers
and by default are not sent over RPCs.

All MOB hfiles for a given table are managed within a logical region that does not directly serve
requests. When these MOB hfiles are created from a flush or MOB compaction they are placed in a
dedicated mob data area under the hbase root directory specific to the namespace, table, mob
logical region, and column family. In general that means a path structured like:

----
%HBase Root Dir%/mobdir/data/%namespace%/%table%/%logical region%/%column family%/
----

With default configs, for an example table named 'some_table' in the default namespace with a
MOB enabled column family named 'foo', this HDFS directory would be

----
/hbase/mobdir/data/default/some_table/372c1b27e3dc0b56c3a031926e5efbe9/foo/
----

These MOB hfiles are maintained by special chores in the HBase Master rather than by any
individual Region Server. Specifically those chores take care of enforcing TTLs and compacting
them. Note that this compaction is primarily a matter of controlling the total number of files
in HDFS, because our operational assumption for MOB data is that it will seldom be updated or
deleted.

When a given MOB hfile is no longer needed as a result of our compaction process it is archived
just like any normal hfile. Because the table's mob region is independent of all the normal
regions it can coexist with them in the regular archive storage area:

----
/hbase/archive/data/default/some_table/372c1b27e3dc0b56c3a031926e5efbe9/foo/
----

The same hfile cleaning chores that take care of eventually deleting unneeded archived files
from normal regions thus also will take care of these MOB hfiles.

=== MOB Troubleshooting

==== Retrieving MOB metadata through the HBase Shell

While working on troubleshooting failures in the MOB system you can retrieve some of the
internal information through the HBase shell by specifying special attributes on a scan.

----
hbase(main):112:0> scan 'some_table', {STARTROW => '00012-example-row-key', LIMIT => 1,
hbase(main):113:1*     CACHE_BLOCKS => false, ATTRIBUTES => { 'hbase.mob.scan.raw' => '1',
hbase(main):114:2*     'hbase.mob.scan.ref.only' => '1' } }
----

The MOB internal information is stored as four bytes for the size of the underlying cell value
and then a UTF8 string with the name of the MOB HFile that contains the underlying cell value.
Note that by default the entirety of this serialized structure will be passed through the HBase
shell's binary string converter. That means the bytes that make up the value size will most
likely be written as escaped non-printable byte values, e.g. '\x03', unless they happen to
correspond to ASCII characters.

Let's look at a specific example:

----
hbase(main):112:0> scan 'some_table', {STARTROW => '00012-example-row-key', LIMIT => 1,
hbase(main):113:1*     CACHE_BLOCKS => false, ATTRIBUTES => { 'hbase.mob.scan.raw' => '1',
hbase(main):114:2*     'hbase.mob.scan.ref.only' => '1' } }
ROW                        COLUMN+CELL
 00012-example-row-key     column=foo:bar, timestamp=1511179764, value=\x00\x02|\x94d41d8cd98f00b204
                           e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a
1 row(s) in 0.0130 seconds
----

In this case the first four bytes are `\x00\x02|\x94`, which corresponds to the bytes
`[0x00, 0x02, 0x7C, 0x94]`. (Note that the third byte was printed as the ASCII character '|'.)
Decoded as a big-endian integer this gives us an underlying value size of 162,964 bytes.
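
If you need to do this decoding programmatically, here is a minimal sketch assuming only the
layout just described (a 4-byte big-endian size followed by the UTF-8 hfile name); within HBase
itself this decoding is handled internally by the MOB code:

----
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class DecodeMobRefValue {
  public static void main(String[] args) {
    // Rebuild the example reference value: the \x00\x02|\x94 size prefix, then the hfile name.
    byte[] name = "d41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a"
        .getBytes(StandardCharsets.UTF_8);
    byte[] refValue = ByteBuffer.allocate(4 + name.length).putInt(162964).put(name).array();

    // Decode: 4-byte big-endian value size, then the UTF-8 name of the backing MOB hfile.
    int valueSize = ByteBuffer.wrap(refValue, 0, 4).getInt();
    String hfileName = new String(refValue, 4, refValue.length - 4, StandardCharsets.UTF_8);
    System.out.println(valueSize + " bytes stored in " + hfileName);
  }
}
----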

The remaining bytes give us an HFile name,
'd41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a'. This HFile will most
likely be stored in the designated MOB storage area for this specific table. However, the file
could also be in the archive area if this table is from a restored snapshot. Furthermore, if the
table is from a cloned snapshot of a different table then the file could be in either the active
or archive area of that source table. As mentioned in the explanation of MOB reference cells
above, the Region Server will use a server side tag to optimize looking at the mob and archive
area of the correct original table when finding the MOB HFile. Since your scan is client side it
can't retrieve that tag and you'll either need to already know the lineage of your table or
you'll need to search across all tables.

Assuming you are authenticated as a user with HBase superuser rights, you can search for it:

----
$> hdfs dfs -find /hbase -name \
d41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a
/hbase/mobdir/data/default/some_table/372c1b27e3dc0b56c3a031926e5efbe9/foo/d41d8cd98f00b204e9800998ecf8427e19700118ffd9c244fe69488bbc9f2c77d24a3e6a
----