HBASE-20649 Validate HFiles do not have PREFIX_TREE DataBlockEncoding

Amending-Author: Balazs Meszaros <balazs.meszaros@cloudera.com>

Signed-off-by: Sean Busbey <busbey@apache.org>
Peter Somogyi authored on 2018-06-26 10:47:06 +02:00, committed by Sean Busbey
parent 44f6ef1c90
commit ba5d1c1f28

4 changed files with 234 additions and 6 deletions

DataBlockEncodingValidator.java

@@ -76,9 +76,8 @@ public class DataBlockEncodingValidator extends AbstractHBaseTool {
     if (incompatibilities > 0) {
       LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not "
-          + "upgrade until these encodings are converted to a supported one.", incompatibilities);
-      LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed "
-          + "for instructions.");
+          + "upgrade until these encodings are converted to a supported one. "
+          + "Check https://s.apache.org/prefixtree for instructions.", incompatibilities);
     } else {
       LOG.info("The used Data Block Encodings are compatible with HBase 2.0.");
     }

HFileContentValidator.java (new file)

@@ -0,0 +1,126 @@
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.tool;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;

@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
public class HFileContentValidator extends AbstractHBaseTool {

  private static final Logger LOG = LoggerFactory.getLogger(HFileContentValidator.class);

  /**
   * Check that HFile contents are readable by HBase 2.
   *
   * @param conf used configuration
   * @return true if all HFiles are readable, false if any of them are corrupted
   * @throws IOException if a remote or network exception occurs
   */
  private boolean validateHFileContent(Configuration conf) throws IOException {
    FileSystem fileSystem = FSUtils.getCurrentFileSystem(conf);

    ExecutorService threadPool = createThreadPool(conf);
    HFileCorruptionChecker checker;

    try {
      checker = new HFileCorruptionChecker(conf, threadPool, false);

      Path rootDir = FSUtils.getRootDir(conf);
      LOG.info("Validating HFile contents under {}", rootDir);

      Collection<Path> tableDirs = FSUtils.getTableDirs(fileSystem, rootDir);
      checker.checkTables(tableDirs);

      Path archiveRootDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
      LOG.info("Validating HFile contents under {}", archiveRootDir);

      List<Path> archiveTableDirs = FSUtils.getTableDirs(fileSystem, archiveRootDir);
      checker.checkTables(archiveTableDirs);
    } finally {
      threadPool.shutdown();

      try {
        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    }

    int checkedFiles = checker.getHFilesChecked();
    Collection<Path> corrupted = checker.getCorrupted();

    if (corrupted.isEmpty()) {
      LOG.info("Checked {} HFiles, none of them are corrupted.", checkedFiles);
      LOG.info("There are no incompatible HFiles.");
      return true;
    } else {
      LOG.info("Checked {} HFiles, {} are corrupted.", checkedFiles, corrupted.size());

      for (Path path : corrupted) {
        LOG.info("Corrupted file: {}", path);
      }

      LOG.info("Change data block encodings before upgrading. "
          + "Check https://s.apache.org/prefixtree for instructions.");
      return false;
    }
  }

  private ExecutorService createThreadPool(Configuration conf) {
    // Pool size comes from "hfilevalidator.numthreads", defaulting to the
    // number of available processors.
    int availableProcessors = Runtime.getRuntime().availableProcessors();
    int numThreads = conf.getInt("hfilevalidator.numthreads", availableProcessors);
    return Executors.newFixedThreadPool(numThreads,
        Threads.getNamedThreadFactory("hfile-validator"));
  }

  @Override
  protected void addOptions() {
  }

  @Override
  protected void processOptions(CommandLine cmd) {
  }

  @Override
  protected int doWork() throws Exception {
    return (validateHFileContent(getConf())) ? EXIT_SUCCESS : EXIT_FAILURE;
  }
}
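
The validator is normally launched as `hbase pre-upgrade validate-hfile` (wired up in PreUpgradeValidator below), but since it extends `AbstractHBaseTool`, which implements the Hadoop `Tool` interface, it can also be driven directly. A minimal sketch, assuming the HBase client jars are on the classpath; the `ValidateHFiles` wrapper class is illustrative, not part of this patch:

[source, java]
----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.tool.HFileContentValidator;
import org.apache.hadoop.util.ToolRunner;

public class ValidateHFiles {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // ToolRunner injects the configuration and calls run(), which parses the
    // (empty) option set and delegates to doWork() above.
    int exitCode = ToolRunner.run(conf, new HFileContentValidator(), args);
    System.exit(exitCode);
  }
}
----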

PreUpgradeValidator.java

@@ -38,6 +38,7 @@ import org.slf4j.LoggerFactory;
  * <ul>
  * <li>validate-cp: Validates Co-processors compatibility</li>
  * <li>validate-dbe: Check Data Block Encoding for column families</li>
+ * <li>validate-hfile: Check for corrupted HFiles</li>
  * </ul>
  * </p>
  */
@@ -49,6 +50,7 @@ public class PreUpgradeValidator implements Tool {
   public static final String TOOL_NAME = "pre-upgrade";
   public static final String VALIDATE_CP_NAME = "validate-cp";
   public static final String VALIDATE_DBE_NAME = "validate-dbe";
+  public static final String VALIDATE_HFILE = "validate-hfile";

   private Configuration configuration;
@@ -69,6 +71,8 @@ public class PreUpgradeValidator implements Tool {
         VALIDATE_CP_NAME);
     System.out.printf("  %-15s Validate DataBlockEncodings are compatible with HBase%n",
         VALIDATE_DBE_NAME);
+    System.out.printf("  %-15s Validate HFile contents are readable%n",
+        VALIDATE_HFILE);
     System.out.println("For further information, please use command -h");
   }
@@ -88,6 +92,9 @@ public class PreUpgradeValidator implements Tool {
       case VALIDATE_DBE_NAME:
         tool = new DataBlockEncodingValidator();
         break;
+      case VALIDATE_HFILE:
+        tool = new HFileContentValidator();
+        break;
       case "-h":
         printUsage();
         return AbstractHBaseTool.EXIT_FAILURE;

ops_mgt.adoc

@@ -898,7 +898,8 @@ $ bin/hbase pre-upgrade validate-cp -table .*
 It validates every table level co-processors where the table name matches to `.*` regular expression.

 ==== DataBlockEncoding validation
-HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families.
+HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families. For further information
+please check <<upgrade2.0.prefix-tree.removed,_prefix-tree_ encoding removed>>.

 To verify that none of the column families are using incompatible Data Block Encodings in the cluster run the following command.

 [source, bash]
@@ -906,8 +907,103 @@ To verify that none of the column families are using incompatible Data Block Encodings in the cluster run the following command.
 $ bin/hbase pre-upgrade validate-dbe
 ----

-This check validates all column families and print out any incompatibilities.
-To change `PREFIX_TREE` encoding to supported one check <<upgrade2.0.prefix-tree.removed,_prefix-tree_ encoding removed>>.
+This check validates all column families and prints out any incompatibilities. For example:
+
+----
+2018-07-13 09:58:32,028 WARN [main] tool.DataBlockEncodingValidator: Incompatible DataBlockEncoding for table: t, cf: f, encoding: PREFIX_TREE
+----
+
+This means that the Data Block Encoding of table `t`, column family `f` is incompatible. To fix it, use the `alter` command in the HBase shell:
+
+----
+alter 't', { NAME => 'f', DATA_BLOCK_ENCODING => 'FAST_DIFF' }
+----
+
+Please also validate HFiles, which is described in the next section.
+
+==== HFile Content validation
+Even though the Data Block Encoding has been changed from `PREFIX_TREE`, it is still possible to have HFiles that contain data encoded that way.
+To verify that HFiles are readable with HBase 2, please use the _HFile content validator_.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade validate-hfile
+----
+
+The tool logs the corrupt HFiles and details about the root cause.
+If the problem is caused by PREFIX_TREE encoding, it is necessary to change encodings before upgrading to HBase 2.
+
+The following log message shows an example of incorrect HFiles.
+
+----
+2018-06-05 16:20:46,976 WARN [hfilevalidator-pool1-t3] hbck.HFileCorruptionChecker: Found corrupt HFile hdfs://example.com:8020/hbase/data/default/t/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://example.com:8020/hbase/data/default/t/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+...
+Caused by: java.io.IOException: Invalid data block encoding type in file info: PREFIX_TREE
+...
+Caused by: java.lang.IllegalArgumentException: No enum constant org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.PREFIX_TREE
+...
+2018-06-05 16:20:47,322 INFO [main] tool.HFileContentValidator: Corrupted file: hdfs://example.com:8020/hbase/data/default/t/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+2018-06-05 16:20:47,383 INFO [main] tool.HFileContentValidator: Corrupted file: hdfs://example.com:8020/hbase/archive/data/default/t/56be41796340b757eb7fff1eb5e2a905/f/29c641ae91c34fc3bee881f45436b6d1
+----
+
+===== Fixing PREFIX_TREE errors
+
+It is possible to get `PREFIX_TREE` errors even after changing the Data Block Encoding to a supported one. This can happen
+because some HFiles are still encoded with `PREFIX_TREE`, or because snapshots still reference such HFiles.
+
+To fix the HFiles, run a major compaction on the table (`default:t` according to the log message above):
+
+----
+major_compact 't'
+----
+
+HFiles can be referenced from snapshots, too; this is the case when the HFile is located under `archive/data`.
+The first step is to determine which snapshot references the HFile (the file was named `29c641ae91c34fc3bee881f45436b6d1`
+in the logs):
+
+[source, bash]
+----
+for snapshot in $(hbase snapshotinfo -list-snapshots 2> /dev/null | tail -n -1 | cut -f 1 -d \|);
+do
+  echo "checking snapshot named '${snapshot}'";
+  hbase snapshotinfo -snapshot "${snapshot}" -files 2> /dev/null | grep 29c641ae91c34fc3bee881f45436b6d1;
+done
+----
+
+The output of this shell script is:
+
+----
+checking snapshot named 't_snap'
+1.0 K t/56be41796340b757eb7fff1eb5e2a905/f/29c641ae91c34fc3bee881f45436b6d1 (archive)
+----
+
+This means that the `t_snap` snapshot references the incompatible HFile. If the snapshot is still needed,
+it has to be recreated with the HBase shell:
+
+----
+# creating a new namespace for the cleanup process
+create_namespace 'pre_upgrade_cleanup'
+# creating a new table from the snapshot
+clone_snapshot 't_snap', 'pre_upgrade_cleanup:t'
+# changing the Data Block Encoding and rewriting the HFiles
+alter 'pre_upgrade_cleanup:t', { NAME => 'f', DATA_BLOCK_ENCODING => 'FAST_DIFF' }
+major_compact 'pre_upgrade_cleanup:t'
+# removing the invalid snapshot
+delete_snapshot 't_snap'
+# creating a new snapshot
+snapshot 'pre_upgrade_cleanup:t', 't_snap'
+# removing the temporary table
+disable 'pre_upgrade_cleanup:t'
+drop 'pre_upgrade_cleanup:t'
+drop_namespace 'pre_upgrade_cleanup'
+----
+
+For further information, please refer to
+link:https://issues.apache.org/jira/browse/HBASE-20649?focusedCommentId=16535476#comment-16535476[HBASE-20649].

 [[ops.regionmgt]]
 == Region Management
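
A note on the major compaction step in the documentation above: `major_compact` returns immediately while the compaction runs in the background, so re-running `validate-hfile` right away can still report the old files. A minimal sketch of triggering the compaction and waiting for it through the Java `Admin` API, assuming the example table `t`; the `CompactAndWait` class name and the polling interval are illustrative:

[source, java]
----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.CompactionState;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CompactAndWait {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName table = TableName.valueOf("t");
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin()) {
      // Equivalent of "major_compact 't'" in the HBase shell.
      admin.majorCompact(table);
      // Best-effort wait: the reported state can still be NONE for a moment
      // before the compaction starts, hence the initial sleep.
      Thread.sleep(5000);
      while (admin.getCompactionState(table) != CompactionState.NONE) {
        Thread.sleep(5000);
      }
      System.out.println("Major compaction finished for " + table);
    }
  }
}
----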