From ba5d1c1f28301adc99019d9d6c4a04fac98ae511 Mon Sep 17 00:00:00 2001
From: Peter Somogyi
Date: Tue, 26 Jun 2018 10:47:06 +0200
Subject: [PATCH] HBASE-20649 Validate HFiles do not have PREFIX_TREE
 DataBlockEncoding

Amending-Author: Balazs Meszaros
Signed-off-by: Sean Busbey
---
 .../tool/DataBlockEncodingValidator.java      |   5 +-
 .../hbase/tool/HFileContentValidator.java     | 126 ++++++++++++++++++
 .../hbase/tool/PreUpgradeValidator.java       |   7 +
 src/main/asciidoc/_chapters/ops_mgt.adoc      | 102 +++++++++++++-
 4 files changed, 234 insertions(+), 6 deletions(-)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java
index e72521b1225..c909725a616 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java
@@ -76,9 +76,8 @@ public class DataBlockEncodingValidator extends AbstractHBaseTool {
 
     if (incompatibilities > 0) {
       LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not "
-          + "upgrade until these encodings are converted to a supported one.", incompatibilities);
-      LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed "
-          + "for instructions.");
+          + "upgrade until these encodings are converted to a supported one. "
+          + "Check https://s.apache.org/prefixtree for instructions.", incompatibilities);
     } else {
       LOG.info("The used Data Block Encodings are compatible with HBase 2.0.");
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java
new file mode 100644
index 00000000000..d60844bf953
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java
@@ -0,0 +1,126 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.tool;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class HFileContentValidator extends AbstractHBaseTool {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HFileContentValidator.class);
+
+  /**
+   * Check HFile contents are readable by HBase 2.
+   *
+   * @param conf used configuration
+   * @return true if all HFile contents are readable, false otherwise
+   * @throws IOException if a remote or network exception occurs
+   */
+  private boolean validateHFileContent(Configuration conf) throws IOException {
+    FileSystem fileSystem = FSUtils.getCurrentFileSystem(conf);
+
+    ExecutorService threadPool = createThreadPool(conf);
+    HFileCorruptionChecker checker;
+
+    try {
+      checker = new HFileCorruptionChecker(conf, threadPool, false);
+
+      Path rootDir = FSUtils.getRootDir(conf);
+      LOG.info("Validating HFile contents under {}", rootDir);
+
+      Collection<Path> tableDirs = FSUtils.getTableDirs(fileSystem, rootDir);
+      checker.checkTables(tableDirs);
+
+      Path archiveRootDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
+      LOG.info("Validating HFile contents under {}", archiveRootDir);
+
+      List<Path> archiveTableDirs = FSUtils.getTableDirs(fileSystem, archiveRootDir);
+      checker.checkTables(archiveTableDirs);
+    } finally {
+      threadPool.shutdown();
+
+      try {
+        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+      }
+    }
+
+    int checkedFiles = checker.getHFilesChecked();
+    Collection<Path> corrupted = checker.getCorrupted();
+
+    if (corrupted.isEmpty()) {
+      LOG.info("Checked {} HFiles, none of them are corrupted.", checkedFiles);
+      LOG.info("There are no incompatible HFiles.");
+
+      return true;
+    } else {
+      LOG.info("Checked {} HFiles, {} are corrupted.", checkedFiles, corrupted.size());
+
+      for (Path path : corrupted) {
+        LOG.info("Corrupted file: {}", path);
+      }
+
+      LOG.info("Change data block encodings before upgrading. "
+          + "Check https://s.apache.org/prefixtree for instructions.");
+
+      return false;
+    }
+  }
+
+  private ExecutorService createThreadPool(Configuration conf) {
+    int availableProcessors = Runtime.getRuntime().availableProcessors();
+    int numThreads = conf.getInt("hfilevalidator.numthreads", availableProcessors);
+    return Executors.newFixedThreadPool(numThreads,
+        Threads.getNamedThreadFactory("hfile-validator"));
+  }
+
+  @Override
+  protected void addOptions() {
+  }
+
+  @Override
+  protected void processOptions(CommandLine cmd) {
+  }
+
+  @Override
+  protected int doWork() throws Exception {
+    return (validateHFileContent(getConf())) ? EXIT_SUCCESS : EXIT_FAILURE;
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
index 7bf307484b0..818004c272e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
@@ -38,6 +38,7 @@ import org.slf4j.LoggerFactory;
  * <ul>
  * <li>validate-cp: Validates Co-processors compatibility</li>
  * <li>validate-dbe: Check Data Block Encoding for column families</li>
+ * <li>validate-hfile: Check for corrupted HFiles</li>
  * </ul>
  * </p>
  */
@@ -49,6 +50,7 @@ public class PreUpgradeValidator implements Tool {
   public static final String TOOL_NAME = "pre-upgrade";
   public static final String VALIDATE_CP_NAME = "validate-cp";
   public static final String VALIDATE_DBE_NAME = "validate-dbe";
+  public static final String VALIDATE_HFILE = "validate-hfile";
 
   private Configuration configuration;
 
@@ -69,6 +71,8 @@ public class PreUpgradeValidator implements Tool {
         VALIDATE_CP_NAME);
     System.out.printf(" %-15s Validate DataBlockEncodings are compatible with HBase%n",
         VALIDATE_DBE_NAME);
+    System.out.printf(" %-15s Validate HFile contents are readable%n",
+        VALIDATE_HFILE);
     System.out.println("For further information, please use command -h");
   }
 
@@ -88,6 +92,9 @@ public class PreUpgradeValidator implements Tool {
       case VALIDATE_DBE_NAME:
         tool = new DataBlockEncodingValidator();
         break;
+      case VALIDATE_HFILE:
+        tool = new HFileContentValidator();
+        break;
       case "-h":
         printUsage();
         return AbstractHBaseTool.EXIT_FAILURE;
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 01e6de683d7..7e7001c714c 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -898,7 +898,8 @@ $ bin/hbase pre-upgrade validate-cp -table .*
 It validates every table level co-processor where the table name matches the `.*` regular expression.
 
 ==== DataBlockEncoding validation
-HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families.
+HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families. For further information
+please check <<upgrade2.0.prefix-tree.removed>>.
 To verify that none of the column families are using incompatible Data Block Encodings in the cluster run the following command.
 
 [source, bash]
@@ -906,8 +907,103 @@ To verify that none of the column families are using incompatible Data Block Enc
 ----
 $ bin/hbase pre-upgrade validate-dbe
 ----
 
-This check validates all column families and print out any incompatibilities.
-To change `PREFIX_TREE` encoding to supported one check <<upgrade2.0.prefix-tree.removed>>.
+This check validates all column families and prints out any incompatibilities. For example:
+
+----
+2018-07-13 09:58:32,028 WARN [main] tool.DataBlockEncodingValidator: Incompatible DataBlockEncoding for table: t, cf: f, encoding: PREFIX_TREE
+----
+
+This means that the Data Block Encoding of table `t`, column family `f` is incompatible. To fix it,
+use the `alter` command in HBase shell:
+
+----
+alter 't', { NAME => 'f', DATA_BLOCK_ENCODING => 'FAST_DIFF' }
+----
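+
+If needed, the result can be verified with the `describe` shell command, which prints the column
+family configuration, including `DATA_BLOCK_ENCODING` (the exact output format depends on the
+HBase version):
+
+----
+describe 't'
+----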
+
+Please also validate HFiles, as described in the next section.
+
+==== HFile Content validation
+Even though the Data Block Encoding has been changed from `PREFIX_TREE`, it is still possible to
+have HFiles that contain data encoded that way.
+To verify that HFiles are readable with HBase 2, please use the _HFile content validator_.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade validate-hfile
+----
+
+The tool will log the corrupt HFiles and details about the root cause.
+If the problem is caused by `PREFIX_TREE` encoding, the encodings must be changed before upgrading
+to HBase 2.
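+
+The validator checks the files on a thread pool whose size defaults to the number of available
+processors. The pool size can be tuned with the `hfilevalidator.numthreads` configuration
+property, for example with an entry in _hbase-site.xml_ (a sketch; the value `8` below is only
+an illustration):
+
+[source, xml]
+----
+<property>
+  <name>hfilevalidator.numthreads</name>
+  <value>8</value>
+</property>
+----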
+
+The following log messages show an example of incompatible HFiles.
+
+----
+2018-06-05 16:20:46,976 WARN [hfilevalidator-pool1-t3] hbck.HFileCorruptionChecker: Found corrupt HFile hdfs://example.com:8020/hbase/data/default/t/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://example.com:8020/hbase/data/default/t/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+  ...
+Caused by: java.io.IOException: Invalid data block encoding type in file info: PREFIX_TREE
+  ...
+Caused by: java.lang.IllegalArgumentException: No enum constant org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.PREFIX_TREE
+  ...
+2018-06-05 16:20:47,322 INFO [main] tool.HFileContentValidator: Corrupted file: hdfs://example.com:8020/hbase/data/default/t/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+2018-06-05 16:20:47,383 INFO [main] tool.HFileContentValidator: Corrupted file: hdfs://example.com:8020/hbase/archive/data/default/t/56be41796340b757eb7fff1eb5e2a905/f/29c641ae91c34fc3bee881f45436b6d1
+----
+
+===== Fixing PREFIX_TREE errors
+
+It's possible to get `PREFIX_TREE` errors even after changing the Data Block Encoding to a
+supported one. This can happen because some HFiles are still encoded with `PREFIX_TREE`, or
+because some snapshots still reference such files.
+
+To fix the HFiles, please run a major compaction on the table (it was `default:t` according to
+the log message):
+
+----
+major_compact 't'
+----
+
+HFiles can be referenced from snapshots, too; this is the case when the HFile is located under
+`archive/data`.
+The first step is to determine which snapshot references that HFile (the name of the file was
+`29c641ae91c34fc3bee881f45436b6d1` according to the logs):
+
+[source, bash]
+----
+for snapshot in $(hbase snapshotinfo -list-snapshots 2> /dev/null | tail -n +2 | cut -f 1 -d \|);
+do
+  echo "checking snapshot named '${snapshot}'";
+  hbase snapshotinfo -snapshot "${snapshot}" -files 2> /dev/null | grep 29c641ae91c34fc3bee881f45436b6d1;
+done
+----
+
+The output of this shell script is:
+
+----
+checking snapshot named 't_snap'
+   1.0 K t/56be41796340b757eb7fff1eb5e2a905/f/29c641ae91c34fc3bee881f45436b6d1 (archive)
+----
+
+This means that the `t_snap` snapshot references the incompatible HFile. If the snapshot is still
+needed, then it has to be recreated with HBase shell:
+
+----
+# create a new namespace for the cleanup process
+create_namespace 'pre_upgrade_cleanup'
+
+# clone the snapshot into a temporary table
+clone_snapshot 't_snap', 'pre_upgrade_cleanup:t'
+
+# change the encoding and rewrite the HFiles with a major compaction
+alter 'pre_upgrade_cleanup:t', { NAME => 'f', DATA_BLOCK_ENCODING => 'FAST_DIFF' }
+major_compact 'pre_upgrade_cleanup:t'
+
+# remove the invalid snapshot
+delete_snapshot 't_snap'
+
+# create a new snapshot from the re-encoded table
+snapshot 'pre_upgrade_cleanup:t', 't_snap'
+
+# remove the temporary table
+disable 'pre_upgrade_cleanup:t'
+drop 'pre_upgrade_cleanup:t'
+drop_namespace 'pre_upgrade_cleanup'
+----
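+
+After the snapshot has been recreated, the validation can be re-run to confirm that no
+incompatible HFiles remain. Note that archived HFiles which are no longer referenced are removed
+by a periodic cleaner chore, so the archived copy may only disappear after a short delay.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade validate-hfile
+----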
+
+For further information, please refer to
+link:https://issues.apache.org/jira/browse/HBASE-20649?focusedCommentId=16535476#comment-16535476[HBASE-20649].
 
 [[ops.regionmgt]]
 == Region Management