From 61d706044e674866f96aee8d5cf74d0e72dddfc1 Mon Sep 17 00:00:00 2001 From: zhangduo Date: Wed, 4 Jul 2018 21:40:52 +0800 Subject: [PATCH] HBASE-20831 Copy master doc into branch-2.1 and edit to make it suit 2.1.0 --- pom.xml | 14 +- src/main/asciidoc/_chapters/amv2.adoc | 173 ++++ .../_chapters/appendix_acl_matrix.adoc | 1 + ...ppendix_contributing_to_documentation.adoc | 6 +- .../appendix_hbase_incompatibilities.adoc | 714 ++++++++++++++ .../_chapters/appendix_hfile_format.adoc | 2 +- src/main/asciidoc/_chapters/architecture.adoc | 255 +++-- .../asciidoc/_chapters/backup_restore.adoc | 912 ------------------ src/main/asciidoc/_chapters/community.adoc | 54 +- src/main/asciidoc/_chapters/compression.adoc | 22 +- .../asciidoc/_chapters/configuration.adoc | 60 +- src/main/asciidoc/_chapters/datamodel.adoc | 35 + src/main/asciidoc/_chapters/developer.adoc | 127 ++- .../asciidoc/_chapters/external_apis.adoc | 109 +-- .../asciidoc/_chapters/getting_started.adoc | 57 +- .../asciidoc/_chapters/hbase-default.adoc | 2 +- src/main/asciidoc/_chapters/hbase_mob.adoc | 4 - src/main/asciidoc/_chapters/images | 2 +- src/main/asciidoc/_chapters/ops_mgt.adoc | 284 +++++- src/main/asciidoc/_chapters/performance.adoc | 2 - src/main/asciidoc/_chapters/pv2.adoc | 163 ++++ .../asciidoc/_chapters/schema_design.adoc | 31 +- src/main/asciidoc/_chapters/security.adoc | 13 +- src/main/asciidoc/_chapters/shell.adoc | 8 +- src/main/asciidoc/_chapters/tracing.adoc | 6 +- .../asciidoc/_chapters/troubleshooting.adoc | 131 ++- src/main/asciidoc/_chapters/unit_testing.adoc | 2 - src/main/asciidoc/_chapters/upgrading.adoc | 405 ++++++++ src/main/asciidoc/book.adoc | 4 +- src/main/asciidoc/images | 2 +- .../site/asciidoc/acid-semantics.adoc | 2 +- src/{main => }/site/asciidoc/bulk-loads.adoc | 1 - src/{main => }/site/asciidoc/cygwin.adoc | 7 +- .../site/asciidoc/export_control.adoc | 8 +- src/{main => }/site/asciidoc/index.adoc | 0 src/{main => }/site/asciidoc/metrics.adoc | 11 +- src/{main => 
}/site/asciidoc/old_news.adoc | 3 +- .../site/asciidoc/pseudo-distributed.adoc | 1 - src/{main => }/site/asciidoc/replication.adoc | 0 src/{main => }/site/asciidoc/resources.adoc | 1 - src/{main => }/site/asciidoc/sponsors.adoc | 3 +- .../custom/project-info-report.properties | 0 src/{main => }/site/resources/.htaccess | 0 src/{main => }/site/resources/book/.empty | 0 src/{main => }/site/resources/css/site.css | 0 src/{main => }/site/resources/doap_Hbase.rdf | 8 +- .../site/resources/images/architecture.gif | Bin .../images/backup-app-components.png | Bin .../images/backup-cloud-appliance.png | Bin .../images/backup-dedicated-cluster.png | Bin .../resources/images/backup-intra-cluster.png | Bin .../site/resources/images/bc_basic.png | Bin .../site/resources/images/bc_config.png | Bin .../site/resources/images/bc_l1.png | Bin .../site/resources/images/bc_l2_buckets.png | Bin .../site/resources/images/bc_stats.png | Bin .../site/resources/images/big_h_logo.png | Bin .../site/resources/images/big_h_logo.svg | 0 .../images/data_block_diff_encoding.png | Bin .../images/data_block_no_encoding.png | Bin .../images/data_block_prefix_encoding.png | Bin .../site/resources/images/favicon.ico | Bin .../site/resources/images/hadoop-logo.jpg | Bin .../site/resources/images/hbase_logo.png | Bin .../site/resources/images/hbase_logo.svg | 0 .../resources/images/hbase_logo_with_orca.png | Bin .../resources/images/hbase_logo_with_orca.xcf | Bin .../images/hbase_logo_with_orca_large.png | Bin .../images/hbase_replication_diagram.jpg | Bin .../images/hbasecon2015.30percent.png | Bin .../images/hbasecon2016-stack-logo.jpg | Bin .../resources/images/hbasecon2016-stacked.png | Bin .../site/resources/images/hbasecon2017.png | Bin .../resources/images/hbaseconasia2017.png | Bin .../site/resources/images/hfile.png | Bin .../site/resources/images/hfilev2.png | Bin .../resources/images/jumping-orca_rotated.png | Bin .../resources/images/jumping-orca_rotated.xcf | Bin 
.../images/jumping-orca_rotated_12percent.png | Bin .../images/jumping-orca_rotated_25percent.png | Bin .../jumping-orca_transparent_rotated.xcf | Bin .../resources/images/region_split_process.png | Bin .../site/resources/images/region_states.png | Bin .../resources/images/replication_overview.png | Bin .../resources/images/timeline_consistency.png | Bin .../1.5-HBASE/maven-fluido-skin-1.5-HBASE.jar | Bin .../1.5-HBASE/maven-fluido-skin-1.5-HBASE.pom | 0 .../maven-metadata-local.xml | 0 src/{main => }/site/site.xml | 0 src/{main => }/site/xdoc/acid-semantics.xml | 8 +- src/{main => }/site/xdoc/bulk-loads.xml | 4 +- src/{main => }/site/xdoc/coc.xml | 0 src/{main => }/site/xdoc/cygwin.xml | 0 src/{main => }/site/xdoc/export_control.xml | 8 +- src/{main => }/site/xdoc/index.xml | 0 src/{main => }/site/xdoc/metrics.xml | 30 +- src/{main => }/site/xdoc/old_news.xml | 0 src/{main => }/site/xdoc/poweredbyhbase.xml | 0 .../site/xdoc/pseudo-distributed.xml | 5 +- src/{main => }/site/xdoc/replication.xml | 0 src/{main => }/site/xdoc/resources.xml | 0 src/{main => }/site/xdoc/sponsors.xml | 0 .../site/xdoc/supportingprojects.xml | 0 103 files changed, 2391 insertions(+), 1309 deletions(-) create mode 100644 src/main/asciidoc/_chapters/amv2.adoc create mode 100644 src/main/asciidoc/_chapters/appendix_hbase_incompatibilities.adoc delete mode 100644 src/main/asciidoc/_chapters/backup_restore.adoc create mode 100644 src/main/asciidoc/_chapters/pv2.adoc rename src/{main => }/site/asciidoc/acid-semantics.adoc (99%) rename src/{main => }/site/asciidoc/bulk-loads.adoc (99%) rename src/{main => }/site/asciidoc/cygwin.adoc (99%) rename src/{main => }/site/asciidoc/export_control.adoc (97%) rename src/{main => }/site/asciidoc/index.adoc (100%) rename src/{main => }/site/asciidoc/metrics.adoc (98%) rename src/{main => }/site/asciidoc/old_news.adoc (99%) rename src/{main => }/site/asciidoc/pseudo-distributed.adoc (99%) rename src/{main => }/site/asciidoc/replication.adoc (100%) rename 
src/{main => }/site/asciidoc/resources.adoc (99%) rename src/{main => }/site/asciidoc/sponsors.adoc (97%) rename src/{main => }/site/custom/project-info-report.properties (100%) rename src/{main => }/site/resources/.htaccess (100%) rename src/{main => }/site/resources/book/.empty (100%) rename src/{main => }/site/resources/css/site.css (100%) rename src/{main => }/site/resources/doap_Hbase.rdf (98%) rename src/{main => }/site/resources/images/architecture.gif (100%) rename src/{main => }/site/resources/images/backup-app-components.png (100%) rename src/{main => }/site/resources/images/backup-cloud-appliance.png (100%) rename src/{main => }/site/resources/images/backup-dedicated-cluster.png (100%) rename src/{main => }/site/resources/images/backup-intra-cluster.png (100%) rename src/{main => }/site/resources/images/bc_basic.png (100%) rename src/{main => }/site/resources/images/bc_config.png (100%) rename src/{main => }/site/resources/images/bc_l1.png (100%) rename src/{main => }/site/resources/images/bc_l2_buckets.png (100%) rename src/{main => }/site/resources/images/bc_stats.png (100%) rename src/{main => }/site/resources/images/big_h_logo.png (100%) rename src/{main => }/site/resources/images/big_h_logo.svg (100%) rename src/{main => }/site/resources/images/data_block_diff_encoding.png (100%) rename src/{main => }/site/resources/images/data_block_no_encoding.png (100%) rename src/{main => }/site/resources/images/data_block_prefix_encoding.png (100%) rename src/{main => }/site/resources/images/favicon.ico (100%) rename src/{main => }/site/resources/images/hadoop-logo.jpg (100%) rename src/{main => }/site/resources/images/hbase_logo.png (100%) rename src/{main => }/site/resources/images/hbase_logo.svg (100%) rename src/{main => }/site/resources/images/hbase_logo_with_orca.png (100%) rename src/{main => }/site/resources/images/hbase_logo_with_orca.xcf (100%) rename src/{main => }/site/resources/images/hbase_logo_with_orca_large.png (100%) rename src/{main => 
}/site/resources/images/hbase_replication_diagram.jpg (100%) rename src/{main => }/site/resources/images/hbasecon2015.30percent.png (100%) rename src/{main => }/site/resources/images/hbasecon2016-stack-logo.jpg (100%) rename src/{main => }/site/resources/images/hbasecon2016-stacked.png (100%) rename src/{main => }/site/resources/images/hbasecon2017.png (100%) rename src/{main => }/site/resources/images/hbaseconasia2017.png (100%) rename src/{main => }/site/resources/images/hfile.png (100%) rename src/{main => }/site/resources/images/hfilev2.png (100%) rename src/{main => }/site/resources/images/jumping-orca_rotated.png (100%) rename src/{main => }/site/resources/images/jumping-orca_rotated.xcf (100%) rename src/{main => }/site/resources/images/jumping-orca_rotated_12percent.png (100%) rename src/{main => }/site/resources/images/jumping-orca_rotated_25percent.png (100%) rename src/{main => }/site/resources/images/jumping-orca_transparent_rotated.xcf (100%) rename src/{main => }/site/resources/images/region_split_process.png (100%) rename src/{main => }/site/resources/images/region_states.png (100%) rename src/{main => }/site/resources/images/replication_overview.png (100%) rename src/{main => }/site/resources/images/timeline_consistency.png (100%) rename src/{main => }/site/resources/repo/org/apache/maven/skins/maven-fluido-skin/1.5-HBASE/maven-fluido-skin-1.5-HBASE.jar (100%) rename src/{main => }/site/resources/repo/org/apache/maven/skins/maven-fluido-skin/1.5-HBASE/maven-fluido-skin-1.5-HBASE.pom (100%) rename src/{main => }/site/resources/repo/org/apache/maven/skins/maven-fluido-skin/maven-metadata-local.xml (100%) rename src/{main => }/site/site.xml (100%) rename src/{main => }/site/xdoc/acid-semantics.xml (99%) rename src/{main => }/site/xdoc/bulk-loads.xml (98%) rename src/{main => }/site/xdoc/coc.xml (100%) rename src/{main => }/site/xdoc/cygwin.xml (100%) rename src/{main => }/site/xdoc/export_control.xml (97%) rename src/{main => }/site/xdoc/index.xml 
(100%) rename src/{main => }/site/xdoc/metrics.xml (95%) rename src/{main => }/site/xdoc/old_news.xml (100%) rename src/{main => }/site/xdoc/poweredbyhbase.xml (100%) rename src/{main => }/site/xdoc/pseudo-distributed.xml (98%) rename src/{main => }/site/xdoc/replication.xml (100%) rename src/{main => }/site/xdoc/resources.xml (100%) rename src/{main => }/site/xdoc/sponsors.xml (100%) rename src/{main => }/site/xdoc/supportingprojects.xml (100%) diff --git a/pom.xml b/pom.xml index b0a53c853c1..4db89bd70a4 100755 --- a/pom.xml +++ b/pom.xml @@ -856,7 +856,7 @@ .svn/** **/.settings/** **/patchprocess/** - src/main/site/resources/repo/** + src/site/resources/repo/** **/dependency-reduced-pom.xml **/rat.txt @@ -1136,8 +1136,8 @@ - ${basedir}/src/main/site - ${basedir}/src/main/site/custom/project-info-report.properties + ${basedir}/src/site + ${basedir}/src/site/custom/project-info-report.properties UTF-8 UTF-8 @@ -1217,7 +1217,7 @@ ${project.reporting.outputDirectory}/ - ${basedir}/src/main/site/resources/ + ${basedir}/src/site/resources/ .htaccess @@ -1236,7 +1236,7 @@ ${project.reporting.outputDirectory}/ - ${basedir}/src/main/site/resources/ + ${basedir}/src/site/resources/ book/** @@ -3442,7 +3442,7 @@ - + false @@ -3677,7 +3677,7 @@ project.local project - file:${project.basedir}/src/main/site/resources/repo + file:${project.basedir}/src/site/resources/repo diff --git a/src/main/asciidoc/_chapters/amv2.adoc b/src/main/asciidoc/_chapters/amv2.adoc new file mode 100644 index 00000000000..49841ce3255 --- /dev/null +++ b/src/main/asciidoc/_chapters/amv2.adoc @@ -0,0 +1,173 @@ +//// +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +//// +[[amv2]] += AMv2 Description for Devs +:doctype: book +:numbered: +:toc: left +:icons: font +:experimental: + +The AssignmentManager (AM) in HBase Master manages assignment of Regions over a cluster of RegionServers. + +The AMv2 project is a redo of Assignment in an attempt at addressing the root cause of many of our operational issues in production, namely slow assignment and problematic accounting such that Regions are misplaced stuck offline in the notorious _Regions-In-Transition (RIT)_ limbo state. + +Below are notes for devs on key aspects of AMv2 in no particular order. + +== Background + +Assignment in HBase 1.x has been problematic in operation. It is not hard to see why. Region state is kept at the other end of an RPC in ZooKeeper (Terminal states -- i.e. OPEN or CLOSED -- are published to the _hbase:meta_ table). In HBase-1.x.x, state has multiple writers with Master and RegionServers all able to make state edits concurrently (in _hbase:meta_ table and out on ZooKeeper). If clocks are awry or watchers missed, state changes can be skipped or overwritten. Locking of HBase Entities -- tables, regions -- is not comprehensive so a table operation -- disable/enable -- could clash with a region-level operation; a split or merge. Region state is distributed and hard to reason about and test. 
Assignment is slow in operation because each assign involves moving remote znodes through transitions. Cluster size tends to top out at a couple of hundred thousand regions; beyond this, cluster start/stop takes hours and is prone to corruption. + +AMv2 (AssignmentManager Version 2) is a refactor (https://issues.apache.org/jira/browse/HBASE-14350[HBASE-14350]) of the hbase-1.x AssignmentManager putting it up on a https://issues.apache.org/jira/browse/HBASE-12439[ProcedureV2 (HBASE-12439)] basis. ProcedureV2 (Pv2) is an awkwardly named system that allows describing and running multi-step state machines. It is performant and persists all state to a Store which is recoverable post crash. See the companion chapter on <<pv2>>, to learn more about the ProcedureV2 system. + +In AMv2, all assignment, crash handling, splits and merges are recast as Procedures(v2). ZooKeeper is purged from the mix. As before, the final assignment state gets published to _hbase:meta_ for non-Master participants to read (all-clients) with intermediate state kept in the local Pv2 WAL-based ‘store’ but only the active Master, a single-writer, evolves state. The Master’s in-memory cluster image is the authority and if there is disagreement, RegionServers are forced to comply. Pv2 adds shared/exclusive locking of all core HBase Entities -- namespace, tables, and regions -- to ensure one actor at a time access and to prevent operations contending over resources (move/split, disable/assign, etc.). + +This redo of AM atop a purpose-built, performant state machine, with all operations taking on the common Procedure form and only a single state writer, moves our AM to a new level of resilience and scale. + +== New System + +Each Assign or Unassign of a Region is now a Procedure. A Move (Region) Procedure is a compound of Procedures; it is the running of an Unassign Procedure followed by an Assign Procedure. The Move Procedure spawns the Unassign and Assign in series and then waits on their completions. 
+ +And so on. ServerCrashProcedure spawns the WAL splitting tasks and then the reassign of all regions that were hosted on the crashed server as subprocedures. + +AMv2 Procedures are run by the Master in a ProcedureExecutor instance. All Procedures make use of utility provided by the Pv2 framework. + +For example, Procedures persist each state transition to the framework’s Procedure Store. The default implementation is done as a WAL kept on HDFS. On crash, we reopen the Store and rerun all WALs of Procedure transitions to put the Assignment State Machine back into the attitude it had just before crash. We then continue Procedure execution. + +In the new system, the Master is the Authority on all things Assign. Previously we were ambiguous; e.g. the RegionServer was in charge of Split operations. Master keeps an in-memory image of Region states and servers. If there is disagreement, the Master always prevails; at an extreme it will kill the RegionServer that is in disagreement. + +A new RegionStateStore class takes care of publishing the terminal Region state, whether OPEN or CLOSED, out to the _hbase:meta_ table. + +RegionServers now report their run version on Connection. This version is available inside the AM for use running migrating rolling restarts. + +== Procedures Detail + +=== Assign/Unassign + +Assign and Unassign subclass a common RegionTransitionProcedure. There can only be one RegionTransitionProcedure per region running at a time since the RTP instance takes a lock on the region. The RTP base Procedure has three steps: a store-the-procedure step (REGION_TRANSITION_QUEUE); a dispatch of the procedure open or close followed by a suspend waiting on the remote regionserver to report successful open or fail (REGION_TRANSITION_DISPATCH) or notification that the server fielding the request crashed; and finally registration of the successful open/close in hbase:meta (REGION_TRANSITION_FINISH). 
+ +Here is how the assign of a region 56f985a727afe80a184dac75fbf6860c looks in the logs. The assign was provoked by a Server Crash (Process ID 1176 or pid=1176 which when it is the parent of a procedure, it is identified as ppid=1176). The assign is pid=1179, the second region of the two being assigned by this Server Crash. + +[source] +---- +2017-05-23 12:04:24,175 INFO [ProcExecWrkr-30] procedure2.ProcedureExecutor: Initialized subprocedures=[{pid=1178, ppid=1176, state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure table=IntegrationTestBigLinkedList, region=bfd57f0b72fd3ca77e9d3c5e3ae48d76, target=ve0540.halxg.example.org,16020,1495525111232}, {pid=1179, ppid=1176, state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure table=IntegrationTestBigLinkedList, region=56f985a727afe80a184dac75fbf6860c, target=ve0540.halxg.example.org,16020,1495525111232}] +---- + +Next we start the assign by queuing (‘registering’) the Procedure with the framework. + +[source] +---- +2017-05-23 12:04:24,241 INFO [ProcExecWrkr-30] assignment.AssignProcedure: Start pid=1179, ppid=1176, state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure table=IntegrationTestBigLinkedList, region=56f985a727afe80a184dac75fbf6860c, target=ve0540.halxg.example.org,16020,1495525111232; rit=OFFLINE, location=ve0540.halxg.example.org,16020,1495525111232; forceNewPlan=false, retain=false +---- + +Track the running of Procedures in logs by tracing their process id -- here pid=1179. + +Next we move to the dispatch phase where we update hbase:meta table setting the region state as OPENING on server ve540. We then dispatch an rpc to ve540 asking it to open the region. Thereafter we suspend the Assign until we get a message back from ve540 on whether it has opened the region successfully (or not). 
+ +[source] +---- +2017-05-23 12:04:24,494 INFO [ProcExecWrkr-38] assignment.RegionStateStore: pid=1179 updating hbase:meta row=IntegrationTestBigLinkedList,H\xE3@\x8D\x964\x9D\xDF\x8F@9\x0F\xC8\xCC\xC2,1495566261066.56f985a727afe80a184dac75fbf6860c., regionState=OPENING, regionLocation=ve0540.halxg.example.org,16020,1495525111232 +2017-05-23 12:04:24,498 INFO [ProcExecWrkr-38] assignment.RegionTransitionProcedure: Dispatch pid=1179, ppid=1176, state=RUNNABLE:REGION_TRANSITION_DISPATCH; AssignProcedure table=IntegrationTestBigLinkedList, region=56f985a727afe80a184dac75fbf6860c, target=ve0540.halxg.example.org,16020,1495525111232; rit=OPENING, location=ve0540.halxg.example.org,16020,1495525111232 +---- + +Below we log the incoming report that the region opened successfully on ve540. The Procedure is woken up (you can tell the procedure is running by the name of the thread; it’s a ProcedureExecutor thread, ProcExecWrkr-9). The woken up Procedure updates state in hbase:meta to denote the region as open on ve0540. It then reports finished and exits. 
+ +[source] +---- +2017-05-23 12:04:26,643 DEBUG [RpcServer.default.FPBQ.Fifo.handler=46,queue=1,port=16000] assignment.RegionTransitionProcedure: Received report OPENED seqId=11984985, pid=1179, ppid=1176, state=RUNNABLE:REGION_TRANSITION_DISPATCH; AssignProcedure table=IntegrationTestBigLinkedList, region=56f985a727afe80a184dac75fbf6860c, target=ve0540.halxg.example.org,16020,1495525111232; rit=OPENING, location=ve0540.halxg.example.org,16020,1495525111232 2017-05-23 12:04:26,643 INFO [ProcExecWrkr-9] assignment.RegionStateStore: pid=1179 updating hbase:meta row=IntegrationTestBigLinkedList,H\xE3@\x8D\x964\x9D\xDF\x8F@9\x0F\xC8\xCC\xC2,1495566261066.56f985a727afe80a184dac75fbf6860c., regionState=OPEN, openSeqNum=11984985, regionLocation=ve0540.halxg.example.org,16020,1495525111232 +2017-05-23 12:04:26,836 INFO [ProcExecWrkr-9] procedure2.ProcedureExecutor: Finish suprocedure pid=1179, ppid=1176, state=SUCCESS; AssignProcedure table=IntegrationTestBigLinkedList, region=56f985a727afe80a184dac75fbf6860c, target=ve0540.halxg.example.org,16020,1495525111232 +---- +Unassign looks similar given it is based on the base RegionTransitionProcedure. It has the same state transitions and does basically the same steps but with different state name (CLOSING, CLOSED). + +Most other procedures are subclasses of a Pv2 StateMachine implementation. We have both Table and Region focused StateMachines types. + +== UI + +Along the top-bar on the Master, you can now find a ‘Procedures&Locks’ tab which takes you to a page that is ugly but useful. It dumps currently running procedures and framework locks. Look at this when you can’t figure what stuff is stuck; it will at least identify problematic procedures (take the pid and grep the logs…). Look for ROLLEDBACK or pids that have been RUNNING for a long time. + +== Logging + +Procedures log their process ids as pid= and their parent ids (ppid=) everywhere. 
Work has been done so you can grep the pid and see history of a procedure operation. + +== Implementation Notes + +In this section we note some idiosyncrasies of operation as an attempt at saving you some head-scratching. + +=== Region Transition RPC and RS Heartbeat can arrive at ~same time on Master + +Reporting Region Transition on a RegionServer is now an RPC distinct from RS heartbeating (‘RegionServerServices’ Service). A heartbeat and a status update can arrive at the Master at about the same time. The Master will update its internal state for a Region but this same state is checked during heartbeat processing. We may find the unexpected; i.e. a Region just reported as CLOSED so heartbeat is surprised to find region OPEN on the back of the RS report. In the new system, all slaves must bow to the Master’s understanding of cluster state; the Master will kill/close any misaligned entities. + +To address the above, we added a lastUpdate for in-memory Master state. Let a region state have some vintage before we act on it (one second currently). + +=== Master as RegionServer or as RegionServer that just does system tables + +AMv2 enforces current master branch default of HMaster carrying system tables only; i.e. the Master in an HBase cluster also acts as a RegionServer, only it is the exclusive host for tables such as _hbase:meta_, _hbase:namespace_, etc., the core system tables. This is causing a couple of test failures as AMv1, though it is not supposed to, allows moving hbase:meta off Master while AMv2 does not. + +== New Configs + +These configs all need doc on when you’d change them. + +=== hbase.procedure.remote.dispatcher.threadpool.size + +Default 128 + +=== hbase.procedure.remote.dispatcher.delay.msec + +Default 150ms + +=== hbase.procedure.remote.dispatcher.max.queue.size + +Default 32 + +=== hbase.regionserver.rpc.startup.waittime + +Default 60 seconds. 
+ +== Tools + +HBASE-15592 Print Procedure WAL Content + +Patch in https://issues.apache.org/jira/browse/HBASE-18152[HBASE-18152] [AMv2] Corrupt Procedure WAL file; procedure data stored out of order https://issues.apache.org/jira/secure/attachment/12871066/reading_bad_wal.patch[https://issues.apache.org/jira/secure/attachment/12871066/reading_bad_wal.patch] + +=== MasterProcedureSchedulerPerformanceEvaluation + +Tool to test performance of locks and queues in procedure scheduler independently from other framework components. Run this after any substantial changes in proc system. Prints nice output: + +---- +****************************************** +Time - addBack : 5.0600sec +Ops/sec - addBack : 1.9M +Time - poll : 19.4590sec +Ops/sec - poll : 501.9K +Num Operations : 10000000 + +Completed : 10000006 +Yield : 22025876 + +Num Tables : 5 +Regions per table : 10 +Operations type : both +Threads : 10 +****************************************** +Raw format for scripts + +RESULT [num_ops=10000000, ops_type=both, num_table=5, regions_per_table=10, threads=10, num_yield=22025876, time_addback_ms=5060, time_poll_ms=19459] +---- diff --git a/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc b/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc index d5ea0765ba8..cb17346d42c 100644 --- a/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc +++ b/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc @@ -160,6 +160,7 @@ In case the table goes out of date, the unit tests which check for accuracy of p | | getUserPermissions(global level) | global(A) | | getUserPermissions(namespace level) | global(A)\|NS(A) | | getUserPermissions(table level) | global(A)\|NS(A)\|TableOwner\|table(A)\|CF(A)\|CQ(A) +| | hasPermission(table level) | global(A)\|SelfUserCheck | RegionServer | stopRegionServer | superuser\|global(A) | | mergeRegions | superuser\|global(A) | | rollWALWriterRequest | superuser\|global(A) diff --git 
a/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc b/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc index 6570c9c60ef..a603c16f42b 100644 --- a/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc +++ b/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc @@ -119,7 +119,7 @@ JIRA and add a version number to the name of the new patch. === Editing the HBase Website -The source for the HBase website is in the HBase source, in the _src/main/site/_ directory. +The source for the HBase website is in the HBase source, in the _src/site/_ directory. Within this directory, source for the individual pages is in the _xdocs/_ directory, and images referenced in those pages are in the _resources/images/_ directory. This directory also stores images used in the HBase Reference Guide. @@ -216,7 +216,7 @@ link:http://www.google.com[Google] ---- image::sunset.jpg[Alt Text] ---- -(put the image in the src/main/site/resources/images directory) +(put the image in the src/site/resources/images directory) | An inline image | The image with alt text, as part of the text flow | ---- image:sunset.jpg [Alt Text] @@ -389,7 +389,7 @@ Inline images cannot have titles. They are generally small images like GUI butto image:sunset.jpg[Alt Text] ---- -When doing a local build, save the image to the _src/main/site/resources/images/_ directory. +When doing a local build, save the image to the _src/site/resources/images/_ directory. When you link to the image, do not include the directory portion of the path. The image will be copied to the appropriate target location during the build of the output. 
diff --git a/src/main/asciidoc/_chapters/appendix_hbase_incompatibilities.adoc b/src/main/asciidoc/_chapters/appendix_hbase_incompatibilities.adoc new file mode 100644 index 00000000000..d450f04ea64 --- /dev/null +++ b/src/main/asciidoc/_chapters/appendix_hbase_incompatibilities.adoc @@ -0,0 +1,714 @@ +//// +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +//// + +[appendix] +== Known Incompatibilities Among HBase Versions +:doctype: book +:numbered: +:toc: left +:icons: font +:experimental: +:toc: left +:source-language: java + +== HBase 2.0 Incompatible Changes + +This appendix describes incompatible changes from earlier versions of HBase against HBase 2.0. +This list is not meant to be wholly encompassing of all possible incompatibilities. +Instead, this content is intended to give insight into some obvious incompatibilities which most +users will face coming from HBase 1.x releases. + +=== List of Major Changes for HBase 2.0 +* HBASE-1912- HBCK is a HBase database checking tool for capturing the inconsistency. As an HBase administrator, you should not use HBase version 1.0 hbck tool to check the HBase 2.0 database. Doing so will break the database and throw an exception error. 
+* HBASE-16189 and HBASE-18945 - You cannot open the HBase 2.0 hfiles through HBase 1.0 version. If you are an admin or an HBase user who is using HBase version 1.x, you must first do a rolling upgrade to the latest version of HBase 1.x and then upgrade to HBase 2.0. +* HBASE-18240 - Changed the ReplicationEndpoint Interface. It also introduces a new hbase-thirdparty 1.0 that packages all the third party utilities, which are expected to run in the hbase cluster. + +=== Coprocessor API changes + +* HBASE-16769 - Deprecated PB references from MasterObserver and RegionServerObserver. +* HBASE-17312 - [JDK8] Use default method for Observer Coprocessors. The interface classes of BaseMasterAndRegionObserver, BaseMasterObserver, BaseRegionObserver, BaseRegionServerObserver and BaseWALObserver use JDK8's 'default' keyword to provide empty and no-op implementations. +* Interface HTableInterface + HBase 2.0 introduces the following changes to the methods listed below: + +==== [−] interface CoprocessorEnvironment changes (2) + +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method getTable ( TableName ) has been removed. | A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getTable ( TableName, ExecutorService ) has been removed. | A client program may be interrupted by NoSuchMethodError exception. +|=== + +* Public Audience + +The following tables describe the coprocessor changes. + +===== [−] class CoprocessorRpcChannel (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| This class has become an interface.| A client program may be interrupted by IncompatibleClassChangeError or InstantiationError exception depending on the usage of this class. +|=== + +===== Class CoprocessorHost +Classes that were Audience Private but were removed. 
+[cols="1,1", frame="all"] +|=== +| Change | Result +| Type of field coprocessors has been changed from java.util.SortedSet to org.apache.hadoop.hbase.util.SortedList.| A client program may be interrupted by NoSuchFieldError exception. +|=== + + +==== MasterObserver +HBase 2.0 introduces the following changes to the MasterObserver interface. + +===== [−] interface MasterObserver (14) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method void postCloneSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postCreateTable ( ObserverContext, HTableDescriptor, HRegionInfo[ ] ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postDeleteSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postGetTableDescriptors ( ObserverContext, List ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postModifyTable ( ObserverContext, TableName, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postRestoreSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. 
+| Abstract method void preCloneSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preCreateTable ( ObserverContext, HTableDescriptor, HRegionInfo[ ] ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preDeleteSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preGetTableDescriptors ( ObserverContext, List, List ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preModifyTable ( ObserverContext, TableName, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preRestoreSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preSnapshot ( ObserverContext, HBaseProtos.SnapshotDescription, HTableDescriptor ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== RegionObserver +HBase 2.0 introduces the following changes to the RegionObserver interface. + +===== [−] interface RegionObserver (13) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method void postCloseRegionOperation ( ObserverContext, HRegion.Operation ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. 
+| Abstract method void postCompactSelection ( ObserverContext, Store, ImmutableList ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postCompactSelection ( ObserverContext, Store, ImmutableList, CompactionRequest ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postGetClosestRowBefore ( ObserverContext, byte[ ], byte[ ], Result ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method DeleteTracker postInstantiateDeleteTracker ( ObserverContext, DeleteTracker ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postSplit ( ObserverContext, HRegion, HRegion ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postStartRegionOperation ( ObserverContext, HRegion.Operation ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method StoreFile.Reader postStoreFileReaderOpen ( ObserverContext, FileSystem, Path, FSDataInputStreamWrapper, long, CacheConfig, Reference, StoreFile.Reader ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void postWALRestore ( ObserverContext, HRegionInfo, HLogKey, WALEdit ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method InternalScanner preFlushScannerOpen ( ObserverContext, Store, KeyValueScanner, InternalScanner ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. 
+| Abstract method void preGetClosestRowBefore ( ObserverContext, byte[ ], byte[ ], Result ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method StoreFile.Reader preStoreFileReaderOpen ( ObserverContext, FileSystem, Path, FSDataInputStreamWrapper, long, CacheConfig, Reference, StoreFile.Reader ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method void preWALRestore ( ObserverContext, HRegionInfo, HLogKey, WALEdit ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== WALObserver +HBase 2.0 introduces the following changes to the WALObserver interface. + +===== [−] interface WALObserver +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method void postWALWrite ( ObserverContext, HRegionInfo, HLogKey, WALEdit ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method boolean preWALWrite ( ObserverContext, HRegionInfo, HLogKey, WALEdit ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== Miscellaneous +HBase 2.0 introduces changes to the following classes: + +hbase-server-1.0.0.jar, OnlineRegions.class package org.apache.hadoop.hbase.regionserver +[cols="1,1", frame="all"] +===== [−] OnlineRegions.getFromOnlineRegions ( String p1 ) [abstract] : HRegion +org/apache/hadoop/hbase/regionserver/OnlineRegions.getFromOnlineRegions:(Ljava/lang/String;)Lorg/apache/hadoop/hbase/regionserver/HRegion; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from HRegion to Region.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. 
+|=== + +hbase-server-1.0.0.jar, RegionCoprocessorEnvironment.class package org.apache.hadoop.hbase.coprocessor + +===== [−] RegionCoprocessorEnvironment.getRegion ( ) [abstract] : HRegion +org/apache/hadoop/hbase/coprocessor/RegionCoprocessorEnvironment.getRegion:()Lorg/apache/hadoop/hbase/regionserver/HRegion; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.regionserver.HRegion to org.apache.hadoop.hbase.regionserver.Region.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +hbase-server-1.0.0.jar, RegionCoprocessorHost.class package org.apache.hadoop.hbase.regionserver + +===== [−] RegionCoprocessorHost.postAppend ( Append append, Result result ) : void +org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.postAppend:(Lorg/apache/hadoop/hbase/client/Append;Lorg/apache/hadoop/hbase/client/Result;)V +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from void to org.apache.hadoop.hbase.client.Result.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. 
+|=== + +===== [−] RegionCoprocessorHost.preStoreFileReaderOpen ( FileSystem fs, Path p, FSDataInputStreamWrapper in, long size,CacheConfig cacheConf, Reference r ) : StoreFile.Reader +org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.preStoreFileReaderOpen:(Lorg/apache/hadoop/fs/FileSystem;Lorg/apache/hadoop/fs/Path;Lorg/apache/hadoop/hbase/io/FSDataInputStreamWrapper;JLorg/apache/hadoop/hbase/io/hfile/CacheConfig;Lorg/apache/hadoop/hbase/io/Reference;)Lorg/apache/hadoop/hbase/regionserver/StoreFile$Reader; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from StoreFile.Reader to StoreFileReader.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== IPC +==== Scheduler changes: +1. Following methods became abstract: + +package org.apache.hadoop.hbase.ipc + +===== [−]class RpcScheduler (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method void dispatch ( CallRunner ) has been removed from this class.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +hbase-server-1.0.0.jar, RpcScheduler.class package org.apache.hadoop.hbase.ipc + +===== [−] RpcScheduler.dispatch ( CallRunner p1 ) [abstract] : void 1 +org/apache/hadoop/hbase/ipc/RpcScheduler.dispatch:(Lorg/apache/hadoop/hbase/ipc/CallRunner;)V +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from void to boolean.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +2. 
Following abstract methods have been removed: + +===== [−] interface PriorityFunction (2) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method long getDeadline ( RPCProtos.RequestHeader, Message ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method int getPriority ( RPCProtos.RequestHeader, Message ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== Server API changes: + +===== [−] class RpcServer (12) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Type of field CurCall has been changed from java.lang.ThreadLocal to java.lang.ThreadLocal.| A client program may be interrupted by NoSuchFieldError exception. +| This class became abstract.| A client program may be interrupted by InstantiationError exception. +| Abstract method int getNumOpenConnections ( ) has been added to this class.| This class became abstract and a client program may be interrupted by InstantiationError exception. +| Field callQueueSize of type org.apache.hadoop.hbase.util.Counter has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field connectionList of type java.util.List has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field maxIdleTime of type int has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field numConnections of type int has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field port of type int has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field purgeTimeout of type long has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. 
+| Field responder of type RpcServer.Responder has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field socketSendBufferSize of type int has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field thresholdIdleConnections of type int has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +|=== + +The following abstract method has been removed: +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method Pair call ( BlockingService, Descriptors.MethodDescriptor, Message, CellScanner, long, MonitoredRPCHandler ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== Replication and WAL changes: +HBASE-18733: WALKey has been purged completely in HBase 2.0. +Following are the changes to the WALKey: + +===== [−] class WALKey (8) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Access level of field clusterIds has been changed from protected to private.| A client program may be interrupted by IllegalAccessError exception. +| Access level of field compressionContext has been changed from protected to private.| A client program may be interrupted by IllegalAccessError exception. +| Access level of field encodedRegionName has been changed from protected to private.| A client program may be interrupted by IllegalAccessError exception. +| Access level of field tablename has been changed from protected to private.| A client program may be interrupted by IllegalAccessError exception. +| Access level of field writeTime has been changed from protected to private.| A client program may be interrupted by IllegalAccessError exception. 
+|=== + +Following fields have been removed: +[cols="1,1", frame="all"] +|=== +| Change | Result +| Field LOG of type org.apache.commons.logging.Log has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field VERSION of type WALKey.Version has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field logSeqNum of type long has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +|=== + +Following are the changes to the WALEdit.class: +hbase-server-1.0.0.jar, WALEdit.class package org.apache.hadoop.hbase.regionserver.wal + +===== WALEdit.getCompaction ( Cell kv ) [static] : WALProtos.CompactionDescriptor (1) +org/apache/hadoop/hbase/regionserver/wal/WALEdit.getCompaction:(Lorg/apache/hadoop/hbase/Cell;)Lorg/apache/hadoop/hbase/protobuf/generated/WALProtos$CompactionDescriptor; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor to org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== WALEdit.getFlushDescriptor ( Cell cell ) [static] : WALProtos.FlushDescriptor (1) +org/apache/hadoop/hbase/regionserver/wal/WALEdit.getFlushDescriptor:(Lorg/apache/hadoop/hbase/Cell;)Lorg/apache/hadoop/hbase/protobuf/generated/WALProtos$FlushDescriptor; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor to org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.| This method has been removed because the return type is part of the method signature. 
A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== WALEdit.getRegionEventDescriptor ( Cell cell ) [static] : WALProtos.RegionEventDescriptor (1) +org/apache/hadoop/hbase/regionserver/wal/WALEdit.getRegionEventDescriptor:(Lorg/apache/hadoop/hbase/Cell;)Lorg/apache/hadoop/hbase/protobuf/generated/WALProtos$RegionEventDescriptor; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor to org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +Following is the change to the WALKey.class: +package org.apache.hadoop.hbase.wal + +===== WALKey.getBuilder ( WALCellCodec.ByteStringCompressor compressor ) : WALProtos.WALKey.Builder 1 +org/apache/hadoop/hbase/wal/WALKey.getBuilder:(Lorg/apache/hadoop/hbase/regionserver/wal/WALCellCodec$ByteStringCompressor;)Lorg/apache/hadoop/hbase/protobuf/generated/WALProtos$WALKey$Builder; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALKey.Builder to org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.WALKey.Builder.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== Deprecated APIs or coprocessor: + +HBASE-16769 - PB references from MasterObserver and RegionServerObserver have been removed. + +==== Admin Interface API changes: +You cannot administer an HBase 2.0 cluster with an HBase 1.0 client that includes ReplicationAdmin, ACC, Thrift and REST usage of Admin ops. Methods returning protobufs have been changed to return POJOs instead. PB is not used in the APIs anymore. 
Returns have changed from void to Future for async methods. +HBASE-18106 - Admin.listProcedures and Admin.listLocks were renamed to getProcedures and getLocks. +MapReduce makes use of Admin, calling admin.getClusterStatus() to calculate splits. + +* Thrift usage of Admin API: +compact(ByteBuffer) +createTable(ByteBuffer, List) +deleteTable(ByteBuffer) +disableTable(ByteBuffer) +enableTable(ByteBuffer) +getTableNames() +majorCompact(ByteBuffer) + +* REST usage of Admin API: +hbase-rest +org.apache.hadoop.hbase.rest +RootResource +getTableList() + TableName[] tableNames = servlet.getAdmin().listTableNames(); +SchemaResource +delete(UriInfo) + Admin admin = servlet.getAdmin(); +update(TableSchemaModel, boolean, UriInfo) + Admin admin = servlet.getAdmin(); +StorageClusterStatusResource +get(UriInfo) + ClusterStatus status = servlet.getAdmin().getClusterStatus(); +StorageClusterVersionResource +get(UriInfo) + model.setVersion(servlet.getAdmin().getClusterStatus().getHBaseVersion()); +TableResource +exists() + return servlet.getAdmin().tableExists(TableName.valueOf(table)); + +Following are the changes to the Admin interface: + +===== [−] interface Admin (9) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method createTableAsync ( HTableDescriptor, byte[ ][ ] ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method disableTableAsync ( TableName ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method enableTableAsync ( TableName ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getCompactionState ( TableName ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. 
+| Abstract method getCompactionStateForRegion ( byte[ ] ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method isSnapshotFinished ( HBaseProtos.SnapshotDescription ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method snapshot ( String, TableName, HBaseProtos.SnapshotDescription.Type ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method snapshot ( HBaseProtos.SnapshotDescription ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method takeSnapshotAsync ( HBaseProtos.SnapshotDescription ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +Following are the changes to the Admin.class: +hbase-client-1.0.0.jar, Admin.class package org.apache.hadoop.hbase.client + +===== [−] Admin.createTableAsync ( HTableDescriptor p1, byte[ ][ ] p2 ) [abstract] : void 1 +org/apache/hadoop/hbase/client/Admin.createTableAsync:(Lorg/apache/hadoop/hbase/HTableDescriptor;[[B)V +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from void to java.util.concurrent.Future.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== [−] Admin.disableTableAsync ( TableName p1 ) [abstract] : void 1 +org/apache/hadoop/hbase/client/Admin.disableTableAsync:(Lorg/apache/hadoop/hbase/TableName;)V +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from void to java.util.concurrent.Future.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. 
+|=== + +===== Admin.enableTableAsync ( TableName p1 ) [abstract] : void 1 +org/apache/hadoop/hbase/client/Admin.enableTableAsync:(Lorg/apache/hadoop/hbase/TableName;)V +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from void to java.util.concurrent.Future.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== [−] Admin.getCompactionState ( TableName p1 ) [abstract] : AdminProtos.GetRegionInfoResponse.CompactionState 1 +org/apache/hadoop/hbase/client/Admin.getCompactionState:(Lorg/apache/hadoop/hbase/TableName;)Lorg/apache/hadoop/hbase/protobuf/generated/AdminProtos$GetRegionInfoResponse$CompactionState; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState to CompactionState.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== [−] Admin.getCompactionStateForRegion ( byte[ ] p1 ) [abstract] : AdminProtos.GetRegionInfoResponse.CompactionState 1 +org/apache/hadoop/hbase/client/Admin.getCompactionStateForRegion:([B)Lorg/apache/hadoop/hbase/protobuf/generated/AdminProtos$GetRegionInfoResponse$CompactionState; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState to CompactionState.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== HTableDescriptor and HColumnDescriptor changes +HTableDescriptor and HColumnDescriptor has become interfaces and you can create it through Builders. HCD has become CFD. 
It no longer implements the Writable interface. +package org.apache.hadoop.hbase + +===== [−] class HColumnDescriptor (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Removed super-interface org.apache.hadoop.io.WritableComparable.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +HColumnDescriptor in 1.0.0 +[source,java] +---- +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class HColumnDescriptor implements WritableComparable { +---- + +HColumnDescriptor in 2.0 +[source,java] +---- +@InterfaceAudience.Public +@Deprecated // remove it in 3.0 +public class HColumnDescriptor implements ColumnFamilyDescriptor, Comparable { +---- + +For META_TABLEDESC, the marker method had been deprecated already in HTD in 1.0.0. OWNER_KEY is still in HTD. + +===== class HTableDescriptor (3) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Removed super-interface org.apache.hadoop.io.WritableComparable.| A client program may be interrupted by NoSuchMethodError exception. +| Field META_TABLEDESC of type HTableDescriptor has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +|=== + +hbase-client-1.0.0.jar, HTableDescriptor.class package org.apache.hadoop.hbase + +===== [−] HTableDescriptor.getColumnFamilies ( ) : HColumnDescriptor[ ] (1) +org/apache/hadoop/hbase/HTableDescriptor.getColumnFamilies:()[Lorg/apache/hadoop/hbase/HColumnDescriptor; + +===== [−] class HColumnDescriptor (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from HColumnDescriptor[] to client.ColumnFamilyDescriptor[].| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. 
+|=== + +===== [−] HTableDescriptor.getCoprocessors ( ) : List (1) +org/apache/hadoop/hbase/HTableDescriptor.getCoprocessors:()Ljava/util/List; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from java.util.List to java.util.Collection.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +* HBASE-12990 MetaScanner is removed and it is replaced by MetaTableAccessor. + +===== HTableWrapper changes: +hbase-server-1.0.0.jar, HTableWrapper.class package org.apache.hadoop.hbase.client + +===== [−] HTableWrapper.createWrapper ( List openTables, TableName tableName, CoprocessorHost.Environment env, ExecutorService pool ) [static] : HTableInterface 1 +org/apache/hadoop/hbase/client/HTableWrapper.createWrapper:(Ljava/util/List;Lorg/apache/hadoop/hbase/TableName;Lorg/apache/hadoop/hbase/coprocessor/CoprocessorHost$Environment;Ljava/util/concurrent/ExecutorService;)Lorg/apache/hadoop/hbase/client/HTableInterface; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from HTableInterface to Table.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +* HBASE-12586: Delete all public HTable constructors and delete ConnectionManager#{delete,get}Connection. +* HBASE-9117: Remove HTablePool and all HConnection pooling related APIs. +* HBASE-13214: Remove deprecated and unused methods from HTable class +Following are the changes to the Table interface: + +===== [−] interface Table (4) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method batch ( List ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. 
+| Abstract method batchCallback ( List, Batch.Callback ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getWriteBufferSize ( ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method setWriteBufferSize ( long ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== Deprecated buffer methods in Table (in 1.0.1) and removed in 2.0.0 + +* HBASE-13298 - Clarify if Table.{set|get}WriteBufferSize() is deprecated or not. + +* LockTimeoutException and OperationConflictException classes have been removed. + +==== class OperationConflictException (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| This class has been removed.| A client program may be interrupted by NoClassDefFoundError exception. +|=== + +==== class LockTimeoutException (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| This class has been removed.| A client program may be interrupted by NoClassDefFoundError exception. +|=== + +==== Filter API changes: +Following methods have been removed: +package org.apache.hadoop.hbase.filter + +===== [−] class Filter (2) +|=== +| Change | Result +| Abstract method getNextKeyHint ( KeyValue ) has been removed from this class.|A client program may be interrupted by NoSuchMethodError exception. +| Abstract method transform ( KeyValue ) has been removed from this class.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +* HBASE-12296 Filters should work with ByteBufferedCell. +* HConnection is removed in HBase 2.0. +* RegionLoad and ServerLoad internally moved to shaded PB. 
+ +===== [−] class RegionLoad (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Type of field regionLoadPB has been changed from protobuf.generated.ClusterStatusProtos.RegionLoad to shaded.protobuf.generated.ClusterStatusProtos.RegionLoad.|A client program may be interrupted by NoSuchFieldError exception. +|=== + +* HBASE-15783: AccessControlConstants#OP_ATTRIBUTE_ACL_STRATEGY_CELL_FIRST is not used any more. +package org.apache.hadoop.hbase.security.access + +===== [−] interface AccessControlConstants (3) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Field OP_ATTRIBUTE_ACL_STRATEGY of type java.lang.String has been removed from this interface.| A client program may be interrupted by NoSuchFieldError exception. +| Field OP_ATTRIBUTE_ACL_STRATEGY_CELL_FIRST of type byte[] has been removed from this interface.| A client program may be interrupted by NoSuchFieldError exception. +| Field OP_ATTRIBUTE_ACL_STRATEGY_DEFAULT of type byte[] has been removed from this interface.| A client program may be interrupted by NoSuchFieldError exception. +|=== + +===== ServerLoad returns long instead of int 1 +hbase-client-1.0.0.jar, ServerLoad.class package org.apache.hadoop.hbase + +===== [−] ServerLoad.getNumberOfRequests ( ) : int 1 +org/apache/hadoop/hbase/ServerLoad.getNumberOfRequests:()I +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from int to long.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== [−] ServerLoad.getReadRequestsCount ( ) : int 1 +org/apache/hadoop/hbase/ServerLoad.getReadRequestsCount:()I +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from int to long.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. 
+|=== + +===== [−] ServerLoad.getTotalNumberOfRequests ( ) : int 1 +org/apache/hadoop/hbase/ServerLoad.getTotalNumberOfRequests:()I +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from int to long.|This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +===== [−]ServerLoad.getWriteRequestsCount ( ) : int 1 +org/apache/hadoop/hbase/ServerLoad.getWriteRequestsCount:()I +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from int to long.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +* HBASE-13636 Remove deprecation for HBASE-4072 (Reading of zoo.cfg) +* HConstants are removed. HBASE-16040 Remove configuration "hbase.replication" + +===== [−]class HConstants (6) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Field DEFAULT_HBASE_CONFIG_READ_ZOOKEEPER_CONFIG of type boolean has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field HBASE_CONFIG_READ_ZOOKEEPER_CONFIG of type java.lang.String has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field REPLICATION_ENABLE_DEFAULT of type boolean has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field REPLICATION_ENABLE_KEY of type java.lang.String has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field ZOOKEEPER_CONFIG_NAME of type java.lang.String has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. +| Field ZOOKEEPER_USEMULTI of type java.lang.String has been removed from this class.| A client program may be interrupted by NoSuchFieldError exception. 
+|=== + +* HBASE-18732: [compat 1-2] HBASE-14047 removed Cell methods without deprecation cycle. + +===== [−]interface Cell 5 +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method getFamily ( ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getMvccVersion ( ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getQualifier ( ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getRow ( ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +| Abstract method getValue ( ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +* HBASE-18795:Expose KeyValue.getBuffer() for tests alone. Allows KV#getBuffer in tests only that was deprecated previously. + +==== Region scanner changes: +===== [−]interface RegionScanner (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| Abstract method boolean nextRaw ( List, int ) has been removed from this interface.| A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== StoreFile changes: +===== [−] class StoreFile (1) +[cols="1,1", frame="all"] +|=== +| Change | Result +| This class became interface.| A client program may be interrupted by IncompatibleClassChangeError or InstantiationError exception dependent on the usage of this class. +|=== + +==== Mapreduce changes: +HFile*Format has been removed in HBase 2.0. 
+ +==== ClusterStatus changes: +HBASE-15843: Replace RegionState.getRegionInTransition() Map with a Set +hbase-client-1.0.0.jar, ClusterStatus.class package org.apache.hadoop.hbase + +===== [−] ClusterStatus.getRegionsInTransition ( ) : Map 1 +org/apache/hadoop/hbase/ClusterStatus.getRegionsInTransition:()Ljava/util/Map; +[cols="1,1", frame="all"] +|=== +| Change | Result +|Return value type has been changed from java.util.Map to java.util.List.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== +Other changes in ClusterStatus include removal of convert methods that were no longer necessary after purge of PB from API. + +==== Purge of PBs from API +PBs have been deprecated in APIs in HBase 2.0. + +===== [−] HBaseSnapshotException.getSnapshotDescription ( ) : HBaseProtos.SnapshotDescription 1 +org/apache/hadoop/hbase/snapshot/HBaseSnapshotException.getSnapshotDescription:()Lorg/apache/hadoop/hbase/protobuf/generated/HBaseProtos$SnapshotDescription; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription to org.apache.hadoop.hbase.client.SnapshotDescription.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +* HBASE-15609: Remove PB references from Result, DoubleColumnInterpreter and any such public facing class for 2.0. 
+hbase-client-1.0.0.jar, Result.class package org.apache.hadoop.hbase.client + +===== [−] Result.getStats ( ) : ClientProtos.RegionLoadStats 1 +org/apache/hadoop/hbase/client/Result.getStats:()Lorg/apache/hadoop/hbase/protobuf/generated/ClientProtos$RegionLoadStats; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.hadoop.hbase.protobuf.generated.ClientProtos.RegionLoadStats to RegionLoadStats.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== REST changes: +hbase-rest-1.0.0.jar, Client.class package org.apache.hadoop.hbase.rest.client + +===== [−] Client.getHttpClient ( ) : HttpClient 1 +org/apache/hadoop/hbase/rest/client/Client.getHttpClient:()Lorg/apache/commons/httpclient/HttpClient +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.commons.httpclient.HttpClient to org.apache.http.client.HttpClient.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +hbase-rest-1.0.0.jar, Response.class package org.apache.hadoop.hbase.rest.client + +===== [−] Response.getHeaders ( ) : Header[ ] 1 +org/apache/hadoop/hbase/rest/client/Response.getHeaders:()[Lorg/apache/commons/httpclient/Header; +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from org.apache.commons.httpclient.Header[] to org.apache.http.Header[].| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. 
+|=== + +==== PrettyPrinter changes: +hbase-server-1.0.0.jar, HFilePrettyPrinter.class package org.apache.hadoop.hbase.io.hfile + +===== [−]HFilePrettyPrinter.processFile ( Path file ) : void 1 +org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.processFile:(Lorg/apache/hadoop/fs/Path;)V +[cols="1,1", frame="all"] +|=== +| Change | Result +| Return value type has been changed from void to int.| This method has been removed because the return type is part of the method signature. A client program may be interrupted by NoSuchMethodError exception. +|=== + +==== AccessControlClient changes: +HBASE-13171 Change AccessControlClient methods to accept connection object to reduce setup time. Parameters have been changed in the following methods: + +* hbase-client-1.2.7-SNAPSHOT.jar, AccessControlClient.class +package org.apache.hadoop.hbase.security.access +AccessControlClient.getUserPermissions ( Configuration conf, String tableRegex ) [static] : List *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.getUserPermissions:(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;)Ljava/util/List; + +* AccessControlClient.grant ( Configuration conf, String namespace, String userName, Permission.Action... actions )[static] : void *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.grant:(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;Ljava/lang/String;[Lorg/apache/hadoop/hbase/security/access/Permission$Action;)V + +* AccessControlClient.grant ( Configuration conf, String userName, Permission.Action... actions ) [static] : void *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.grant:(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;[Lorg/apache/hadoop/hbase/security/access/Permission$Action;)V + +* AccessControlClient.grant ( Configuration conf, TableName tableName, String userName, byte[ ] family, byte[ ] qual,Permission.Action... 
actions ) [static] : void *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.grant:(Lorg/apache/hadoop/conf/Configuration;Lorg/apache/hadoop/hbase/TableName;Ljava/lang/String;[B[B[Lorg/apache/hadoop/hbase/security/access/Permission$Action;)V + +* AccessControlClient.isAccessControllerRunning ( Configuration conf ) [static] : boolean *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.isAccessControllerRunning:(Lorg/apache/hadoop/conf/Configuration;)Z + +* AccessControlClient.revoke ( Configuration conf, String namespace, String userName, Permission.Action... actions )[static] : void *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.revoke:(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;Ljava/lang/String;[Lorg/apache/hadoop/hbase/security/access/Permission$Action;)V + +* AccessControlClient.revoke ( Configuration conf, String userName, Permission.Action... actions ) [static] : void *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.revoke:(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;[Lorg/apache/hadoop/hbase/security/access/Permission$Action;)V + +* AccessControlClient.revoke ( Configuration conf, TableName tableName, String username, byte[ ] family, byte[ ] qualifier,Permission.Action... 
actions ) [static] : void *DEPRECATED* +org/apache/hadoop/hbase/security/access/AccessControlClient.revoke:(Lorg/apache/hadoop/conf/Configuration;Lorg/apache/hadoop/hbase/TableName;Ljava/lang/String;[B[B[Lorg/apache/hadoop/hbase/security/access/Permission$Action;)V +* HBASE-18731: [compat 1-2] Mark protected methods of QuotaSettings that touch Protobuf internals as IA.Private diff --git a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc index ba824999629..0f37beb3c88 100644 --- a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc +++ b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc @@ -94,7 +94,7 @@ The version of HBase introducing the above features reads both version 1 and 2 H A version 2 HFile is structured as follows: .HFile Version 2 Structure -image:hfilev2.png[HFile Version 2] +image::hfilev2.png[HFile Version 2] ==== Unified version 2 block format diff --git a/src/main/asciidoc/_chapters/architecture.adoc b/src/main/asciidoc/_chapters/architecture.adoc index 6d362c7537f..19a700a1207 100644 --- a/src/main/asciidoc/_chapters/architecture.adoc +++ b/src/main/asciidoc/_chapters/architecture.adoc @@ -643,44 +643,34 @@ Documentation will eventually move to this reference guide, but the blog is the [[block.cache]] === Block Cache -HBase provides two different BlockCache implementations: the default on-heap `LruBlockCache` and the `BucketCache`, which is (usually) off-heap. -This section discusses benefits and drawbacks of each implementation, how to choose the appropriate option, and configuration options for each. +HBase provides two different BlockCache implementations to cache data read from HDFS: +the default on-heap `LruBlockCache` and the `BucketCache`, which is (usually) off-heap. +This section discusses benefits and drawbacks of each implementation, how to choose the +appropriate option, and configuration options for each. 
.Block Cache Reporting: UI [NOTE] ==== See the RegionServer UI for detail on caching deploy. -Since HBase 0.98.4, the Block Cache detail has been significantly extended showing configurations, sizings, current usage, time-in-the-cache, and even detail on block counts and types. +See configurations, sizings, current usage, time-in-the-cache, and even detail on block counts and types. ==== ==== Cache Choices -`LruBlockCache` is the original implementation, and is entirely within the Java heap. `BucketCache` is mainly intended for keeping block cache data off-heap, although `BucketCache` can also keep data on-heap and serve from a file-backed cache. +`LruBlockCache` is the original implementation, and is entirely within the Java heap. +`BucketCache` is optional and mainly intended for keeping block cache data off-heap, although `BucketCache` can also be a file-backed cache. -.BucketCache is production ready as of HBase 0.98.6 -[NOTE] -==== -To run with BucketCache, you need HBASE-11678. -This was included in 0.98.6. -==== - -Fetching will always be slower when fetching from BucketCache, as compared to the native on-heap LruBlockCache. -However, latencies tend to be less erratic across time, because there is less garbage collection when you use BucketCache since it is managing BlockCache allocations, not the GC. -If the BucketCache is deployed in off-heap mode, this memory is not managed by the GC at all. -This is why you'd use BucketCache, so your latencies are less erratic and to mitigate GCs and heap fragmentation. -See Nick Dimiduk's link:http://www.n10k.com/blog/blockcache-101/[BlockCache 101] for comparisons running on-heap vs off-heap tests. -Also see link:https://people.apache.org/~stack/bc/[Comparing BlockCache Deploys] which finds that if your dataset fits inside your LruBlockCache deploy, use it otherwise if you are experiencing cache churn (or you want your cache to exist beyond the vagaries of java GC), use BucketCache. 
- -When you enable BucketCache, you are enabling a two tier caching system, an L1 cache which is implemented by an instance of LruBlockCache and an off-heap L2 cache which is implemented by BucketCache. +When you enable BucketCache, you are enabling a two tier caching system. We used to describe the +tiers as "L1" and "L2" but have deprecated this terminology as of hbase-2.0.0. The "L1" cache referred to an +instance of LruBlockCache and "L2" to an off-heap BucketCache. Instead, when BucketCache is enabled, +all DATA blocks are kept in the BucketCache tier and meta blocks -- INDEX and BLOOM blocks -- are on-heap in the `LruBlockCache`. Management of these two tiers and the policy that dictates how blocks move between them is done by `CombinedBlockCache`. -It keeps all DATA blocks in the L2 BucketCache and meta blocks -- INDEX and BLOOM blocks -- on-heap in the L1 `LruBlockCache`. -See <> for more detail on going off-heap. [[cache.configurations]] ==== General Cache Configurations Apart from the cache implementation itself, you can set some general configuration options to control how the cache performs. -See https://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html. +See link:https://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html[CacheConfig]. After setting any of these options, restart or rolling restart your cluster for the configuration to take effect. Check logs for errors or unexpected behavior. @@ -729,13 +719,13 @@ The way to calculate how much memory is available in HBase for caching is: number of region servers * heap size * hfile.block.cache.size * 0.99 ---- -The default value for the block cache is 0.25 which represents 25% of the available heap. +The default value for the block cache is 0.4 which represents 40% of the available heap. The last value (99%) is the default acceptable loading factor in the LRU cache after which eviction is started. 
The reason it is included in this equation is that it would be unrealistic to say that it is possible to use 100% of the available memory since this would make the process blocking from the point where it loads new blocks. Here are some examples: -* One region server with the heap size set to 1 GB and the default block cache size will have 253 MB of block cache available. -* 20 region servers with the heap size set to 8 GB and a default block cache size will have 39.6 of block cache. +* One region server with the heap size set to 1 GB and the default block cache size will have 405 MB of block cache available. +* 20 region servers with the heap size set to 8 GB and a default block cache size will have 63.3 GB of block cache. * 100 region servers with the heap size set to 24 GB and a block cache size of 0.5 will have about 1.16 TB of block cache. Your data is not the only resident of the block cache. @@ -789,32 +779,59 @@ Since link:https://issues.apache.org/jira/browse/HBASE-4683[HBASE-4683 Always ca [[enable.bucketcache]] ===== How to Enable BucketCache -The usual deploy of BucketCache is via a managing class that sets up two caching tiers: an L1 on-heap cache implemented by LruBlockCache and a second L2 cache implemented with BucketCache. +The usual deploy of BucketCache is via a managing class that sets up two caching tiers: +an on-heap cache implemented by LruBlockCache and a second cache implemented with BucketCache. The managing class is link:https://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.html[CombinedBlockCache] by default. The previous link describes the caching 'policy' implemented by CombinedBlockCache. -In short, it works by keeping meta blocks -- INDEX and BLOOM in the L1, on-heap LruBlockCache tier -- and DATA blocks are kept in the L2, BucketCache tier.
-It is possible to amend this behavior in HBase since version 1.0 and ask that a column family have both its meta and DATA blocks hosted on-heap in the L1 tier by setting `cacheDataInL1` via `(HColumnDescriptor.setCacheDataInL1(true)` or in the shell, creating or amending column families setting `CACHE_DATA_IN_L1` to true: e.g. +In short, it works by keeping meta blocks -- INDEX and BLOOM in the on-heap LruBlockCache tier -- and DATA blocks are kept in the BucketCache tier. + +==== +Pre-hbase-2.0.0 versions:: +Fetching will always be slower when fetching from BucketCache in pre-hbase-2.0.0, +as compared to the native on-heap LruBlockCache. However, latencies tend to be less +erratic across time, because there is less garbage collection when you use BucketCache since it is managing BlockCache allocations, not the GC. +If the BucketCache is deployed in off-heap mode, this memory is not managed by the GC at all. +This is why you'd use BucketCache in pre-2.0.0, so your latencies are less erratic, +to mitigate GCs and heap fragmentation, and so you can safely use more memory. +See Nick Dimiduk's link:http://www.n10k.com/blog/blockcache-101/[BlockCache 101] for comparisons running on-heap vs off-heap tests. +Also see link:https://people.apache.org/~stack/bc/[Comparing BlockCache Deploys] which finds that if your dataset fits inside your LruBlockCache deploy, use it otherwise if you are experiencing cache churn (or you want your cache to exist beyond the vagaries of java GC), use BucketCache. ++ +In pre-2.0.0, +one can configure the BucketCache so it receives the `victim` of an LruBlockCache eviction. +All Data and index blocks are cached in L1 first. When eviction happens from L1, the blocks (or `victims`) will get moved to L2. +Set `cacheDataInL1` via `(HColumnDescriptor.setCacheDataInL1(true)` or in the shell, creating or amending column families setting `CACHE_DATA_IN_L1` to true: e.g. 
[source] ---- hbase(main):003:0> create 't', {NAME => 't', CONFIGURATION => {CACHE_DATA_IN_L1 => 'true'}} ---- -The BucketCache Block Cache can be deployed on-heap, off-heap, or file based. -You set which via the `hbase.bucketcache.ioengine` setting. -Setting it to `heap` will have BucketCache deployed inside the allocated Java heap. -Setting it to `offheap` will have BucketCache make its allocations off-heap, and an ioengine setting of `file:PATH_TO_FILE` will direct BucketCache to use a file caching (Useful in particular if you have some fast I/O attached to the box such as SSDs). +hbase-2.0.0+ versions:: +HBASE-11425 changed the HBase read path so it could hold the read-data off-heap avoiding copying of cached data on to the java heap. +See <<regionserver.offheap>>. In hbase-2.0.0, off-heap latencies approach those of on-heap cache latencies with the added +benefit of NOT provoking GC. ++ +From HBase 2.0.0 onwards, the notions of L1 and L2 have been deprecated. When BucketCache is turned on, the DATA blocks will always go to BucketCache and INDEX/BLOOM blocks go to on heap LRUBlockCache. `cacheDataInL1` support has been removed. +==== -It is possible to deploy an L1+L2 setup where we bypass the CombinedBlockCache policy and have BucketCache working as a strict L2 cache to the L1 LruBlockCache. -For such a setup, set `CacheConfig.BUCKET_CACHE_COMBINED_KEY` to `false`. +The BucketCache Block Cache can be deployed in _off-heap_, _file_ or _mmapped_ file mode. + + +You set which via the `hbase.bucketcache.ioengine` setting. +Setting it to `offheap` will have BucketCache make its allocations off-heap, and an ioengine setting of `file:PATH_TO_FILE` will direct BucketCache to use file caching (Useful in particular if you have some fast I/O attached to the box such as SSDs). From 2.0.0, it is possible to have more than one file backing the BucketCache. This is very useful especially when the Cache size requirement is high.
For multiple backing files, configure ioengine as `files:PATH_TO_FILE1,PATH_TO_FILE2,PATH_TO_FILE3`. BucketCache can be configured to use an mmapped file also. Configure ioengine as `mmap:PATH_TO_FILE` for this. + +It is possible to deploy a tiered setup where we bypass the CombinedBlockCache policy and have BucketCache working as a strict L2 cache to the L1 LruBlockCache. +For such a setup, set `hbase.bucketcache.combinedcache.enabled` to `false`. In this mode, on eviction from L1, blocks go to L2. When a block is cached, it is cached first in L1. When we go to look for a cached block, we look first in L1 and if none found, then search L2. Let us call this deploy format, _Raw L1+L2_. +NOTE: This L1+L2 mode is removed from 2.0.0. When BucketCache is used, it will be strictly the DATA cache and the LruBlockCache will cache INDEX/META blocks. Other BucketCache configs include: specifying a location to persist cache to across restarts, how many threads to use writing the cache, etc. See the link:https://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html[CacheConfig.html] class for configuration options and descriptions. - +To check it is enabled, look for the log line describing cache setup; it will detail how BucketCache has been deployed. +Also see the UI. It will detail the cache tiering and their configuration. ====== BucketCache Example Configuration This sample provides a configuration for a 4 GB off-heap BucketCache with a 1 GB on-heap cache. @@ -876,9 +893,10 @@ The following example configures buckets of size 4096 and 8192. [NOTE] ==== The default maximum direct memory varies by JVM. -Traditionally it is 64M or some relation to allocated heap size (-Xmx) or no limit at all (JDK7 apparently). HBase servers use direct memory, in particular short-circuit reading, the hosted DFSClient will allocate direct memory buffers. +Traditionally it is 64M or some relation to allocated heap size (-Xmx) or no limit at all (JDK7 apparently).
HBase servers use direct memory, in particular short-circuit reading (See <>), the hosted DFSClient will allocate direct memory buffers. How much the DFSClient uses is not easy to quantify; it is the number of open HFiles * `hbase.dfs.client.read.shortcircuit.buffer.size` where `hbase.dfs.client.read.shortcircuit.buffer.size` is set to 128k in HBase -- see _hbase-default.xml_ default configurations. If you do off-heap block caching, you'll be making use of direct memory. -Starting your JVM, make sure the `-XX:MaxDirectMemorySize` setting in _conf/hbase-env.sh_ is set to some value that is higher than what you have allocated to your off-heap BlockCache (`hbase.bucketcache.size`). It should be larger than your off-heap block cache and then some for DFSClient usage (How much the DFSClient uses is not easy to quantify; it is the number of open HFiles * `hbase.dfs.client.read.shortcircuit.buffer.size` where `hbase.dfs.client.read.shortcircuit.buffer.size` is set to 128k in HBase -- see _hbase-default.xml_ default configurations). Direct memory, which is part of the Java process heap, is separate from the object heap allocated by -Xmx. +The RPCServer uses a ByteBuffer pool. From 2.0.0, these buffers are off-heap ByteBuffers. +Starting your JVM, make sure the `-XX:MaxDirectMemorySize` setting in _conf/hbase-env.sh_ considers off-heap BlockCache (`hbase.bucketcache.size`), DFSClient usage, RPC side ByteBufferPool max size. This has to be a bit higher than the sum of the off-heap BlockCache size and the max ByteBufferPool size. Allocating an extra of 1-2 GB for the max direct memory size has worked in tests. Direct memory, which is part of the Java process heap, is separate from the object heap allocated by -Xmx. The value allocated by `MaxDirectMemorySize` must not exceed physical RAM, and is likely to be less than the total available RAM due to other memory requirements and system constraints.
You can see how much memory -- on-heap and off-heap/direct -- a RegionServer is configured to use and how much it is using at any one time by looking at the _Server Metrics: Memory_ tab in the UI. @@ -898,7 +916,7 @@ If the deploy was using CombinedBlockCache, then the LruBlockCache L1 size was c where size-of-bucket-cache itself is EITHER the value of the configuration `hbase.bucketcache.size` IF it was specified as Megabytes OR `hbase.bucketcache.size` * `-XX:MaxDirectMemorySize` if `hbase.bucketcache.size` is between 0 and 1.0. In 1.0, it should be more straight-forward. -L1 LruBlockCache size is set as a fraction of java heap using `hfile.block.cache.size setting` (not the best name) and L2 is set as above either in absolute Megabytes or as a fraction of allocated maximum direct memory. +Onheap LruBlockCache size is set as a fraction of java heap using `hfile.block.cache.size setting` (not the best name) and BucketCache is set as above in absolute Megabytes. ==== ==== Compressed BlockCache @@ -911,6 +929,54 @@ For a RegionServer hosting data that can comfortably fit into cache, or if your The compressed BlockCache is disabled by default. To enable it, set `hbase.block.data.cachecompressed` to `true` in _hbase-site.xml_ on all RegionServers. +[[regionserver.offheap]] +=== RegionServer Offheap Read/Write Path + +[[regionserver.offheap.readpath]] +==== Offheap read-path +In hbase-2.0.0, link:https://issues.apache.org/jira/browse/HBASE-11425[HBASE-11425] changed the HBase read path so it +could hold the read-data off-heap avoiding copying of cached data on to the java heap. +This reduces GC pauses given there is less garbage made and so less to clear. The off-heap read path has a performance +that is similar/better to that of the on-heap LRU cache. This feature is available since HBase 2.0.0. +If the BucketCache is in `file` mode, fetching will always be slower compared to the native on-heap LruBlockCache. 
+Refer to below blogs for more details and test results on off heaped read path +link:https://blogs.apache.org/hbase/entry/offheaping_the_read_path_in[Offheaping the Read Path in Apache HBase: Part 1 of 2] +and link:https://blogs.apache.org/hbase/entry/offheap-read-path-in-production[Offheap Read-Path in Production - The Alibaba story] + +For an end-to-end off-heaped read-path, first of all there should be an off-heap backed <>(BC). Configure 'hbase.bucketcache.ioengine' to off-heap in +_hbase-site.xml_. Also specify the total capacity of the BC using `hbase.bucketcache.size` config. Please remember to adjust value of 'HBASE_OFFHEAPSIZE' in +_hbase-env.sh_. This is how we specify the max possible off-heap memory allocation for the +RegionServer java process. This should be bigger than the off-heap BC size. Please keep in mind that there is no default for `hbase.bucketcache.ioengine` +which means the BC is turned OFF by default (See <>). + +Next thing to tune is the ByteBuffer pool on the RPC server side. +The buffers from this pool will be used to accumulate the cell bytes and create a result cell block to send back to the client side. +`hbase.ipc.server.reservoir.enabled` can be used to turn this pool ON or OFF. By default this pool is ON and available. HBase will create off heap ByteBuffers +and pool them. Please make sure not to turn this OFF if you want end-to-end off-heaping in read path. +If this pool is turned off, the server will create temp buffers on heap to accumulate the cell bytes and make a result cell block. This can impact the GC on a highly read loaded server. +The user can tune this pool with respect to how many buffers are in the pool and what should be the size of each ByteBuffer. +Use the config `hbase.ipc.server.reservoir.initial.buffer.size` to tune each of the buffer sizes. Default is 64 KB. + +When the read pattern is a random row read load and each of the rows are smaller in size compared to this 64 KB, try reducing this. 
+When the result size is larger than one ByteBuffer size, the server will try to grab more than one buffer and make a result cell block out of these. When the pool is running out of buffers, the server will end up creating temporary on-heap buffers. + +The maximum number of ByteBuffers in the pool can be tuned using the config 'hbase.ipc.server.reservoir.initial.max'. Its value defaults to 64 * region server handlers configured (See the config 'hbase.regionserver.handler.count'). The math is such that by default we consider 2 MB as the result cell block size per read result and each handler will be handling a read. For 2 MB size, we need 32 buffers each of size 64 KB (See default buffer size in pool). So per handler 32 ByteBuffers(BB). We allocate twice this size as the max BBs count such that one handler can be creating the response and handing it to the RPC Responder thread and then handling a new request creating a new response cell block (using pooled buffers). Even if the responder could not send back the first TCP reply immediately, our count should allow that we should still have enough buffers in our pool without having to make temporary buffers on the heap. Again for smaller sized random row reads, tune this max count. There are lazily created buffers and the count is the max count to be pooled. + +If you still see GC issues even after making end-to-end read path off-heap, look for issues in the appropriate buffer pool. Check the below RegionServer log with INFO level: +[source] +---- +Pool already reached its max capacity : XXX and no free buffers now. Consider increasing the value for 'hbase.ipc.server.reservoir.initial.max' ? +---- + +The setting for _HBASE_OFFHEAPSIZE_ in _hbase-env.sh_ should consider this off heap buffer pool at the RPC side also. We need to config this max off heap size for the RegionServer as a bit higher than the sum of this max pool size and the off heap cache size. 
The TCP layer will also need to create direct bytebuffers for TCP communication. Also the DFS client will need some off-heap to do its workings especially if short-circuit reads are configured. Allocating an extra of 1 - 2 GB for the max direct memory size has worked in tests. + +If you are using coprocessors and refer to the Cells in the read results, DO NOT store references to these Cells outside the scope of the CP hook methods. Sometimes the CPs need to store info about the cell (like its row key) for consideration in the next CP hook call etc. For such cases, please clone the required fields of the entire Cell as per the use cases. [ See CellUtil#cloneXXX(Cell) APIs ] + +[[regionserver.offheap.writepath]] +==== Offheap write-path + +TODO + [[regionserver_splitting_implementation]] === RegionServer Splitting Implementation @@ -951,8 +1017,11 @@ However, if a RegionServer crashes or becomes unavailable before the MemStore is If writing to the WAL fails, the entire operation to modify the data fails. HBase uses an implementation of the link:https://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/wal/WAL.html[WAL] interface. -Usually, there is only one instance of a WAL per RegionServer. -The RegionServer records Puts and Deletes to it, before recording them to the <> for the affected <>. +Usually, there is only one instance of a WAL per RegionServer. An exception +is the RegionServer that is carrying _hbase:meta_; the _meta_ table gets its +own dedicated WAL. +The RegionServer records Puts and Deletes to its WAL, before recording +these Mutations to the <> for the affected <>. .The HLog [NOTE] @@ -962,9 +1031,33 @@ In 0.94, HLog was the name of the implementation of the WAL. You will likely find references to the HLog in documentation tailored to these older versions. ==== -The WAL resides in HDFS in the _/hbase/WALs/_ directory (prior to HBase 0.94, they were stored in _/hbase/.logs/_), with subdirectories per region.
+The WAL resides in HDFS in the _/hbase/WALs/_ directory, with subdirectories per region. -For more general information about the concept of write ahead logs, see the Wikipedia link:http://en.wikipedia.org/wiki/Write-ahead_logging[Write-Ahead Log] article. +For more general information about the concept of write ahead logs, see the Wikipedia +link:http://en.wikipedia.org/wiki/Write-ahead_logging[Write-Ahead Log] article. + + +[[wal.providers]] +==== WAL Providers +In HBase, there are a number of WAL implementations (or 'Providers'). Each is known +by a short name label (that unfortunately is not always descriptive). You set the provider in +_hbase-site.xml_ passing the WAL provider short-name as the value on the +_hbase.wal.provider_ property (Set the provider for _hbase:meta_ using the +_hbase.wal.meta_provider_ property). + + * _asyncfs_: The *default*. New since hbase-2.0.0 (HBASE-15536, HBASE-14790). This _AsyncFSWAL_ provider, as it identifies itself in RegionServer logs, is built on a new non-blocking dfsclient implementation. It is currently resident in the hbase codebase but intent is to move it back up into HDFS itself. WALs edits are written concurrently ("fan-out") style to each of the WAL-block replicas on each DataNode rather than in a chained pipeline as the default client does. Latencies should be better. See link:https://www.slideshare.net/HBaseCon/apache-hbase-improvements-and-practices-at-xiaomi[Apache HBase Improvements and Practices at Xiaomi] at slide 14 onward for more detail on implementation. + * _filesystem_: This was the default in hbase-1.x releases. It is built on the blocking _DFSClient_ and writes to replicas in classic _DFSClient_ pipeline mode. In logs it identifies as _FSHLog_ or _FSHLogProvider_. + * _multiwal_: This provider is made of multiple instances of _asyncfs_ or _filesystem_. See the next section for more on _multiwal_.
+ +Look for the lines like the below in the RegionServer log to see which provider is in place (The below shows the default AsyncFSWALProvider): + +---- +2018-04-02 13:22:37,983 INFO [regionserver/ve0528:16020] wal.WALFactory: Instantiating WALProvider of type class org.apache.hadoop.hbase.wal.AsyncFSWALProvider +---- + +NOTE: As the _AsyncFSWAL_ hacks into the internal of DFSClient implementation, it will be easily broken by upgrading the hadoop dependencies, even for a simple patch release. So if you do not specify the wal provider explicitly, we will first try to use the _asyncfs_, if failed, we will fall back to use _filesystem_. And notice that this may not always work, so if you still have problem starting HBase due to the problem of starting _AsyncFSWAL_, please specify _filesystem_ explicitly in the config file. + +NOTE: EC support has been added to hadoop-3.x, and it is incompatible with WAL as the EC output stream does not support hflush/hsync. In order to create a non-EC file in an EC directory, we need to use the new builder-based create API for _FileSystem_, but it is only introduced in hadoop-2.9+ and for HBase we still need to support hadoop-2.7.x. So please do not enable EC for the WAL directory until we find a way to deal with it. ==== MultiWAL With a single WAL per RegionServer, the RegionServer must write to the WAL serially, because HDFS files must be sequential. This causes the WAL to be a performance bottleneck. @@ -1090,28 +1183,28 @@ The general process for log splitting, as described in <>). + * _ASYNC_WAL_: Write the WAL asynchronously; do not hold-up clients waiting on the sync of their write to the filesystem but return immediately. The edit becomes visible. Meanwhile, in the background, the Mutation will be flushed to the WAL at some time later. This option currently may lose data. See HBASE-16689. + * _SYNC_WAL_: The *default*. Each edit is sync'd to HDFS before we return success to the client. 
+ * _FSYNC_WAL_: Each edit is fsync'd to HDFS and the filesystem before we return success to the client. -To enable WAL compression, set the `hbase.regionserver.wal.enablecompression` property to `true`. -The default value for this property is `false`. -By default, WAL tag compression is turned on when WAL compression is enabled. -You can turn off WAL tag compression by setting the `hbase.regionserver.wal.tags.enablecompression` property to 'false'. - -A possible downside to WAL compression is that we lose more data from the last block in the WAL if it ill-terminated -mid-write. If entries in this last block were added with new dictionary entries but we failed persist the amended -dictionary because of an abrupt termination, a read of this last block may not be able to resolve last-written entries. +Do not confuse the _ASYNC_WAL_ option on a Mutation or Table with the _AsyncFSWAL_ writer; they are distinct +options that are, unfortunately, closely named. [[wal.disable]] ==== Disabling the WAL @@ -1249,6 +1338,7 @@ There is no way to disable the WAL for only a specific table. WARNING: If you disable the WAL for anything other than bulk loads, your data is at risk. + [[regions.arch]] == Regions @@ -1605,20 +1695,20 @@ Also see <> for information about the HFile v2 format that was included [[hfile_tool]] ===== HFile Tool -To view a textualized version of HFile content, you can use the `org.apache.hadoop.hbase.io.hfile.HFile` tool. +To view a textualized version of HFile content, you can use the `hbase hfile` tool. 
Type the following to see usage: [source,bash] ---- -$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile +$ ${HBASE_HOME}/bin/hbase hfile ---- -For example, to view the content of the file _hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475_, type the following: +For example, to view the content of the file _hdfs://10.81.47.41:8020/hbase/default/TEST/1418428042/DSMP/4759508618286845475_, type the following: [source,bash] ---- - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475 + $ ${HBASE_HOME}/bin/hbase hfile -v -f hdfs://10.81.47.41:8020/hbase/default/TEST/1418428042/DSMP/4759508618286845475 ---- If you leave off the option -v to see just a summary on the HFile. -See usage for other things to do with the `HFile` tool. +See usage for other things to do with the `hfile` tool. [[store.file.dir]] ===== StoreFile Directory Structure on HDFS @@ -1773,9 +1863,20 @@ These parameters will be explained in context, and then will be given in a table ====== Being Stuck When the MemStore gets too large, it needs to flush its contents to a StoreFile. -However, a Store can only have `hbase.hstore.blockingStoreFiles` files, so the MemStore needs to wait for the number of StoreFiles to be reduced by one or more compactions. -However, if the MemStore grows larger than `hbase.hregion.memstore.flush.size`, it is not able to flush its contents to a StoreFile. -If the MemStore is too large and the number of StoreFiles is also too high, the algorithm is said to be "stuck". The compaction algorithm checks for this "stuck" situation and provides mechanisms to alleviate it. +However, Stores are configured with a bound on the number of StoreFiles, +`hbase.hstore.blockingStoreFiles`, and if in excess, the MemStore flush must wait +until the StoreFile count is reduced by one or more compactions. 
If the MemStore +is too large and the number of StoreFiles is also too high, the algorithm is said +to be "stuck". By default we'll wait on compactions up to +`hbase.hstore.blockingWaitTime` milliseconds. If this period expires, we'll flush +anyway even though we are in excess of the +`hbase.hstore.blockingStoreFiles` count. + +Upping the `hbase.hstore.blockingStoreFiles` count will allow flushes to happen +but a Store with many StoreFiles will likely have higher read latencies. Try to +figure out why Compactions are not keeping up. Is it a write spurt that is bringing +about this situation or is it a regular occurrence and the cluster is under-provisioned +for the volume of writes? [[exploringcompaction.policy]] ====== The ExploringCompactionPolicy Algorithm @@ -2439,6 +2540,8 @@ See the above HDFS Architecture link for more information. [[arch.timelineconsistent.reads]] == Timeline-consistent High Available Reads +NOTE: The current <<amv2,Assignment Manager V2>> does not work well with region replicas, so this feature may be broken. Use it with caution. + [[casestudies.timelineconsistent.intro]] === Introduction diff --git a/src/main/asciidoc/_chapters/backup_restore.adoc b/src/main/asciidoc/_chapters/backup_restore.adoc deleted file mode 100644 index c6dac85a77d..00000000000 --- a/src/main/asciidoc/_chapters/backup_restore.adoc +++ /dev/null @@ -1,912 +0,0 @@ -//// -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -//// - -[[backuprestore]] -= Backup and Restore -:doctype: book -:numbered: -:toc: left -:icons: font -:experimental: - -[[br.overview]] -== Overview - -Backup and restore is a standard operation provided by many databases. An effective backup and restore -strategy helps ensure that users can recover data in case of unexpected failures. The HBase backup and restore -feature helps ensure that enterprises using HBase as a canonical data repository can recover from catastrophic -failures. Another important feature is the ability to restore the database to a particular -point-in-time, commonly referred to as a snapshot. - -The HBase backup and restore feature provides the ability to create full backups and incremental backups on -tables in an HBase cluster. The full backup is the foundation on which incremental backups are applied -to build iterative snapshots. Incremental backups can be run on a schedule to capture changes over time, -for example by using a Cron task. Incremental backups are more cost-effective than full backups because they only capture -the changes since the last backup and they also enable administrators to restore the database to any prior incremental backup. Furthermore, the -utilities also enable table-level data backup-and-recovery if you do not want to restore the entire dataset -of the backup. - -The backup and restore feature supplements the HBase Replication feature. 
While HBase replication is ideal for -creating "hot" copies of the data (where the replicated data is immediately available for query), the backup and -restore feature is ideal for creating "cold" copies of data (where a manual step must be taken to restore the system). -Previously, users only had the ability to create full backups via the ExportSnapshot functionality. The incremental -backup implementation is the novel improvement over the previous "art" provided by ExportSnapshot. - -[[br.terminology]] -== Terminology - -The backup and restore feature introduces new terminology which can be used to understand how control flows through the -system. - -* _A backup_: A logical unit of data and metadata which can restore a table to its state at a specific point in time. -* _Full backup_: a type of backup which wholly encapsulates the contents of the table at a point in time. -* _Incremental backup_: a type of backup which contains the changes in a table since a full backup. -* _Backup set_: A user-defined name which references one or more tables over which a backup can be executed. -* _Backup ID_: A unique names which identifies one backup from the rest, e.g. `backupId_1467823988425` - -[[br.planning]] -== Planning - -There are some common strategies which can be used to implement backup and restore in your environment. The following section -shows how these strategies are implemented and identifies potential tradeoffs with each. - -WARNING: This backup and restore tools has not been tested on Transparent Data Encryption (TDE) enabled HDFS clusters. -This is related to the open issue link:https://issues.apache.org/jira/browse/HBASE-16178[HBASE-16178]. - -[[br.intracluster.backup]] -=== Backup within a cluster - -This strategy stores the backups on the same cluster as where the backup was taken. This approach is only appropriate for testing -as it does not provide any additional safety on top of what the software itself already provides. 
- -.Intra-Cluster Backup -image::backup-intra-cluster.png[] - -[[br.dedicated.cluster.backup]] -=== Backup using a dedicated cluster - -This strategy provides greater fault tolerance and provides a path towards disaster recovery. In this setting, you will -store the backup on a separate HDFS cluster by supplying the backup destination cluster’s HDFS URL to the backup utility. -You should consider backing up to a different physical location, such as a different data center. - -Typically, a backup-dedicated HDFS cluster uses a more economical hardware profile to save money. - -.Dedicated HDFS Cluster Backup -image::backup-dedicated-cluster.png[] - -[[br.cloud.or.vendor.backup]] -=== Backup to the Cloud or a storage vendor appliance - -Another approach to safeguarding HBase incremental backups is to store the data on provisioned, secure servers that belong -to third-party vendors and that are located off-site. The vendor can be a public cloud provider or a storage vendor who uses -a Hadoop-compatible file system, such as S3 and other HDFS-compatible destinations. - -.Backup to Cloud or Vendor Storage Solutions -image::backup-cloud-appliance.png[] - -NOTE: The HBase backup utility does not support backup to multiple destinations. A workaround is to manually create copies -of the backup files from HDFS or S3. - -[[br.initial.setup]] -== First-time configuration steps - -This section contains the necessary configuration changes that must be made in order to use the backup and restore feature. -As this feature makes significant use of YARN's MapReduce framework to parallelize these I/O heavy operations, configuration -changes extend outside of just `hbase-site.xml`. - -=== Allow the "hbase" system user in YARN - -The YARN *container-executor.cfg* configuration file must have the following property setting: _allowed.system.users=hbase_. No spaces -are allowed in entries of this configuration file. 
- -WARNING: Skipping this step will result in runtime errors when executing the first backup tasks. - -*Example of a valid container-executor.cfg file for backup and restore:* - -[source] ----- -yarn.nodemanager.log-dirs=/var/log/hadoop/mapred -yarn.nodemanager.linux-container-executor.group=yarn -banned.users=hdfs,yarn,mapred,bin -allowed.system.users=hbase -min.user.id=500 ----- - -=== HBase specific changes - -Add the following properties to hbase-site.xml and restart HBase if it is already running. - -NOTE: The ",..." is an ellipsis meant to imply that this is a comma-separated list of values, not literal text which should be added to hbase-site.xml. - -[source] ----- - - hbase.backup.enable - true - - - hbase.master.logcleaner.plugins - org.apache.hadoop.hbase.backup.master.BackupLogCleaner,... - - - hbase.procedure.master.classes - org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager,... - - - hbase.procedure.regionserver.classes - org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager,... - - - hbase.coprocessor.region.classes - org.apache.hadoop.hbase.backup.BackupObserver,... - - - hbase.master.hfilecleaner.plugins - org.apache.hadoop.hbase.backup.BackupHFileCleaner,... - ----- - -== Backup and Restore commands - -This covers the command-line utilities that administrators would run to create, restore, and merge backups. Tools to -inspect details on specific backup sessions is covered in the next section, <>. - -Run the command `hbase backup help ` to access the online help that provides basic information about a command -and its options. The below information is captured in this help message for each command. - -// hbase backup create - -[[br.creating.complete.backup]] -### Creating a Backup Image - -[NOTE] -==== -For HBase clusters also using Apache Phoenix: include the SQL system catalog tables in the backup. 
In the event that you -need to restore the HBase backup, access to the system catalog tables enable you to resume Phoenix interoperability with the -restored data. -==== - -The first step in running the backup and restore utilities is to perform a full backup and to store the data in a separate image -from the source. At a minimum, you must do this to get a baseline before you can rely on incremental backups. - -Run the following command as HBase superuser: - -[source] ----- -hbase backup create ----- - -After the command finishes running, the console prints a SUCCESS or FAILURE status message. The SUCCESS message includes a _backup_ ID. -The backup ID is the Unix time (also known as Epoch time) that the HBase master received the backup request from the client. - -[TIP] -==== -Record the backup ID that appears at the end of a successful backup. In case the source cluster fails and you need to recover the -dataset with a restore operation, having the backup ID readily available can save time. -==== - -[[br.create.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_type_:: - The type of backup to execute: _full_ or _incremental_. As a reminder, an _incremental_ backup requires a _full_ backup to - already exist. - -_backup_path_:: - The _backup_path_ argument specifies the full filesystem URI of where to store the backup image. Valid prefixes are - are _hdfs:_, _webhdfs:_, _gpfs:_, and _s3fs:_. - -[[br.create.named.cli.arguments]] -#### Named Command-Line Arguments - -_-t _:: - A comma-separated list of tables to back up. If no tables are specified, all tables are backed up. No regular-expression or - wildcard support is present; all table names must be explicitly listed. See <> for more - information about peforming operations on collections of tables. Mutually exclusive with the _-s_ option; one of these - named options are required. - -_-s _:: - Identify tables to backup based on a backup set. See <> for the purpose and usage - of backup sets. 
Mutually exclusive with the _-t_ option. - -_-w _:: - (Optional) Specifies the number of parallel workers to copy data to backup destination. Backups are currently executed by MapReduce jobs - so this value corresponds to the number of Mappers that will be spawned by the job. - -_-b _:: - (Optional) Specifies the bandwidth of each worker in MB per second. - -_-d_:: - (Optional) Enables "DEBUG" mode which prints additional logging about the backup creation. - -_-q _:: - (Optional) Allows specification of the name of a YARN queue which the MapReduce job to create the backup should be executed in. This option - is useful to prevent backup tasks from stealing resources away from other MapReduce jobs of high importance. - -[[br.usage.examples]] -#### Example usage - -[source] ----- -$ hbase backup create full hdfs://host5:8020/data/backup -t SALES2,SALES3 -w 3 ----- - -This command creates a full backup image of two tables, SALES2 and SALES3, in the HDFS instance who NameNode is host5:8020 -in the path _/data/backup_. The _-w_ option specifies that no more than three parallel works complete the operation. - -// hbase backup restore - -[[br.restoring.backup]] -### Restoring a Backup Image - -Run the following command as an HBase superuser. You can only restore a backup on a running HBase cluster because the data must be -redistributed the RegionServers for the operation to complete successfully. - -[source] ----- -hbase restore ----- - -[[br.restore.positional.args]] -#### Positional Command-Line Arguments - -_backup_path_:: - The _backup_path_ argument specifies the full filesystem URI of where to store the backup image. Valid prefixes are - are _hdfs:_, _webhdfs:_, _gpfs:_, and _s3fs:_. - -_backup_id_:: - The backup ID that uniquely identifies the backup image to be restored. - - -[[br.restore.named.args]] -#### Named Command-Line Arguments - -_-t _:: - A comma-separated list of tables to restore. 
See <> for more - information about peforming operations on collections of tables. Mutually exclusive with the _-s_ option; one of these - named options are required. - -_-s _:: - Identify tables to backup based on a backup set. See <> for the purpose and usage - of backup sets. Mutually exclusive with the _-t_ option. - -_-q _:: - (Optional) Allows specification of the name of a YARN queue which the MapReduce job to create the backup should be executed in. This option - is useful to prevent backup tasks from stealing resources away from other MapReduce jobs of high importance. - -_-c_:: - (Optional) Perform a dry-run of the restore. The actions are checked, but not executed. - -_-m _:: - (Optional) A comma-separated list of tables to restore into. If this option is not provided, the original table name is used. When - this option is provided, there must be an equal number of entries provided in the `-t` option. - -_-o_:: - (Optional) Overwrites the target table for the restore if the table already exists. - - -[[br.restore.usage]] -#### Example of Usage - -[source] ----- -hbase backup restore /tmp/backup_incremental backupId_1467823988425 -t mytable1,mytable2 ----- - -This command restores two tables of an incremental backup image. In this example: -• `/tmp/backup_incremental` is the path to the directory containing the backup image. -• `backupId_1467823988425` is the backup ID. -• `mytable1` and `mytable2` are the names of tables in the backup image to be restored. - -// hbase backup merge - -[[br.merge.backup]] -### Merging Incremental Backup Images - -This command can be used to merge two or more incremental backup images into a single incremental -backup image. This can be used to consolidate multiple, small incremental backup images into a single -larger incremental backup image. This command could be used to merge hourly incremental backups -into a daily incremental backup image, or daily incremental backups into a weekly incremental backup. 
- -[source] ----- -$ hbase backup merge ----- - -[[br.merge.backup.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_backup_ids_:: - A comma-separated list of incremental backup image IDs that are to be combined into a single image. - -[[br.merge.backup.named.cli.arguments]] -#### Named Command-Line Arguments - -None. - -[[br.merge.backup.example]] -#### Example usage - -[source] ----- -$ hbase backup merge backupId_1467823988425,backupId_1467827588425 ----- - -// hbase backup set - -[[br.using.backup.sets]] -### Using Backup Sets - -Backup sets can ease the administration of HBase data backups and restores by reducing the amount of repetitive input -of table names. You can group tables into a named backup set with the `hbase backup set add` command. You can then use -the -set option to invoke the name of a backup set in the `hbase backup create` or `hbase backup restore` rather than list -individually every table in the group. You can have multiple backup sets. - -NOTE: Note the differentiation between the `hbase backup set add` command and the _-set_ option. The `hbase backup set add` -command must be run before using the `-set` option in a different command because backup sets must be named and defined -before using backup sets as a shortcut. - -If you run the `hbase backup set add` command and specify a backup set name that does not yet exist on your system, a new set -is created. If you run the command with the name of an existing backup set name, then the tables that you specify are added -to the set. - -In this command, the backup set name is case-sensitive. - -NOTE: The metadata of backup sets are stored within HBase. If you do not have access to the original HBase cluster with the -backup set metadata, then you must specify individual table names to restore the data. 
- -To create a backup set, run the following command as the HBase superuser: - -[source] ----- -$ hbase backup set ----- - -[[br.set.subcommands]] -#### Backup Set Subcommands - -The following list details subcommands of the hbase backup set command. - -NOTE: You must enter one (and no more than one) of the following subcommands after hbase backup set to complete an operation. -Also, the backup set name is case-sensitive in the command-line utility. - -_add_:: - Adds table[s] to a backup set. Specify a _backup_set_name_ value after this argument to create a backup set. - -_remove_:: - Removes tables from the set. Specify the tables to remove in the tables argument. - -_list_:: - Lists all backup sets. - -_describe_:: - Displays a description of a backup set. The information includes whether the set has full - or incremental backups, start and end times of the backups, and a list of the tables in the set. This subcommand must precede - a valid value for the _backup_set_name_ value. - -_delete_:: - Deletes a backup set. Enter the value for the _backup_set_name_ option directly after the `hbase backup set delete` command. - -[[br.set.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_backup_set_name_:: - Use to assign or invoke a backup set name. The backup set name must contain only printable characters and cannot have any spaces. - -_tables_:: - List of tables (or a single table) to include in the backup set. Enter the table names as a comma-separated list. If no tables - are specified, all tables are included in the set. - -TIP: Maintain a log or other record of the case-sensitive backup set names and the corresponding tables in each set on a separate -or remote cluster, backup strategy. This information can help you in case of failure on the primary cluster. 
- -[[br.set.usage]] -#### Example of Usage - -[source] ----- -$ hbase backup set add Q1Data TEAM3,TEAM_4 ----- - -Depending on the environment, this command results in _one_ of the following actions: - -* If the `Q1Data` backup set does not exist, a backup set containing tables `TEAM_3` and `TEAM_4` is created. -* If the `Q1Data` backup set exists already, the tables `TEAM_3` and `TEAM_4` are added to the `Q1Data` backup set. - -[[br.administration]] -## Administration of Backup Images - -The `hbase backup` command has several subcommands that help with administering backup images as they accumulate. Most production -environments require recurring backups, so it is necessary to have utilities to help manage the data of the backup repository. -Some subcommands enable you to find information that can help identify backups that are relevant in a search for particular data. -You can also delete backup images. - -The following list details each `hbase backup subcommand` that can help administer backups. Run the full command-subcommand line as -the HBase superuser. - -// hbase backup progress - -[[br.managing.backup.progress]] -### Managing Backup Progress - -You can monitor a running backup in another terminal session by running the _hbase backup progress_ command and specifying the backup ID as an argument. - -For example, run the following command as hbase superuser to view the progress of a backup - -[source] ----- -$ hbase backup progress ----- - -[[br.progress.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_backup_id_:: - Specifies the backup that you want to monitor by seeing the progress information. The backupId is case-sensitive. - -[[br.progress.named.cli.arguments]] -#### Named Command-Line Arguments - -None. 
- -[[br.progress.example]] -#### Example usage - -[source] ----- -hbase backup progress backupId_1467823988425 ----- - -// hbase backup history - -[[br.managing.backup.history]] -### Managing Backup History - -This command displays a log of backup sessions. The information for each session includes backup ID, type (full or incremental), the tables -in the backup, status, and start and end time. Specify the number of backup sessions to display with the optional -n argument. - -[source] ----- -$ hbase backup history ----- - -[[br.history.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_backup_id_:: - Specifies the backup that you want to monitor by seeing the progress information. The backupId is case-sensitive. - -[[br.history.named.cli.arguments]] -#### Named Command-Line Arguments - -_-n _:: - (Optional) The maximum number of backup records (Default: 10). - -_-p _:: - The full filesystem URI of where backup images are stored. - -_-s _:: - The name of the backup set to obtain history for. Mutually exclusive with the _-t_ option. - -_-t_ :: - The name of table to obtain history for. Mutually exclusive with the _-s_ option. - -[[br.history.backup.example]] -#### Example usage - -[source] ----- -$ hbase backup history -$ hbase backup history -n 20 -$ hbase backup history -t WebIndexRecords ----- - -// hbase backup describe - -[[br.describe.backup]] -### Describing a Backup Image - -This command can be used to obtain information about a specific backup image. - -[source] ----- -$ hbase backup describe ----- - -[[br.describe.backup.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_backup_id_:: - The ID of the backup image to describe. - -[[br.describe.backup.named.cli.arguments]] -#### Named Command-Line Arguments - -None. 
- -[[br.describe.backup.example]] -#### Example usage - -[source] ----- -$ hbase backup describe backupId_1467823988425 ----- - -// hbase backup delete - -[[br.delete.backup]] -### Deleting a Backup Image - -This command can be used to delete a backup image which is no longer needed. - -[source] ----- -$ hbase backup delete ----- - -[[br.delete.backup.positional.cli.arguments]] -#### Positional Command-Line Arguments - -_backup_id_:: - The ID to the backup image which should be deleted. - -[[br.delete.backup.named.cli.arguments]] -#### Named Command-Line Arguments - -None. - -[[br.delete.backup.example]] -#### Example usage - -[source] ----- -$ hbase backup delete backupId_1467823988425 ----- - -// hbase backup repair - -[[br.repair.backup]] -### Backup Repair Command - -This command attempts to correct any inconsistencies in persisted backup metadata which exists as -the result of software errors or unhandled failure scenarios. While the backup implementation tries -to correct all errors on its own, this tool may be necessary in the cases where the system cannot -automatically recover on its own. - -[source] ----- -$ hbase backup repair ----- - -[[br.repair.backup.positional.cli.arguments]] -#### Positional Command-Line Arguments - -None. - -[[br.repair.backup.named.cli.arguments]] -### Named Command-Line Arguments - -None. - -[[br.repair.backup.example]] -#### Example usage - -[source] ----- -$ hbase backup repair ----- - -[[br.backup.configuration]] -## Configuration keys - -The backup and restore feature includes both required and optional configuration keys. - -### Required properties - -_hbase.backup.enable_: Controls whether or not the feature is enabled (Default: `false`). Set this value to `true`. - -_hbase.master.logcleaner.plugins_: A comma-separated list of classes invoked when cleaning logs in the HBase Master. Set -this value to `org.apache.hadoop.hbase.backup.master.BackupLogCleaner` or append it to the current value. 
- -_hbase.procedure.master.classes_: A comma-separated list of classes invoked with the Procedure framework in the Master. Set -this value to `org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager` or append it to the current value. - -_hbase.procedure.regionserver.classes_: A comma-separated list of classes invoked with the Procedure framework in the RegionServer. -Set this value to `org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager` or append it to the current value. - -_hbase.coprocessor.region.classes_: A comma-separated list of RegionObservers deployed on tables. Set this value to -`org.apache.hadoop.hbase.backup.BackupObserver` or append it to the current value. - -_hbase.master.hfilecleaner.plugins_: A comma-separated list of HFileCleaners deployed on the Master. Set this value -to `org.apache.hadoop.hbase.backup.BackupHFileCleaner` or append it to the current value. - -### Optional properties - -_hbase.backup.system.ttl_: The time-to-live in seconds of data in the `hbase:backup` tables (default: forever). This property -is only relevant prior to the creation of the `hbase:backup` table. Use the `alter` command in the HBase shell to modify the TTL -when this table already exists. See the <> for more details on the impact of this -configuration property. - -_hbase.backup.attempts.max_: The number of attempts to perform when taking hbase table snapshots (default: 10). - -_hbase.backup.attempts.pause.ms_: The amount of time to wait between failed snapshot attempts in milliseconds (default: 10000). - -_hbase.backup.logroll.timeout.millis_: The amount of time (in milliseconds) to wait for RegionServers to execute a WAL rolling -in the Master's procedure framework (default: 30000). - -[[br.best.practices]] -## Best Practices - -### Formulate a restore strategy and test it. 
- -Before you rely on a backup and restore strategy for your production environment, identify how backups must be performed, -and more importantly, how restores must be performed. Test the plan to ensure that it is workable. -At a minimum, store backup data from a production cluster on a different cluster or server. To further safeguard the data, -use a backup location that is at a different physical location. - -If you have a unrecoverable loss of data on your primary production cluster as a result of computer system issues, you may -be able to restore the data from a different cluster or server at the same site. However, a disaster that destroys the whole -site renders locally stored backups useless. Consider storing the backup data and necessary resources (both computing capacity -and operator expertise) to restore the data at a site sufficiently remote from the production site. In the case of a catastrophe -at the whole primary site (fire, earthquake, etc.), the remote backup site can be very valuable. - -### Secure a full backup image first. - -As a baseline, you must complete a full backup of HBase data at least once before you can rely on incremental backups. The full -backup should be stored outside of the source cluster. To ensure complete dataset recovery, you must run the restore utility -with the option to restore baseline full backup. The full backup is the foundation of your dataset. Incremental backup data -is applied on top of the full backup during the restore operation to return you to the point in time when backup was last taken. - -### Define and use backup sets for groups of tables that are logical subsets of the entire dataset. - -You can group tables into an object called a backup set. A backup set can save time when you have a particular group of tables -that you expect to repeatedly back up or restore. - -When you create a backup set, you type table names to include in the group. 
The backup set includes not only groups of related -tables, but also retains the HBase backup metadata. Afterwards, you can invoke the backup set name to indicate what tables apply -to the command execution instead of entering all the table names individually. - -### Document the backup and restore strategy, and ideally log information about each backup. - -Document the whole process so that the knowledge base can transfer to new administrators after employee turnover. As an extra -safety precaution, also log the calendar date, time, and other relevant details about the data of each backup. This metadata -can potentially help locate a particular dataset in case of source cluster failure or primary site disaster. Maintain duplicate -copies of all documentation: one copy at the production cluster site and another at the backup location or wherever it can be -accessed by an administrator remotely from the production cluster. - -[[br.s3.backup.scenario]] -## Scenario: Safeguarding Application Datasets on Amazon S3 - -This scenario describes how a hypothetical retail business uses backups to safeguard application data and then restore the dataset -after failure. - -The HBase administration team uses backup sets to store data from a group of tables that have interrelated information for an -application called green. In this example, one table contains transaction records and the other contains customer details. The -two tables need to be backed up and be recoverable as a group. - -The admin team also wants to ensure daily backups occur automatically. - -.Tables Composing The Backup Set -image::backup-app-components.png[] - -The following is an outline of the steps and examples of commands that are used to backup the data for the _green_ application and -to recover the data later. All commands are run when logged in as HBase superuser. - -1. A backup set called _green_set_ is created as an alias for both the transactions table and the customer table. 
The backup set can -be used for all operations to avoid typing each table name. The backup set name is case-sensitive and should be formed with only -printable characters and without spaces. - -[source] ----- -$ hbase backup set add green_set transactions -$ hbase backup set add green_set customer ----- - -2. The first backup of green_set data must be a full backup. The following command example shows how credentials are passed to Amazon -S3 and specifies the file system with the s3a: prefix. - -[source] ----- -$ ACCESS_KEY=ABCDEFGHIJKLMNOPQRST -$ SECRET_KEY=123456789abcdefghijklmnopqrstuvwxyzABCD -$ sudo -u hbase hbase backup create full\ - s3a://$ACCESS_KEY:SECRET_KEY@prodhbasebackups/backups -s green_set ----- - -3. Incremental backups should be run according to a schedule that ensures essential data recovery in the event of a catastrophe. At -this retail company, the HBase admin team decides that automated daily backups secures the data sufficiently. The team decides that -they can implement this by modifying an existing Cron job that is defined in `/etc/crontab`. Consequently, IT modifies the Cron job -by adding the following line: - -[source] ----- -@daily hbase hbase backup create incremental s3a://$ACCESS_KEY:$SECRET_KEY@prodhbasebackups/backups -s green_set ----- - -4. A catastrophic IT incident disables the production cluster that the green application uses. An HBase system administrator of the -backup cluster must restore the _green_set_ dataset to the point in time closest to the recovery objective. - -NOTE: If the administrator of the backup HBase cluster has the backup ID with relevant details in accessible records, the following -search with the `hdfs dfs -ls` command and manually scanning the backup ID list can be bypassed. Consider continuously maintaining -and protecting a detailed log of backup IDs outside the production cluster in your environment. 
- -The HBase administrator runs the following command on the directory where backups are stored to print the list of successful backup -IDs on the console: - -`hdfs dfs -ls -t /prodhbasebackups/backups` - -5. The admin scans the list to see which backup was created at a date and time closest to the recovery objective. To do this, the -admin converts the calendar timestamp of the recovery point in time to Unix time because backup IDs are uniquely identified with -Unix time. The backup IDs are listed in reverse chronological order, meaning the most recent successful backup appears first. - -The admin notices that the following line in the command output corresponds with the _green_set_ backup that needs to be restored: - -`/prodhbasebackups/backups/backup_1467823988425` - -6. The admin restores green_set invoking the backup ID and the -overwrite option. The -overwrite option truncates all existing data -in the destination and populates the tables with data from the backup dataset. Without this flag, the backup data is appended to the -existing data in the destination. In this case, the admin decides to overwrite the data because it is corrupted. - -[source] ----- -$ sudo -u hbase hbase restore -s green_set \ - s3a://$ACCESS_KEY:$SECRET_KEY@prodhbasebackups/backups backup_1467823988425 \ -overwrite ----- - -[[br.data.security]] -## Security of Backup Data - -With this feature which makes copying data to remote locations, it's important to take a moment to clearly state the procedural -concerns that exist around data security. Like the HBase replication feature, backup and restore provides the constructs to automatically -copy data from within a corporate boundary to some system outside of that boundary. 
It is imperative when storing sensitive data that with backup and restore, much -less any feature which extracts data from HBase, the locations to which data is being sent has undergone a security audit to ensure -that only authenticated users are allowed to access that data. - -For example, with the above example of backing up data to S3, it is of the utmost importance that the proper permissions are assigned -to the S3 bucket to ensure that only a minimum set of authorized users are allowed to access this data. Because the data is no longer -being accessed via HBase, and its authentication and authorization controls, we must ensure that the filesystem storing that data is -providing a comparable level of security. This is a manual step which users *must* implement on their own. - -[[br.technical.details]] -## Technical Details of Incremental Backup and Restore - -HBase incremental backups enable more efficient capture of HBase table images than previous attempts at serial backup and restore -solutions, such as those that only used HBase Export and Import APIs. Incremental backups use Write Ahead Logs (WALs) to capture -the data changes since the previous backup was created. A WAL roll (create new WALs) is executed across all RegionServers to track -the WALs that need to be in the backup. - -After the incremental backup image is created, the source backup files usually are on same node as the data source. A process similar -to the DistCp (distributed copy) tool is used to move the source backup files to the target file systems. When a table restore operation -starts, a two-step process is initiated. First, the full backup is restored from the full backup image. Second, all WAL files from -incremental backups between the last full backup and the incremental backup being restored are converted to HFiles, which the HBase -Bulk Load utility automatically imports as restored data in the table. 
- -You can only restore on a live HBase cluster because the data must be redistributed to complete the restore operation successfully. - -[[br.filesystem.growth.warning]] -## A Warning on File System Growth - -As a reminder, incremental backups are implemented via retaining the write-ahead logs which HBase primarily uses for data durability. -Thus, to ensure that all data needing to be included in a backup is still available in the system, the HBase backup and restore feature -retains all write-ahead logs since the last backup until the next incremental backup is executed. - -Like HBase Snapshots, this can have an expectedly large impact on the HDFS usage of HBase for high volume tables. Take care in enabling -and using the backup and restore feature, specifically with a mind to removing backup sessions when they are not actively being used. - -The only automated, upper-bound on retained write-ahead logs for backup and restore is based on the TTL of the `hbase:backup` system table which, -as of the time this document is written, is infinite (backup table entries are never automatically deleted). This requires that administrators -perform backups on a schedule whose frequency is relative to the amount of available space on HDFS (e.g. less available HDFS space requires -more aggressive backup merges and deletions). As a reminder, the TTL can be altered on the `hbase:backup` table using the `alter` command -in the HBase shell. Modifying the configuration property `hbase.backup.system.ttl` in hbase-site.xml after the system table exists has no effect. - -[[br.backup.capacity.planning]] -## Capacity Planning - -When designing a distributed system deployment, it is critical that some basic mathmatical rigor is executed to ensure sufficient computational -capacity is available given the data and software requirements of the system. 
For this feature, the availability of network capacity is the largest -bottleneck when estimating the performance of some implementation of backup and restore. The second most costly function is the speed at which -data can be read/written. - -### Full Backups - -To estimate the duration of a full backup, we have to understand the general actions which are invoked: - -* Write-ahead log roll on each RegionServer: ones to tens of seconds per RegionServer in parallel. Relative to the load on each RegionServer. -* Take an HBase snapshot of the table(s): tens of seconds. Relative to the number of regions and files that comprise the table. -* Export the snapshot to the destination: see below. Relative to the size of the data and the network bandwidth to the destination. - -[[br.export.snapshot.cost]] -To approximate how long the final step will take, we have to make some assumptions on hardware. Be aware that these will *not* be accurate for your -system -- these are numbers that your or your administrator know for your system. Let's say the speed of reading data from HDFS on a single node is -capped at 80MB/s (across all Mappers that run on that host), a modern network interface controller (NIC) supports 10Gb/s, the top-of-rack switch can -handle 40Gb/s, and the WAN between your clusters is 10Gb/s. This means that you can only ship data to your remote at a speed of 1.25GB/s -- meaning -that 16 nodes (`1.25 * 1024 / 80 = 16`) participating in the ExportSnapshot should be able to fully saturate the link between clusters. With more -nodes in the cluster, we can still saturate the network but at a lesser impact on any one node which helps ensure local SLAs are made. 
If the size -of the snapshot is 10TB, this would full backup would take in the ballpark of 2.5 hours (`10 * 1024 / 1.25 / (60 * 60) = 2.23hrs`) - -As a general statement, it is very likely that the WAN bandwidth between your local cluster and the remote storage is the largest -bottleneck to the speed of a full backup. - -When the concern is restricting the computational impact of backups to a "production system", the above formulas can be reused with the optional -command-line arguments to `hbase backup create`: `-b`, `-w`, `-q`. The `-b` option defines the bandwidth at which each worker (Mapper) would -write data. The `-w` argument limits the number of workers that would be spawned in the DistCp job. The `-q` allows the user to specify a YARN -queue which can limit the specific nodes where the workers will be spawned -- this can quarantine the backup workers performing the copy to -a set of non-critical nodes. Relating the `-b` and `-w` options to our earlier equations: `-b` would be used to restrict each node from reading -data at the full 80MB/s and `-w` is used to limit the job from spawning 16 worker tasks. - -### Incremental Backup - -Like we did for full backups, we have to understand the incremental backup process to approximate its runtime and cost. - -* Identify new write-ahead logs since last full or incremental backup: negligible. Apriori knowledge from the backup system table(s). -* Read, filter, and write "minimized" HFiles equivalent to the WALs: dominated by the speed of writing data. Relative to write speed of HDFS. -* DistCp the HFiles to the destination: <>. - -For the second step, the dominating cost of this operation would be the re-writing the data (under the assumption that a majority of the -data in the WAL is preserved). In this case, we can assume an aggregate write speed of 30MB/s per node. 
Continuing our 16-node cluster example, -this would require approximately 15 minutes to perform this step for 50GB of data (50 * 1024 / 60 / 60 = 14.2). The amount of time to start the -DistCp MapReduce job would likely dominate the actual time taken to copy the data (50 / 1.25 = 40 seconds) and can be ignored. - -[[br.limitations]] -## Limitations of the Backup and Restore Utility - -*Serial backup operations* - -Backup operations cannot be run concurrently. An operation includes actions like create, delete, restore, and merge. Only one active backup session is supported. link:https://issues.apache.org/jira/browse/HBASE-16391[HBASE-16391] -will introduce multiple-backup sessions support. - -*No means to cancel backups* - -Both backup and restore operations cannot be canceled. (link:https://issues.apache.org/jira/browse/HBASE-15997[HBASE-15997], link:https://issues.apache.org/jira/browse/HBASE-15998[HBASE-15998]). -The workaround to cancel a backup would be to kill the client-side backup command (`control-C`), ensure all relevant MapReduce jobs have exited, and then -run the `hbase backup repair` command to ensure the system backup metadata is consistent. - -*Backups can only be saved to a single location* - -Copying backup information to multiple locations is an exercise left to the user. link:https://issues.apache.org/jira/browse/HBASE-15476[HBASE-15476] will -introduce the ability to specify multiple-backup destinations intrinsically. - -*HBase superuser access is required* - -Only an HBase superuser (e.g. hbase) is allowed to perform backup/restore, can pose a problem for shared HBase installations. Current mitigations would require -coordination with system administrators to build and deploy a backup and restore strategy (link:https://issues.apache.org/jira/browse/HBASE-14138[HBASE-14138]). 
- -*Backup restoration is an online operation* - -To perform a restore from a backup, it requires that the HBase cluster is online as a caveat of the current implementation (link:https://issues.apache.org/jira/browse/HBASE-16573[HBASE-16573]). - -*Some operations may fail and require re-run* - -The HBase backup feature is primarily client driven. While there is the standard HBase retry logic built into the HBase Connection, persistent errors in executing operations -may propagate back to the client (e.g. snapshot failure due to region splits). The backup implementation should be moved from client-side into the ProcedureV2 framework -in the future which would provide additional robustness around transient/retryable failures. The `hbase backup repair` command is meant to correct states which the system -cannot automatically detect and recover from. - -*Avoidance of declaration of public API* - -While the Java API to interact with this feature exists and its implementation is separated from an interface, insufficient rigor has been applied to determine if -it is exactly what we intend to ship to users. As such, it is marked as for a `Private` audience with the expectation that, as users begin to try the feature, there -will be modifications that would necessitate breaking compatibility (link:https://issues.apache.org/jira/browse/HBASE-17517[HBASE-17517]). - -*Lack of global metrics for backup and restore* - -Individual backup and restore operations contain metrics about the amount of work the operation included, but there is no centralized location (e.g. the Master UI) -which present information for consumption (link:https://issues.apache.org/jira/browse/HBASE-16565[HBASE-16565]). 
diff --git a/src/main/asciidoc/_chapters/community.adoc b/src/main/asciidoc/_chapters/community.adoc index d141dbf2e10..3a896cf2bd9 100644 --- a/src/main/asciidoc/_chapters/community.adoc +++ b/src/main/asciidoc/_chapters/community.adoc @@ -40,24 +40,6 @@ When the feature is ready for commit, 3 +1s from committers will get your featur See link:http://search-hadoop.com/m/asM982C5FkS1[HBase, mail # dev - Thoughts about large feature dev branches] -[[patchplusonepolicy]] -.Patch +1 Policy - -The below policy is something we put in place 09/2012. -It is a suggested policy rather than a hard requirement. -We want to try it first to see if it works before we cast it in stone. - -Apache HBase is made of link:https://issues.apache.org/jira/projects/HBASE?selectedItem=com.atlassian.jira.jira-projects-plugin:components-page[components]. -Components have one or more <>s. -See the 'Description' field on the link:https://issues.apache.org/jira/projects/HBASE?selectedItem=com.atlassian.jira.jira-projects-plugin:components-page[components] JIRA page for who the current owners are by component. - -Patches that fit within the scope of a single Apache HBase component require, at least, a +1 by one of the component's owners before commit. -If owners are absent -- busy or otherwise -- two +1s by non-owners will suffice. - -Patches that span components need at least two +1s before they can be committed, preferably +1s by owners of components touched by the x-component patch (TODO: This needs tightening up but I think fine for first pass). - -Any -1 on a patch by anyone vetoes a patch; it cannot be committed until the justification for the -1 is addressed. 
- [[hbase.fix.version.in.jira]] .How to set fix version in JIRA on issue resolve @@ -85,19 +67,37 @@ We also are currently in violation of this basic tenet -- replication at least k [[community.roles]] == Community Roles -[[owner]] -.Component Owner/Lieutenant +=== Release Managers -Component owners are listed in the description field on this Apache HBase JIRA link:https://issues.apache.org/jira/projects/HBASE?selectedItem=com.atlassian.jira.jira-projects-plugin:components-page[components] page. -The owners are listed in the 'Description' field rather than in the 'Component Lead' field because the latter only allows us list one individual whereas it is encouraged that components have multiple owners. +Each maintained release branch has a release manager, who volunteers to coordinate the backporting of new features and bug fixes to that release. +The release managers are link:https://hbase.apache.org/team-list.html[committers]. +If you would like your feature or bug fix to be included in a given release, communicate with that release manager. +If this list goes out of date or you can't reach the listed person, reach out to someone else on the list. -Owners or component lieutenants are volunteers who are (usually, but not necessarily) expert in their component domain and may have an agenda on how they think their Apache HBase component should evolve. +NOTE: End-of-life releases are not included in this list. -. Owners will try and review patches that land within their component's scope. -. If applicable, if an owner has an agenda, they will publish their goals or the design toward which they are driving their component +.Release Managers +[cols="1,1", options="header"] +|=== +| Release +| Release Manager -If you would like to be volunteer as a component owner, just write the dev list and we'll sign you up. -Owners do not need to be committers. 
+| 1.2 +| Sean Busbey + +| 1.3 +| Mikhail Antonov + +| 1.4 +| Andrew Purtell + +| 2.0 +| Michael Stack + +| 2.1 +| Duo Zhang + +|=== [[hbase.commit.msg.format]] == Commit Message format diff --git a/src/main/asciidoc/_chapters/compression.adoc b/src/main/asciidoc/_chapters/compression.adoc index 6fe0d769fe9..b2ff5ce6999 100644 --- a/src/main/asciidoc/_chapters/compression.adoc +++ b/src/main/asciidoc/_chapters/compression.adoc @@ -335,25 +335,18 @@ You do not need to re-create the table or copy data. If you are changing codecs, be sure the old codec is still available until all the old StoreFiles have been compacted. .Enabling Compression on a ColumnFamily of an Existing Table using HBaseShell -==== ---- - hbase> disable 'test' hbase> alter 'test', {NAME => 'cf', COMPRESSION => 'GZ'} hbase> enable 'test' ---- -==== .Creating a New Table with Compression On a ColumnFamily -==== ---- - hbase> create 'test2', { NAME => 'cf2', COMPRESSION => 'SNAPPY' } ---- -==== .Verifying a ColumnFamily's Compression Settings -==== ---- hbase> describe 'test' @@ -366,7 +359,6 @@ DESCRIPTION ENABLED LOCKCACHE => 'true'} 1 row(s) in 0.1070 seconds ---- -==== ==== Testing Compression Performance @@ -374,9 +366,7 @@ HBase includes a tool called LoadTestTool which provides mechanisms to test your You must specify either `-write` or `-update-read` as your first parameter, and if you do not specify another parameter, usage advice is printed for each option. .+LoadTestTool+ Usage -==== ---- - $ bin/hbase org.apache.hadoop.hbase.util.LoadTestTool -h usage: bin/hbase org.apache.hadoop.hbase.util.LoadTestTool Options: @@ -387,7 +377,7 @@ Options: LZ4] -data_block_encoding Encoding algorithm (e.g. prefix compression) to use for data blocks in the test column family, one - of [NONE, PREFIX, DIFF, FAST_DIFF, PREFIX_TREE]. + of [NONE, PREFIX, DIFF, FAST_DIFF, ROW_INDEX_V1]. 
-encryption Enables transparent encryption on the test table, one of [AES] -generator The class which generates load for the tool. Any @@ -429,16 +419,12 @@ Options: port numbers -zk_root name of parent znode in zookeeper ---- -==== .Example Usage of LoadTestTool -==== ---- - $ hbase org.apache.hadoop.hbase.util.LoadTestTool -write 1:10:100 -num_keys 1000000 -read 100:30 -num_tables 1 -data_block_encoding NONE -tn load_test_tool_NONE ---- -==== [[data.block.encoding.enable]] === Enable Data Block Encoding @@ -449,9 +435,7 @@ Disable the table before altering its DATA_BLOCK_ENCODING setting. Following is an example using HBase Shell: .Enable Data Block Encoding On a Table -==== ---- - hbase> disable 'test' hbase> alter 'test', { NAME => 'cf', DATA_BLOCK_ENCODING => 'FAST_DIFF' } Updating all regions with the new schema... @@ -462,12 +446,9 @@ Done. hbase> enable 'test' 0 row(s) in 0.1580 seconds ---- -==== .Verifying a ColumnFamily's Data Block Encoding -==== ---- - hbase> describe 'test' DESCRIPTION ENABLED 'test', {NAME => 'cf', DATA_BLOCK_ENCODING => 'FAST true @@ -478,7 +459,6 @@ DESCRIPTION ENABLED e', BLOCKCACHE => 'true'} 1 row(s) in 0.0650 seconds ---- -==== :numbered: diff --git a/src/main/asciidoc/_chapters/configuration.adoc b/src/main/asciidoc/_chapters/configuration.adoc index 66fe5ddf949..174aa80f2b0 100644 --- a/src/main/asciidoc/_chapters/configuration.adoc +++ b/src/main/asciidoc/_chapters/configuration.adoc @@ -29,7 +29,7 @@ This chapter expands upon the <> chapter to further explain configuration of Apache HBase. Please read this chapter carefully, especially the <> -to ensure that your HBase testing and deployment goes smoothly, and prevent data loss. +to ensure that your HBase testing and deployment goes smoothly. Familiarize yourself with <> as well. == Configuration Files @@ -92,24 +92,42 @@ This section lists required services and some required system configuration. 
[[java]] .Java -[cols="1,1,4", options="header"] + +The following table summarizes the recommendation of the HBase community with respect to deploying on various Java versions. An entry of "yes" is meant to indicate a base level of testing and willingness to help diagnose and address issues you might run into. Similarly, an entry of "no" or "Not Supported" generally means that should you run into an issue the community is likely to ask you to change the Java environment before proceeding to help. In some cases, specific guidance on limitations (e.g. whether compiling / unit tests work, specific operational issues, etc.) will also be noted. + +.Long Term Support JDKs are recommended +[TIP] +==== +HBase recommends downstream users rely on JDK releases that are marked as Long Term Supported (LTS) either from the OpenJDK project or vendors. As of March 2018 that means Java 8 is the only applicable version and that the next likely version to see testing will be Java 11 near Q3 2018. +==== + +.Java support by release line +[cols="1,1,1,1,1", options="header"] |=== |HBase Version |JDK 7 |JDK 8 +|JDK 9 +|JDK 10 |2.0 |link:http://search-hadoop.com/m/YGbbsPxZ723m3as[Not Supported] |yes +|link:https://issues.apache.org/jira/browse/HBASE-20264[Not Supported] +|link:https://issues.apache.org/jira/browse/HBASE-20264[Not Supported] |1.3 |yes |yes +|link:https://issues.apache.org/jira/browse/HBASE-20264[Not Supported] +|link:https://issues.apache.org/jira/browse/HBASE-20264[Not Supported] |1.2 |yes |yes +|link:https://issues.apache.org/jira/browse/HBASE-20264[Not Supported] +|link:https://issues.apache.org/jira/browse/HBASE-20264[Not Supported] |=== @@ -146,9 +164,9 @@ It is recommended to raise the ulimit to at least 10,000, but more likely 10,240 + For example, assuming that a schema had 3 ColumnFamilies per region with an average of 3 StoreFiles per ColumnFamily, and there are 100 regions per RegionServer, the JVM will open `3 * 3 * 100 = 900` file descriptors, not counting open JAR files, 
configuration files, and others. Opening a file does not take many resources, and the risk of allowing a user to open too many files is minimal. + -Another related setting is the number of processes a user is allowed to run at once. In Linux and Unix, the number of processes is set using the `ulimit -u` command. This should not be confused with the `nproc` command, which controls the number of CPUs available to a given user. Under load, a `ulimit -u` that is too low can cause OutOfMemoryError exceptions. See Jack Levin's major HDFS issues thread on the hbase-users mailing list, from 2011. +Another related setting is the number of processes a user is allowed to run at once. In Linux and Unix, the number of processes is set using the `ulimit -u` command. This should not be confused with the `nproc` command, which controls the number of CPUs available to a given user. Under load, a `ulimit -u` that is too low can cause OutOfMemoryError exceptions. + -Configuring the maximum number of file descriptors and processes for the user who is running the HBase process is an operating system configuration, rather than an HBase configuration. It is also important to be sure that the settings are changed for the user that actually runs HBase. To see which user started HBase, and that user's ulimit configuration, look at the first line of the HBase log for that instance. A useful read setting config on your hadoop cluster is Aaron Kimball's Configuration Parameters: What can you just ignore? +Configuring the maximum number of file descriptors and processes for the user who is running the HBase process is an operating system configuration, rather than an HBase configuration. It is also important to be sure that the settings are changed for the user that actually runs HBase. To see which user started HBase, and that user's ulimit configuration, look at the first line of the HBase log for that instance. 
+ .`ulimit` Settings on Ubuntu ==== @@ -183,7 +201,8 @@ See link:https://wiki.apache.org/hadoop/Distributions%20and%20Commercial%20Suppo .Hadoop 2.x is recommended. [TIP] ==== -Hadoop 2.x is faster and includes features, such as short-circuit reads, which will help improve your HBase random read profile. +Hadoop 2.x is faster and includes features, such as short-circuit reads (see <>), +which will help improve your HBase random read profile. Hadoop 2.x also includes important bug fixes that will improve your overall HBase experience. HBase does not support running with earlier versions of Hadoop. See the table below for requirements specific to different HBase versions. @@ -211,7 +230,9 @@ Use the following legend to interpret this table: |Hadoop-2.8.2 | NT | NT | NT | NT | NT |Hadoop-2.8.3+ | NT | NT | NT | S | S |Hadoop-2.9.0 | X | X | X | X | X -|Hadoop-3.0.0 | NT | NT | NT | NT | NT +|Hadoop-2.9.1+ | NT | NT | NT | NT | NT +|Hadoop-3.0.x | X | X | X | X | X +|Hadoop-3.1.0 | X | X | X | X | X |=== .Hadoop Pre-2.6.1 and JDK 1.8 Kerberos @@ -232,27 +253,35 @@ HBase on top of an HDFS Encryption Zone. Failure to do so will result in cluster data loss. This patch is present in Apache Hadoop releases 2.6.1+. ==== -.Hadoop 2.7.x +.Hadoop 2.y.0 Releases [TIP] ==== -Hadoop version 2.7.0 is not tested or supported as the Hadoop PMC has explicitly labeled that release as not being stable. (reference the link:https://s.apache.org/hadoop-2.7.0-announcement[announcement of Apache Hadoop 2.7.0].) +Starting around the time of Hadoop version 2.7.0, the Hadoop PMC got into the habit of calling out new minor releases on their major version 2 release line as not stable / production ready. As such, HBase expressly advises downstream users to avoid running on top of these releases. Note that additionally the 2.8.1 release was given the same caveat by the Hadoop PMC. 
For reference, see the release announcements for link:https://s.apache.org/hadoop-2.7.0-announcement[Apache Hadoop 2.7.0], link:https://s.apache.org/hadoop-2.8.0-announcement[Apache Hadoop 2.8.0], link:https://s.apache.org/hadoop-2.8.1-announcement[Apache Hadoop 2.8.1], and link:https://s.apache.org/hadoop-2.9.0-announcement[Apache Hadoop 2.9.0]. ==== -.Hadoop 2.8.x +.Hadoop 3.0.x Releases [TIP] ==== -Hadoop version 2.8.0 and 2.8.1 are not tested or supported as the Hadoop PMC has explicitly labeled that releases as not being stable. (reference the link:https://s.apache.org/hadoop-2.8.0-announcement[announcement of Apache Hadoop 2.8.0] and link:https://s.apache.org/hadoop-2.8.1-announcement[announcement of Apache Hadoop 2.8.1].) +Hadoop distributions that include the Application Timeline Service feature may cause unexpected versions of HBase classes to be present in the application classpath. Users planning on running MapReduce applications with HBase should make sure that link:https://issues.apache.org/jira/browse/YARN-7190[YARN-7190] is present in their YARN service (currently fixed in 2.9.1+ and 3.1.0+). +==== + +.Hadoop 3.1.0 Release +[TIP] +==== +The Hadoop PMC called out the 3.1.0 release as not stable / production ready. As such, HBase expressly advises downstream users to avoid running on top of this release. For reference, see the link:https://s.apache.org/hadoop-3.1.0-announcement[release announcement for Hadoop 3.1.0]. ==== .Replace the Hadoop Bundled With HBase! [NOTE] ==== -Because HBase depends on Hadoop, it bundles an instance of the Hadoop jar under its _lib_ directory. -The bundled jar is ONLY for use in standalone mode. +Because HBase depends on Hadoop, it bundles Hadoop jars under its _lib_ directory. +The bundled jars are ONLY for use in standalone mode. In distributed mode, it is _critical_ that the version of Hadoop that is out on your cluster match what is under HBase. 
-Replace the hadoop jar found in the HBase lib directory with the hadoop jar you are running on your cluster to avoid version mismatch issues. -Make sure you replace the jar in HBase across your whole cluster. -Hadoop version mismatch issues have various manifestations but often all look like its hung. +Replace the hadoop jars found in the HBase lib directory with the equivalent hadoop jars from the version you are running +on your cluster to avoid version mismatch issues. +Make sure you replace the jars under HBase across your whole cluster. +Hadoop version mismatch issues have various manifestations. Check for mismatch if +HBase appears hung. ==== [[dfs.datanode.max.transfer.threads]] @@ -537,7 +566,6 @@ If you are configuring an IDE to run an HBase client, you should include the _co For Java applications using Maven, including the hbase-shaded-client module is the recommended dependency when connecting to a cluster: [source,xml] ---- - org.apache.hbase hbase-shaded-client diff --git a/src/main/asciidoc/_chapters/datamodel.adoc b/src/main/asciidoc/_chapters/datamodel.adoc index 3674566e4e6..ba4961a5a19 100644 --- a/src/main/asciidoc/_chapters/datamodel.adoc +++ b/src/main/asciidoc/_chapters/datamodel.adoc @@ -343,6 +343,7 @@ In particular: Below we describe how the version dimension in HBase currently works. See link:https://issues.apache.org/jira/browse/HBASE-2406[HBASE-2406] for discussion of HBase versions. link:https://www.ngdata.com/bending-time-in-hbase/[Bending time in HBase] makes for a good read on the version, or time, dimension in HBase. It has more detail on versioning than is provided here. + As of this writing, the limitation _Overwriting values at existing timestamps_ mentioned in the article no longer holds in HBase. This section is basically a synopsis of this article by Bruno Dumon. 
@@ -503,8 +504,42 @@ Otherwise, a delete marker with a timestamp in the future is kept until the majo NOTE: This behavior represents a fix for an unexpected change that was introduced in HBase 0.94, and was fixed in link:https://issues.apache.org/jira/browse/HBASE-10118[HBASE-10118]. The change has been backported to HBase 0.94 and newer branches. +[[new.version.behavior]] +=== Optional New Version and Delete behavior in HBase-2.0.0 + +In `hbase-2.0.0`, the operator can specify an alternate version and +delete treatment by setting the column descriptor property +`NEW_VERSION_BEHAVIOR` to true (To set a property on a column family +descriptor, you must first disable the table and then alter the +column family descriptor; see <> for an example +of editing an attribute on a column family descriptor). + +The 'new version behavior' undoes the limitations listed below +whereby a `Delete` ALWAYS overshadows a `Put` if at the same +location -- i.e. same row, column family, qualifier and timestamp +-- regardless of which arrived first. Version accounting is also +changed as deleted versions are considered toward total version count. +This is done to ensure results are not changed should a major +compaction intercede. See `HBASE-15968` and linked issues for +discussion. + +Running with this new configuration currently has a cost: we factor +the Cell MVCC on every compare so we burn more CPU. The size of the +slowdown varies. In testing we've seen between 0% and 25% +degradation. + +If replicating, it is advised that you run with the new +serial replication feature (See `HBASE-9465`; the serial +replication feature did NOT make it into `hbase-2.0.0` but +should arrive in a subsequent hbase-2.x release) as now +the order in which Mutations arrive is a factor. + + === Current Limitations +The below limitations are addressed in hbase-2.0.0. See +the section above, <<new.version.behavior>>. 
+ ==== Deletes mask Puts Deletes mask puts, even puts that happened after the delete was entered. 
diff --git a/src/main/asciidoc/_chapters/developer.adoc b/src/main/asciidoc/_chapters/developer.adoc index 11ef4bacd2f..6d0a7d1a56b 100644 --- a/src/main/asciidoc/_chapters/developer.adoc +++ b/src/main/asciidoc/_chapters/developer.adoc @@ -773,15 +773,15 @@ To do this, log in to Apache's Nexus at link:https://repository.apache.org[repos Find your artifacts in the staging repository. Click on 'Staging Repositories' and look for a new one ending in "hbase" with a status of 'Open', select it. Use the tree view to expand the list of repository contents and inspect if the artifacts you expect are present. Check the POMs. As long as the staging repo is open you can re-upload if something is missing or built incorrectly. - ++ If something is seriously wrong and you would like to back out the upload, you can use the 'Drop' button to drop and delete the staging repository. Sometimes the upload fails in the middle. This is another reason you might have to 'Drop' the upload from the staging repository. - ++ If it checks out, close the repo using the 'Close' button. The repository must be closed before a public URL to it becomes available. It may take a few minutes for the repository to close. Once complete you'll see a public URL to the repository in the Nexus UI. You may also receive an email with the URL. Provide the URL to the temporary staging repository in the email that announces the release candidate. (Folks will need to add this repo URL to their local poms or to their local _settings.xml_ file to pull the published release candidate artifacts.) - ++ When the release vote concludes successfully, return here and click the 'Release' button to release the artifacts to central. The release process will automatically drop and delete the staging repository. 
- ++ .hbase-downstreamer [NOTE] ==== @@ -792,15 +792,18 @@ Make sure you are pulling from the repository when tests run and that you are no ==== See link:https://www.apache.org/dev/publishing-maven-artifacts.html[Publishing Maven Artifacts] for some pointers on this maven staging process. - ++ If the HBase version ends in `-SNAPSHOT`, the artifacts go elsewhere. They are put into the Apache snapshots repository directly and are immediately available. Making a SNAPSHOT release, this is what you want to happen. - -At this stage, you have two tarballs in your 'build output directory' and a set of artifacts in a staging area of the maven repository, in the 'closed' state. - ++ +At this stage, you have two tarballs in your 'build output directory' and a set of artifacts +in a staging area of the maven repository, in the 'closed' state. Next sign, fingerprint and then 'stage' your release candiate build output directory via svnpubsub by committing -your directory to link:https://dist.apache.org/repos/dist/dev/hbase/[The 'dev' distribution directory] (See comments on link:https://issues.apache.org/jira/browse/HBASE-10554[HBASE-10554 Please delete old releases from mirroring system] but in essence it is an svn checkout of https://dist.apache.org/repos/dist/dev/hbase -- releases are at https://dist.apache.org/repos/dist/release/hbase). In the _version directory_ run the following commands: +your directory to link:https://dist.apache.org/repos/dist/dev/hbase/[The dev distribution directory] +(See comments on link:https://issues.apache.org/jira/browse/HBASE-10554[HBASE-10554 Please delete old releases from mirroring system] +but in essence it is an svn checkout of link:https://dist.apache.org/repos/dist/dev/hbase[dev/hbase] -- releases are at +link:https://dist.apache.org/repos/dist/release/hbase[release/hbase]). 
In the _version directory_ run the following commands: [source,bourne] ---- @@ -867,6 +870,50 @@ See link:http://search-hadoop.com/m/DHED4dhFaU[HBase, mail # dev - On recent discussion clarifying ASF release policy]. for how we arrived at this process. +[[hbase.release.announcement]] +== Announcing Releases + +Once an RC has passed successfully and the needed artifacts have been staged for distribution, you'll need to let everyone know about our shiny new release. It's not a requirement, but to make things easier for release managers we have a template you can start with. Be sure you replace \_version_ and other markers with the relevant version numbers. You should manually verify all links before sending. + +[source,email] +---- +The HBase team is happy to announce the immediate availability of HBase _version_. + +Apache HBase™ is an open-source, distributed, versioned, non-relational database. +Apache HBase gives you low latency random access to billions of rows with +millions of columns atop non-specialized hardware. To learn more about HBase, +see https://hbase.apache.org/. + +HBase _version_ is the _nth_ minor release in the HBase _major_.x line, which aims to +improve the stability and reliability of HBase. This release includes roughly +XXX resolved issues not covered by previous _major_.x releases. + +Notable new features include: +- List text descriptions of features that fit on one line +- Including if JDK or Hadoop support versions changes +- If the "stable" pointer changes, call that out +- For those with obvious JIRA IDs, include them (HBASE-YYYYY) + +The full list of issues can be found in the included CHANGES.md and RELEASENOTES.md, +or via our issue tracker: + + https://s.apache.org/hbase-_version_-jira + +To download please follow the links and instructions on our website: + + https://hbase.apache.org/downloads.html + + +Questions, comments, and problems are always welcome at: dev@hbase.apache.org. 
+ +Thanks to all who contributed and made this release possible. + +Cheers, +The HBase Dev Team +---- + +You should send this message to the following lists: dev@hbase.apache.org, user@hbase.apache.org, announce@apache.org. If you'd like a spot check before sending, feel free to ask via jira or the dev list. + [[documentation]] == Generating the HBase Reference Guide @@ -909,13 +956,21 @@ For any other module, for example `hbase-common`, the tests must be strict unit ==== Testing the HBase Shell The HBase shell and its tests are predominantly written in jruby. -In order to make these tests run as a part of the standard build, there is a single JUnit test, `TestShell`, that takes care of loading the jruby implemented tests and running them. + +In order to make these tests run as a part of the standard build, there are a few JUnit test classes that take care of loading the jruby implemented tests and running them. +The tests were split into separate classes to accommodate class level timeouts (see <> for specifics). You can run all of these tests from the top level with: [source,bourne] +---- + mvn clean test -Dtest=Test*Shell ---- - mvn clean test -Dtest=TestShell +If you have previously done a `mvn install`, then you can instruct maven to run only the tests in the hbase-shell module with: + +[source,bourne] +---- + mvn clean test -pl hbase-shell ---- Alternatively, you may limit the shell tests that run using the system variable `shell.test`. 
@@ -924,8 +979,7 @@ For example, the tests that cover the shell commands for altering tables are con [source,bourne] ---- - - mvn clean test -Dtest=TestShell -Dshell.test=/AdminAlterTableTest/ + mvn clean test -pl hbase-shell -Dshell.test=/AdminAlterTableTest/ ---- You may also use a link:http://docs.ruby-doc.com/docs/ProgrammingRuby/html/language.html#UJ[Ruby Regular Expression @@ -935,14 +989,13 @@ You can run all of the HBase admin related tests, including both the normal admi [source,bourne] ---- - mvn clean test -Dtest=TestShell -Dshell.test=/.*Admin.*Test/ + mvn clean test -pl hbase-shell -Dshell.test=/.*Admin.*Test/ ---- In the event of a test failure, you can see details by examining the XML version of the surefire report results [source,bourne] ---- - vim hbase-shell/target/surefire-reports/TEST-org.apache.hadoop.hbase.client.TestShell.xml ---- @@ -1462,9 +1515,8 @@ HBase ships with several ChaosMonkey policies, available in the [[chaos.monkey.properties]] ==== Configuring Individual ChaosMonkey Actions -Since HBase version 1.0.0 (link:https://issues.apache.org/jira/browse/HBASE-11348[HBASE-11348]), ChaosMonkey integration tests can be configured per test run. -Create a Java properties file in the HBase classpath and pass it to ChaosMonkey using +Create a Java properties file in the HBase CLASSPATH and pass it to ChaosMonkey using the `-monkeyProps` configuration flag. Configurable properties, along with their default values if applicable, are listed in the `org.apache.hadoop.hbase.chaos.factories.MonkeyConstants` class. 
For properties that have defaults, you can override them by including them @@ -1477,7 +1529,9 @@ The following example uses a properties file called <<TableSchema name="users"><ColumnSchema name="cf" /></TableSchema>' \ + -d '<?xml version="1.0" encoding="UTF-8"?><TableSchema name="users"><ColumnSchema name="cf" KEEP_DELETED_CELLS="true" /></TableSchema>' \ "http://example.com:8000/users/schema" |/_table_/schema |PUT -|Update an existing table with the provided schema fragment +|Create a new table, or replace an existing table's schema |curl -vi -X PUT \ -H "Accept: text/xml" \ -H "Content-Type: text/xml" \ - -d '<?xml version="1.0" encoding="UTF-8"?><TableSchema name="users"><ColumnSchema name="cf" KEEP_DELETED_CELLS="true" /></TableSchema>' \ + -d '<?xml version="1.0" encoding="UTF-8"?><TableSchema name="users"><ColumnSchema name="cf" /></TableSchema>' \ "http://example.com:8000/users/schema" |/_table_/schema @@ -851,23 +851,14 @@ println(Bytes.toString(value)) === Setting the Classpath To use Jython with HBase, your CLASSPATH must include HBase's classpath as well as -the Jython JARs required by your code. First, use the following command on a server -running the HBase RegionServer process, to get HBase's classpath. +the Jython JARs required by your code. + +Set the path to directory containing the `jython.jar` and each additional Jython-related JAR needed for +your project. Then export HBASE_CLASSPATH pointing to the $JYTHON_HOME env. variable. [source, bash] ---- -$ ps aux |grep regionserver| awk -F 'java.library.path=' {'print $2'} | awk {'print $1'} - -/usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64 ----- - -Set the `$CLASSPATH` environment variable to include the path you found in the previous -step, plus the path to `jython.jar` and each additional Jython-related JAR needed for -your project. 
- -[source, bash] ----- -$ export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64:/path/to/jython.jar +$ export HBASE_CLASSPATH=/directory/jython.jar ---- Start a Jython shell with HBase and Hadoop JARs in the classpath: @@ -877,55 +868,52 @@ $ bin/hbase org.python.util.jython .Table Creation, Population, Get, and Delete with Jython ==== -The following Jython code example creates a table, populates it with data, fetches -the data, and deletes the table. +The following Jython code example checks for table, +if it exists, deletes it and then creates it. Then it +populates the table with data and fetches the data. [source,jython] ---- import java.lang -from org.apache.hadoop.hbase import HBaseConfiguration, HTableDescriptor, HColumnDescriptor, HConstants, TableName -from org.apache.hadoop.hbase.client import HBaseAdmin, HTable, Get -from org.apache.hadoop.hbase.io import Cell, RowResult +from org.apache.hadoop.hbase import HBaseConfiguration, HTableDescriptor, HColumnDescriptor, TableName +from org.apache.hadoop.hbase.client import Admin, Connection, ConnectionFactory, Get, Put, Result, Table +from org.apache.hadoop.conf import Configuration # First get a conf object. This will read in the configuration # that is out in your hbase-*.xml files such as location of the # hbase master node. -conf = HBaseConfiguration() +conf = HBaseConfiguration.create() +connection = ConnectionFactory.createConnection(conf) +admin = connection.getAdmin() -# Create a table named 'test' that has two column families, -# one named 'content, and the other 'anchor'. The colons -# are required for column family names. -tablename = TableName.valueOf("test") +# Create a table named 'test' that has a column family +# named 'content'. 
+tableName = TableName.valueOf("test") +table = connection.getTable(tableName) -desc = HTableDescriptor(tablename) -desc.addFamily(HColumnDescriptor("content:")) -desc.addFamily(HColumnDescriptor("anchor:")) -admin = HBaseAdmin(conf) +desc = HTableDescriptor(tableName) +desc.addFamily(HColumnDescriptor("content")) # Drop and recreate if it exists -if admin.tableExists(tablename): - admin.disableTable(tablename) - admin.deleteTable(tablename) -admin.createTable(desc) +if admin.tableExists(tableName): + admin.disableTable(tableName) + admin.deleteTable(tableName) -tables = admin.listTables() -table = HTable(conf, tablename) +admin.createTable(desc) # Add content to 'column:' on a row named 'row_x' row = 'row_x' -update = Get(row) -update.put('content:', 'some content') -table.commit(update) +put = Put(row) +put.addColumn("content", "qual", "some content") +table.put(put) # Now fetch the content just added, returns a byte[] -data_row = table.get(row, "content:") -data = java.lang.String(data_row.value, "UTF8") +get = Get(row) + +result = table.get(get) +data = java.lang.String(result.getValue("content", "qual"), "UTF8") print "The fetched row contains the value '%s'" % data - -# Delete the table. 
-admin.disableTable(desc.getName()) -admin.deleteTable(desc.getName()) ---- ==== @@ -935,24 +923,23 @@ This example scans a table and returns the results that match a given family qua [source, jython] ---- -# Print all rows that are members of a particular column family -# by passing a regex for family qualifier - import java.lang +from org.apache.hadoop.hbase import TableName, HBaseConfiguration +from org.apache.hadoop.hbase.client import Connection, ConnectionFactory, Result, ResultScanner, Table, Admin +from org.apache.hadoop.conf import Configuration +conf = HBaseConfiguration.create() +connection = ConnectionFactory.createConnection(conf) +admin = connection.getAdmin() +tableName = TableName.valueOf('wiki') +table = connection.getTable(tableName) -from org.apache.hadoop.hbase import HBaseConfiguration -from org.apache.hadoop.hbase.client import HTable - -conf = HBaseConfiguration() - -table = HTable(conf, "wiki") -col = "title:.*$" - -scanner = table.getScanner([col], "") +cf = "title" +attr = "attr" +scanner = table.getScanner(cf) while 1: result = scanner.next() if not result: - break - print java.lang.String(result.row), java.lang.String(result.get('title:').value) + break + print java.lang.String(result.row), java.lang.String(result.getValue(cf, attr)) ---- ==== diff --git a/src/main/asciidoc/_chapters/getting_started.adoc b/src/main/asciidoc/_chapters/getting_started.adoc index 1cdc0a2d70b..84ebcaa67b5 100644 --- a/src/main/asciidoc/_chapters/getting_started.adoc +++ b/src/main/asciidoc/_chapters/getting_started.adoc @@ -52,7 +52,7 @@ See <> for information about supported JDK versions. === Get Started with HBase .Procedure: Download, Configure, and Start HBase in Standalone Mode -. Choose a download site from this list of link:https://www.apache.org/dyn/closer.cgi/hbase/[Apache Download Mirrors]. +. Choose a download site from this list of link:https://www.apache.org/dyn/closer.lua/hbase/[Apache Download Mirrors]. Click on the suggested top link. 
This will take you to a mirror of _HBase Releases_. Click on the folder named _stable_ and then download the binary file that ends in _.tar.gz_ to your local filesystem. @@ -82,7 +82,7 @@ JAVA_HOME=/usr + . Edit _conf/hbase-site.xml_, which is the main HBase configuration file. - At this time, you only need to specify the directory on the local filesystem where HBase and ZooKeeper write data. + At this time, you need to specify the directory on the local filesystem where HBase and ZooKeeper write data and acknowledge some risks. By default, a new directory is created under /tmp. Many servers are configured to delete the contents of _/tmp_ upon reboot, so you should store the data elsewhere. The following configuration will store HBase's data in the _hbase_ directory, in the home directory of the user called `testuser`. @@ -102,6 +102,21 @@ JAVA_HOME=/usr <name>hbase.zookeeper.property.dataDir</name> <value>/home/testuser/zookeeper</value> </property> + <property> + <name>hbase.unsafe.stream.capability.enforce</name> + <value>false</value> + <description> + Controls whether HBase will check for stream capabilities (hflush/hsync). + + Disable this if you intend to run on LocalFileSystem, denoted by a rootdir + with the 'file://' scheme, but be mindful of the NOTE below. + + WARNING: Setting this to false blinds you to potential data loss and + inconsistent system state in the event of process and/or node failures. If + HBase is complaining of an inability to use hsync or hflush it's most + likely not a false positive. + </description> + </property> </configuration> ---- ==== @@ -111,7 +126,14 @@ HBase will do this for you. If you create the directory, HBase will attempt to do a migration, which is not what you want. + NOTE: The _hbase.rootdir_ in the above example points to a directory -in the _local filesystem_. The 'file:/' prefix is how we denote local filesystem. +in the _local filesystem_. The 'file://' prefix is how we denote local +filesystem. 
You should take the WARNING present in the configuration example +to heart. 
In standalone mode HBase makes use of the local filesystem abstraction +from the Apache Hadoop project. That abstraction doesn't provide the durability +promises that HBase needs to operate safely. This is fine for local development +and testing use cases where the cost of cluster failure is well contained. It is +not appropriate for production deployments; eventually you will lose data. + To home HBase on an existing instance of HDFS, set the _hbase.rootdir_ to point at a directory up on your instance: e.g. _hdfs://namenode.example.org:8020/hbase_. For more on this variant, see the section below on Standalone HBase over HDFS. @@ -163,7 +185,7 @@ hbase(main):001:0> create 'test', 'cf' . List Information About your Table + -Use the `list` command to +Use the `list` command to confirm your table exists + ---- hbase(main):002:0> list 'test' @@ -174,6 +196,22 @@ test => ["test"] ---- ++ +Now use the `describe` command to see details, including configuration defaults ++ +---- +hbase(main):003:0> describe 'test' +Table test is ENABLED +test +COLUMN FAMILIES DESCRIPTION +{NAME => 'cf', VERSIONS => '1', EVICT_BLOCKS_ON_CLOSE => 'false', NEW_VERSION_BEHAVIOR => 'false', KEEP_DELETED_CELLS => 'FALSE', CACHE_DATA_ON_WRITE => +'false', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', REPLICATION_SCOPE => '0', BLOOMFILTER => 'ROW', CACHE_INDEX_ON_WRITE => 'f +alse', IN_MEMORY => 'false', CACHE_BLOOMS_ON_WRITE => 'false', PREFETCH_BLOCKS_ON_OPEN => 'false', COMPRESSION => 'NONE', BLOCKCACHE => 'true', BLOCKSIZE + => '65536'} +1 row(s) +Took 0.9998 seconds +---- + . Put data into your table. + To put data into your table, use the `put` command. @@ -314,7 +352,7 @@ First, add the following property which directs HBase to run in distributed mode ---- + Next, change the `hbase.rootdir` from the local filesystem to the address of your HDFS instance, using the `hdfs:////` URI syntax. -In this example, HDFS is running on the localhost at port 8020. 
+In this example, HDFS is running on the localhost at port 8020. Be sure to either remove the entry for `hbase.unsafe.stream.capability.enforce` or set it to true. + [source,xml] ---- @@ -371,7 +409,7 @@ The following command starts 3 backup servers using ports 16002/16012, 16003/160 + ---- -$ ./bin/local-master-backup.sh 2 3 5 +$ ./bin/local-master-backup.sh start 2 3 5 ---- + To kill a backup master without killing the entire cluster, you need to find its process ID (PID). The PID is stored in a file with a name like _/tmp/hbase-USER-X-master.pid_. @@ -566,18 +604,14 @@ On each node of the cluster, run the `jps` command and verify that the correct p You may see additional Java processes running on your servers as well, if they are used for other purposes. + .`node-a` `jps` Output -==== ---- - $ jps 20355 Jps 20071 HQuorumPeer 20137 HMaster ---- -==== + .`node-b` `jps` Output -==== ---- $ jps 15930 HRegionServer @@ -585,17 +619,14 @@ $ jps 15838 HQuorumPeer 16010 HMaster ---- -==== + .`node-c` `jps` Output -==== ---- $ jps 13901 Jps 13639 HQuorumPeer 13737 HRegionServer ---- -==== + .ZooKeeper Process Name [NOTE] diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc index 77986576a36..f809f28fcc5 100644 --- a/src/main/asciidoc/_chapters/hbase-default.adoc +++ b/src/main/asciidoc/_chapters/hbase-default.adoc @@ -150,7 +150,7 @@ A comma-separated list of BaseLogCleanerDelegate invoked by *`hbase.master.logcleaner.ttl`*:: + .Description -Maximum time a WAL can stay in the .oldlogdir directory, +Maximum time a WAL can stay in the oldWALs directory, after which it will be cleaned by a Master thread. + .Default diff --git a/src/main/asciidoc/_chapters/hbase_mob.adoc b/src/main/asciidoc/_chapters/hbase_mob.adoc index 97305292ffe..8048772e504 100644 --- a/src/main/asciidoc/_chapters/hbase_mob.adoc +++ b/src/main/asciidoc/_chapters/hbase_mob.adoc @@ -61,12 +61,10 @@ an object is considered to be a MOB. 
Only `IS_MOB` is required. If you do not specify the `MOB_THRESHOLD`, the default threshold value of 100 KB is used. .Configure a Column for MOB Using HBase Shell -==== ---- hbase> create 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400} hbase> alter 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400} ---- -==== .Configure a Column for MOB Using the Java API ==== @@ -91,7 +89,6 @@ weekly policy - compact MOB Files for one week into one large MOB file montly policy - compact MOB Files for one month into one large MOB File .Configure MOB compaction policy Using HBase Shell -==== ---- hbase> create 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400, MOB_COMPACT_PARTITION_POLICY => 'daily'} hbase> create 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400, MOB_COMPACT_PARTITION_POLICY => 'weekly'} @@ -101,7 +98,6 @@ hbase> alter 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400, MOB_C hbase> alter 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400, MOB_COMPACT_PARTITION_POLICY => 'weekly'} hbase> alter 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 102400, MOB_COMPACT_PARTITION_POLICY => 'monthly'} ---- -==== === Configure MOB Compaction mergeable threshold diff --git a/src/main/asciidoc/_chapters/images b/src/main/asciidoc/_chapters/images index 1e0c6c1bac5..dc4cd20d1ba 120000 --- a/src/main/asciidoc/_chapters/images +++ b/src/main/asciidoc/_chapters/images @@ -1 +1 @@ -../../site/resources/images \ No newline at end of file +../../../site/resources/images/ \ No newline at end of file diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index c7362acd8a0..10508f490ea 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -68,8 +68,12 @@ Some commands take arguments. Pass no args or -h for usage. 
pe Run PerformanceEvaluation ltt Run LoadTestTool canary Run the Canary tool - regionsplitter Run the RegionSplitter tool version Print the version + backup Backup tables for recovery + restore Restore tables from existing backup image + regionsplitter Run RegionSplitter tool + rowcounter Run RowCounter tool + cellcounter Run CellCounter tool CLASSNAME Run the class named CLASSNAME ---- @@ -79,7 +83,7 @@ Others, such as `hbase shell` (<>), `hbase upgrade` (<>), and === Canary There is a Canary class can help users to canary-test the HBase cluster status, with every column-family for every regions or RegionServer's granularity. -To see the usage, use the `--help` parameter. +To see the usage, use the `-help` parameter. ---- $ ${HBASE_HOME}/bin/hbase canary -help @@ -108,6 +112,13 @@ Usage: hbase canary [opts] [table1 [table2]...] | [regionserver1 [regionserver2] -D= assigning or override the configuration params ---- +[NOTE] +The `Sink` class is instantiated using the `hbase.canary.sink.class` configuration property which +will also determine the used Monitor class. In the absence of this property RegionServerStdOutSink +will be used. You need to use the Sink according to the passed parameters to the _canary_ command. +As an example you have to set `hbase.canary.sink.class` property to +`org.apache.hadoop.hbase.tool.Canary$RegionStdOutSink` for using table parameters. + This tool will return non zero error codes to user for collaborating with other monitoring tools, such as Nagios. The error code definitions are: @@ -192,10 +203,10 @@ This daemon will stop itself and return non-zero error code if any error occurs, $ ${HBASE_HOME}/bin/hbase canary -daemon ---- -Run repeatedly with internal 5 seconds and will not stop itself even if errors occur in the test. +Run repeatedly with 5 second intervals and will not stop itself even if errors occur in the test. 
---- -$ ${HBASE_HOME}/bin/hbase canary -daemon -interval 50000 -f false +$ ${HBASE_HOME}/bin/hbase canary -daemon -interval 5 -f false ---- ==== Force timeout if canary test stuck @@ -205,7 +216,7 @@ Because of this we provide a timeout option to kill the canary test and return a This run sets the timeout value to 60 seconds, the default value is 600 seconds. ---- -$ ${HBASE_HOME}/bin/hbase canary -t 600000 +$ ${HBASE_HOME}/bin/hbase canary -t 60000 ---- ==== Enable write sniffing in canary @@ -234,7 +245,7 @@ while returning normal exit code. To treat read / write failure as error, you ca with the `-treatFailureAsError` option. When enabled, read / write failure would result in error exit code. ---- -$ ${HBASE_HOME}/bin/hbase canary --treatFailureAsError +$ ${HBASE_HOME}/bin/hbase canary -treatFailureAsError ---- ==== Running Canary in a Kerberos-enabled Cluster @@ -266,7 +277,7 @@ This example shows each of the properties with valid values. /etc/hbase/conf/keytab.krb5 -property> + hbase.client.dns.interface default @@ -381,7 +392,7 @@ directory. You can get a textual dump of a WAL file content by doing the following: ---- - $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --dump hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012 + $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --dump hdfs://example.org:8020/hbase/WALs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012 ---- The return code will be non-zero if there are any issues with the file so you can test wholesomeness of file by redirecting `STDOUT` to `/dev/null` and testing the program return. 
@@ -389,7 +400,7 @@ The return code will be non-zero if there are any issues with the file so you ca Similarly you can force a split of a log file directory by doing: ---- - $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --split hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/ + $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --split hdfs://example.org:8020/hbase/WALs/example.org,60020,1283516293161/ ---- [[hlog_tool.prettyprint]] @@ -399,7 +410,7 @@ The `WALPrettyPrinter` is a tool with configurable options to print the contents You can invoke it via the HBase cli with the 'wal' command. ---- - $ ./bin/hbase wal hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012 + $ ./bin/hbase wal hdfs://example.org:8020/hbase/WALs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012 ---- .WAL Printing in older versions of HBase @@ -677,6 +688,7 @@ Assuming you're running HDFS with permissions enabled, those permissions will ne For more information about bulk-loading HFiles into HBase, see <>. +[[walplayer]] === WALPlayer WALPlayer is a utility to replay WAL files into HBase. @@ -701,25 +713,63 @@ $ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer /backuplogdir oldTable1, WALPlayer, by default, runs as a mapreduce job. To NOT run WALPlayer as a mapreduce job on your cluster, force it to run all in the local process by adding the flags `-Dmapreduce.jobtracker.address=local` on the command line. -[[rowcounter]] -=== RowCounter and CellCounter +[[walplayer.options]] +==== WALPlayer Options -link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table. -This is a good utility to use as a sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency. 
-It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. It is also possible to limit -the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags. +Running `WALPlayer` with no arguments prints brief usage information: ---- -$ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter [ ...] +Usage: WALPlayer [options] [] +Replay all WAL files into HBase. + is a comma separated list of tables. +If no tables ("") are specified, all tables are imported. +(Be careful, hbase:meta entries will be imported in this case.) + +WAL entries can be mapped to new set of tables via . + is a comma separated list of target tables. +If specified, each table in must have a mapping. + +By default WALPlayer will load data directly into HBase. +To generate HFiles for a bulk data load instead, pass the following option: + -Dwal.bulk.output=/path/for/output + (Only one table can be specified, and no mapping is allowed!) +Time range options: + -Dwal.start.time=[date|ms] + -Dwal.end.time=[date|ms] + (The start and the end date of timerange. The dates can be expressed + in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format. + E.g. 1234567890120 or 2009-02-13T23:32:30.12) +Other options: + -Dmapreduce.job.name=jobName + Use the specified mapreduce job name for the wal player +For performance also consider the following options: + -Dmapreduce.map.speculative=false + -Dmapreduce.reduce.speculative=false +---- + +[[rowcounter]] +=== RowCounter + +link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html[RowCounter] is a mapreduce job to count all the rows of a table. +This is a good utility to use as a sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency. 
+It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. +It is possible to limit the time range of data to be scanned by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags. +The scanned data can be limited based on keys using the `--range=[startKey],[endKey][;[startKey],[endKey]...]` option. + +---- +$ bin/hbase rowcounter [options] [--starttime= --endtime=] [--range=[startKey],[endKey][;[startKey],[endKey]...]] [ ...] ---- RowCounter only counts one version per cell. -Note: caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration. +For performance, consider using the `-Dhbase.client.scanner.caching=100` and `-Dmapreduce.map.speculative=false` options. + +[[cellcounter]] +=== CellCounter HBase ships another diagnostic mapreduce job called link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/CellCounter.html[CellCounter]. Like RowCounter, it gathers more fine-grained statistics about your table. -The statistics gathered by RowCounter are more fine-grained and include: +The statistics gathered by CellCounter are more fine-grained and include: * Total number of rows in the table. * Total number of CFs across all rows. @@ -730,12 +780,12 @@ The statistics gathered by RowCounter are more fine-grained and include: The program allows you to limit the scope of the run. Provide a row regex or prefix to limit the rows to analyze. -Specify a time range to scan the table by using the `--starttime=[starttime]` and `--endtime=[endtime]` flags. +Specify a time range to scan the table by using the `--starttime=` and `--endtime=` flags. Use `hbase.mapreduce.scan.column.family` to specify scanning a single column family.
---- -$ bin/hbase org.apache.hadoop.hbase.mapreduce.CellCounter [regex or prefix] +$ bin/hbase cellcounter [reportSeparator] [regex or prefix] [--starttime= --endtime=] ---- Note: just like RowCounter, caching for the input Scan is configured via `hbase.client.scanner.caching` in the job configuration. @@ -743,8 +793,7 @@ Note: just like RowCounter, caching for the input Scan is configured via `hbase. === mlockall It is possible to optionally pin your servers in physical memory making them less likely to be swapped out in oversubscribed environments by having the servers call link:http://linux.die.net/man/2/mlockall[mlockall] on startup. -See link:https://issues.apache.org/jira/browse/HBASE-4391[HBASE-4391 Add ability to - start RS as root and call mlockall] for how to build the optional library and have it run on startup. +See link:https://issues.apache.org/jira/browse/HBASE-4391[HBASE-4391 Add ability to start RS as root and call mlockall] for how to build the optional library and have it run on startup. [[compaction.tool]] === Offline Compaction Tool @@ -1024,13 +1073,10 @@ The script requires you to set some environment variables before running it. Examine the script and modify it to suit your needs. ._rolling-restart.sh_ General Usage -==== ---- - $ ./bin/rolling-restart.sh --help Usage: rolling-restart.sh [--config ] [--rs-only] [--master-only] [--graceful] [--maxthreads xx] ---- -==== Rolling Restart on RegionServers Only:: To perform a rolling restart on the RegionServers only, use the `--rs-only` option. @@ -2645,8 +2691,10 @@ full implications and have a sufficient background in managing HBase clusters. It was developed by Yahoo! and they run it at scale on their large grid cluster. See link:http://www.slideshare.net/HBaseCon/keynote-apache-hbase-at-yahoo-scale[HBase at Yahoo! Scale]. -RSGroups can be defined and managed with shell commands or corresponding Java -APIs. 
A server can be added to a group with hostname and port pair and tables +RSGroups are defined and managed with shell commands. The shell drives a +Coprocessor Endpoint whose API is marked private given this is an evolving +feature; the Coprocessor API is not for public consumption. +A server can be added to a group with hostname and port pair and tables can be moved to this group so that only regionservers in the same rsgroup can host the regions of the table. RegionServers and tables can only belong to one rsgroup at a time. By default, all tables and regionservers belong to the @@ -2781,6 +2829,48 @@ Viewing the Master log will give you insight on rsgroup operation. If it appears stuck, restart the Master process. +=== Remove RegionServer Grouping +Removing RegionServer Grouping feature from a cluster on which it was enabled involves +more steps in addition to removing the relevant properties from `hbase-site.xml`. This is +to clean the RegionServer grouping related meta data so that if the feature is re-enabled +in the future, the old meta data will not affect the functioning of the cluster. + +- Move all tables in non-default rsgroups to `default` regionserver group +[source,bash] +---- +#Reassigning table t1 from non default group - hbase shell +hbase(main):005:0> move_tables_rsgroup 'default',['t1'] +---- +- Move all regionservers in non-default rsgroups to `default` regionserver group +[source, bash] +---- +#Reassigning all the servers in the non-default rsgroup to default - hbase shell +hbase(main):008:0> move_servers_rsgroup 'default',['rs1.xxx.com:16206','rs2.xxx.com:16202','rs3.xxx.com:16204'] +---- +- Remove all non-default rsgroups. 
`default` rsgroup created implicitly doesn't have to be removed +[source,bash] +---- +#removing non default rsgroup - hbase shell +hbase(main):009:0> remove_rsgroup 'group2' +---- +- Remove the changes made in `hbase-site.xml` and restart the cluster +- Drop the table `hbase:rsgroup` from `hbase` +[source, bash] +---- +#Through hbase shell drop table hbase:rsgroup +hbase(main):001:0> disable 'hbase:rsgroup' +0 row(s) in 2.6270 seconds + +hbase(main):002:0> drop 'hbase:rsgroup' +0 row(s) in 1.2730 seconds +---- +- Remove znode `rsgroup` from the cluster ZooKeeper using zkCli.sh +[source, bash] +---- +#From ZK remove the node /hbase/rsgroup through zkCli.sh +rmr /hbase/rsgroup +---- + === ACL To enable ACL, add the following to your hbase-site.xml and restart your Master: @@ -2793,3 +2883,141 @@ To enable ACL, add the following to your hbase-site.xml and restart your Master: ---- + +[[normalizer]] +== Region Normalizer + +The Region Normalizer tries to make all Regions in a table about the same size. +It does this by finding a rough average. Any region that is larger than twice this +size is split. Any region that is much smaller is merged into an adjacent region. +It is good to run the Normalizer on occasion during down time after the cluster has +been running a while or, say, after a burst of activity such as a large delete. + +(The bulk of the below detail was copied wholesale from the blog by Romil Choksi at +link:https://community.hortonworks.com/articles/54987/hbase-region-normalizer.html[HBase Region Normalizer]) + +The Region Normalizer is a feature available since HBase-1.2. It runs a set of +pre-calculated merge/split actions to resize regions that are either too +large or too small compared to the average region size for a given table. Region +Normalizer, when invoked, computes a normalization 'plan' for all of the tables in +HBase.
System tables (such as hbase:meta, hbase:namespace, Phoenix system tables +etc) and user tables with normalization disabled are ignored while computing the +plan. For normalization-enabled tables, the normalization plan is carried out in +parallel across multiple tables. + +Normalizer can be enabled or disabled globally for the entire cluster using the +`normalizer_switch` command in the HBase shell. Normalization can also be +controlled on a per table basis, which is disabled by default when a table is +created. Normalization for a table can be enabled or disabled by setting the +NORMALIZATION_ENABLED table attribute to true or false. + +To check normalizer status and enable/disable normalizer + +[source,bash] +---- +hbase(main):001:0> normalizer_enabled +true +0 row(s) in 0.4870 seconds + +hbase(main):002:0> normalizer_switch false +true +0 row(s) in 0.0640 seconds + +hbase(main):003:0> normalizer_enabled +false +0 row(s) in 0.0120 seconds + +hbase(main):004:0> normalizer_switch true +false +0 row(s) in 0.0200 seconds + +hbase(main):005:0> normalizer_enabled +true +0 row(s) in 0.0090 seconds +---- + +When enabled, Normalizer is invoked in the background every 5 mins (by default), +which can be configured using `hbase.normalization.period` in `hbase-site.xml`. +Normalizer can also be invoked manually/programmatically at will using HBase shell’s +`normalize` command. HBase by default uses `SimpleRegionNormalizer`, but users can +design their own normalizer as long as they implement the RegionNormalizer Interface. +Details about the logic used by `SimpleRegionNormalizer` to compute its normalization +plan can be found link:https://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.html[here]. + +The below example shows a normalization plan being computed for a user table, and +a merge action being taken as a result of the normalization plan computed by SimpleRegionNormalizer.
+ +Consider a user table with some pre-split regions having 3 equally large regions +(about 100K rows) and 1 relatively small region (about 25K rows). Following is the +snippet from an hbase meta table scan showing each of the pre-split regions for +the user table. + +---- +table_p8ddpd6q5z,,1469494305548.68b9892220865cb6048 column=info:regioninfo, timestamp=1469494306375, value={ENCODED => 68b9892220865cb604809c950d1adf48, NAME => 'table_p8ddpd6q5z,,1469494305548.68b989222 09c950d1adf48. 0865cb604809c950d1adf48.', STARTKEY => '', ENDKEY => '1'} +.... +table_p8ddpd6q5z,1,1469494317178.867b77333bdc75a028 column=info:regioninfo, timestamp=1469494317848, value={ENCODED => 867b77333bdc75a028bb4c5e4b235f48, NAME => 'table_p8ddpd6q5z,1,1469494317178.867b7733 bb4c5e4b235f48. 3bdc75a028bb4c5e4b235f48.', STARTKEY => '1', ENDKEY => '3'} +.... +table_p8ddpd6q5z,3,1469494328323.98f019a753425e7977 column=info:regioninfo, timestamp=1469494328486, value={ENCODED => 98f019a753425e7977ab8636e32deeeb, NAME => 'table_p8ddpd6q5z,3,1469494328323.98f019a7 ab8636e32deeeb. 53425e7977ab8636e32deeeb.', STARTKEY => '3', ENDKEY => '7'} +.... +table_p8ddpd6q5z,7,1469494339662.94c64e748979ecbb16 column=info:regioninfo, timestamp=1469494339859, value={ENCODED => 94c64e748979ecbb166f6cc6550e25c6, NAME => 'table_p8ddpd6q5z,7,1469494339662.94c64e74 6f6cc6550e25c6. 8979ecbb166f6cc6550e25c6.', STARTKEY => '7', ENDKEY => '8'} +.... +table_p8ddpd6q5z,8,1469494339662.6d2b3f5fd1595ab8e7 column=info:regioninfo, timestamp=1469494339859, value={ENCODED => 6d2b3f5fd1595ab8e7c031876057b1ee, NAME => 'table_p8ddpd6q5z,8,1469494339662.6d2b3f5f c031876057b1ee. d1595ab8e7c031876057b1ee.', STARTKEY => '8', ENDKEY => ''} +---- +Invoking the normalizer using ‘normalize’ in the HBase shell, the below log snippet +from the HMaster log shows the normalization plan computed as per the logic defined for +SimpleRegionNormalizer.
Since the total region size (in MB) for the adjacent smallest +regions in the table is less than the average region size, the normalizer computes a +plan to merge these two regions. + +---- +2016-07-26 07:08:26,928 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] master.HMaster: Skipping normalization for table: hbase:namespace, as it's either system table or doesn't have auto +normalization turned on +2016-07-26 07:08:26,928 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] master.HMaster: Skipping normalization for table: hbase:backup, as it's either system table or doesn't have auto normalization turned on +2016-07-26 07:08:26,928 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] master.HMaster: Skipping normalization for table: hbase:meta, as it's either system table or doesn't have auto normalization turned on +2016-07-26 07:08:26,928 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] master.HMaster: Skipping normalization for table: table_h2osxu3wat, as it's either system table or doesn't have autonormalization turned on +2016-07-26 07:08:26,928 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] normalizer.SimpleRegionNormalizer: Computing normalization plan for table: table_p8ddpd6q5z, number of regions: 5 +2016-07-26 07:08:26,929 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] normalizer.SimpleRegionNormalizer: Table table_p8ddpd6q5z, total aggregated regions size: 12 +2016-07-26 07:08:26,929 DEBUG [B.fifo.QRpcServer.handler=20,queue=2,port=20000] normalizer.SimpleRegionNormalizer: Table table_p8ddpd6q5z, average region size: 2.4 +2016-07-26 07:08:26,929 INFO [B.fifo.QRpcServer.handler=20,queue=2,port=20000] normalizer.SimpleRegionNormalizer: Table table_p8ddpd6q5z, small region size: 0 plus its neighbor size: 0, less thanthe avg size 2.4, merging them +2016-07-26 07:08:26,971 INFO [B.fifo.QRpcServer.handler=20,queue=2,port=20000] normalizer.MergeNormalizationPlan: Executing merging normalization plan: 
MergeNormalizationPlan{firstRegion={ENCODED=> d51df2c58e9b525206b1325fd925a971, NAME => 'table_p8ddpd6q5z,,1469514755237.d51df2c58e9b525206b1325fd925a971.', STARTKEY => '', ENDKEY => '1'}, secondRegion={ENCODED => e69c6b25c7b9562d078d9ad3994f5330, NAME => 'table_p8ddpd6q5z,1,1469514767669.e69c6b25c7b9562d078d9ad3994f5330.', +STARTKEY => '1', ENDKEY => '3'}} +---- +Region normalizer, as per its computed plan, merged the region with start key as ‘’ +and end key as ‘1’, with another region having start key as ‘1’ and end key as ‘3’. +Now that these regions have been merged, we see a single new region with start key +as ‘’ and end key as ‘3’: +---- +table_p8ddpd6q5z,,1469516907210.e06c9b83c4a252b130e column=info:mergeA, timestamp=1469516907431, +value=PBUF\x08\xA5\xD9\x9E\xAF\xE2*\x12\x1B\x0A\x07default\x12\x10table_p8ddpd6q5z\x1A\x00"\x011(\x000\x00 ea74d246741ba. 8\x00 +table_p8ddpd6q5z,,1469516907210.e06c9b83c4a252b130e column=info:mergeB, timestamp=1469516907431, +value=PBUF\x08\xB5\xBA\x9F\xAF\xE2*\x12\x1B\x0A\x07default\x12\x10table_p8ddpd6q5z\x1A\x011"\x013(\x000\x0 ea74d246741ba. 08\x00 +table_p8ddpd6q5z,,1469516907210.e06c9b83c4a252b130e column=info:regioninfo, timestamp=1469516907431, value={ENCODED => e06c9b83c4a252b130eea74d246741ba, NAME => 'table_p8ddpd6q5z,,1469516907210.e06c9b83c ea74d246741ba. 4a252b130eea74d246741ba.', STARTKEY => '', ENDKEY => '3'} +.... +table_p8ddpd6q5z,3,1469514778736.bf024670a847c0adff column=info:regioninfo, timestamp=1469514779417, value={ENCODED => bf024670a847c0adffb74b2e13408b32, NAME => 'table_p8ddpd6q5z,3,1469514778736.bf024670 b74b2e13408b32. a847c0adffb74b2e13408b32.' STARTKEY => '3', ENDKEY => '7'} +.... +table_p8ddpd6q5z,7,1469514790152.7c5a67bc755e649db2 column=info:regioninfo, timestamp=1469514790312, value={ENCODED => 7c5a67bc755e649db22f49af6270f1e1, NAME => 'table_p8ddpd6q5z,7,1469514790152.7c5a67bc 2f49af6270f1e1. 755e649db22f49af6270f1e1.', STARTKEY => '7', ENDKEY => '8'} +....
+table_p8ddpd6q5z,8,1469514790152.58e7503cda69f98f47 column=info:regioninfo, timestamp=1469514790312, value={ENCODED => 58e7503cda69f98f4755178e74288c3a, NAME => 'table_p8ddpd6q5z,8,1469514790152.58e7503c 55178e74288c3a. da69f98f4755178e74288c3a.', STARTKEY => '8', ENDKEY => ''} +---- + +A similar example can be seen for a user table with 3 smaller regions and 1 +relatively large region. For this example, we have a user table with 1 large region containing 100K rows, and 3 relatively smaller regions with about 33K rows each. As seen from the normalization plan, since the larger region is more than twice the average region size it ends up being split into two regions – one with start key as ‘1’ and end key as ‘154717’ and the other region with start key as ‘154717’ and end key as ‘3’. +---- +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] master.HMaster: Skipping normalization for table: hbase:backup, as it's either system table or doesn't have auto normalization turned on +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Computing normalization plan for table: table_p8ddpd6q5z, number of regions: 4 +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Table table_p8ddpd6q5z, total aggregated regions size: 12 +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Table table_p8ddpd6q5z, average region size: 3.0 +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: No normalization needed, regions look good for table: table_p8ddpd6q5z +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Computing normalization plan for table: table_h2osxu3wat, number of regions: 5 +2016-07-26 07:39:45,636 DEBUG
[B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Table table_h2osxu3wat, total aggregated regions size: 7 +2016-07-26 07:39:45,636 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Table table_h2osxu3wat, average region size: 1.4 +2016-07-26 07:39:45,636 INFO [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SimpleRegionNormalizer: Table table_h2osxu3wat, large region table_h2osxu3wat,1,1469515926544.27f2fdbb2b6612ea163eb6b40753c3db. has size 4, more than twice avg size, splitting +2016-07-26 07:39:45,640 INFO [B.fifo.QRpcServer.handler=7,queue=1,port=20000] normalizer.SplitNormalizationPlan: Executing splitting normalization plan: SplitNormalizationPlan{regionInfo={ENCODED => 27f2fdbb2b6612ea163eb6b40753c3db, NAME => 'table_h2osxu3wat,1,1469515926544.27f2fdbb2b6612ea163eb6b40753c3db.', STARTKEY => '1', ENDKEY => '3'}, splitPoint=null} +2016-07-26 07:39:45,656 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] master.HMaster: Skipping normalization for table: hbase:namespace, as it's either system table or doesn't have auto normalization turned on +2016-07-26 07:39:45,656 DEBUG [B.fifo.QRpcServer.handler=7,queue=1,port=20000] master.HMaster: Skipping normalization for table: hbase:meta, as it's either system table or doesn't +have auto normalization turned on …..…..…. 
+2016-07-26 07:39:46,246 DEBUG [AM.ZK.Worker-pool2-t278] master.RegionStates: Onlined 54de97dae764b864504704c1c8d3674a on hbase-test-rc-5.openstacklocal,16020,1469419333913 {ENCODED => 54de97dae764b864504704c1c8d3674a, NAME => 'table_h2osxu3wat,1,1469518785661.54de97dae764b864504704c1c8d3674a.', STARTKEY => '1', ENDKEY => '154717'} +2016-07-26 07:39:46,246 INFO [AM.ZK.Worker-pool2-t278] master.RegionStates: Transition {d6b5625df331cfec84dce4f1122c567f state=SPLITTING_NEW, ts=1469518786246, server=hbase-test-rc-5.openstacklocal,16020,1469419333913} to {d6b5625df331cfec84dce4f1122c567f state=OPEN, ts=1469518786246, +server=hbase-test-rc-5.openstacklocal,16020,1469419333913} +2016-07-26 07:39:46,246 DEBUG [AM.ZK.Worker-pool2-t278] master.RegionStates: Onlined d6b5625df331cfec84dce4f1122c567f on hbase-test-rc-5.openstacklocal,16020,1469419333913 {ENCODED => d6b5625df331cfec84dce4f1122c567f, NAME => 'table_h2osxu3wat,154717,1469518785661.d6b5625df331cfec84dce4f1122c567f.', STARTKEY => '154717', ENDKEY => '3'} +---- diff --git a/src/main/asciidoc/_chapters/performance.adoc b/src/main/asciidoc/_chapters/performance.adoc index c917646999e..866779ca785 100644 --- a/src/main/asciidoc/_chapters/performance.adoc +++ b/src/main/asciidoc/_chapters/performance.adoc @@ -188,11 +188,9 @@ It is useful for tuning the IO impact of prefetching versus the time before all To enable prefetching on a given column family, you can use HBase Shell or use the API. .Enable Prefetch Using HBase Shell -==== ---- hbase> create 'MyTable', { NAME => 'myCF', PREFETCH_BLOCKS_ON_OPEN => 'true' } ---- -==== .Enable Prefetch Using the API ==== diff --git a/src/main/asciidoc/_chapters/pv2.adoc b/src/main/asciidoc/_chapters/pv2.adoc new file mode 100644 index 00000000000..5ecad3fdc88 --- /dev/null +++ b/src/main/asciidoc/_chapters/pv2.adoc @@ -0,0 +1,163 @@ +//// +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +//// +[[pv2]] += Procedure Framework (Pv2): link:https://issues.apache.org/jira/browse/HBASE-12439[HBASE-12439] +:doctype: book +:numbered: +:toc: left +:icons: font +:experimental: + + +_Procedure v2 ...aims to provide a unified way to build...multi-step procedures with a rollback/roll-forward ability in case of failure (e.g. create/delete table) -- Matteo Bertozzi, the author of Pv2._ + +With Pv2 you can build and run state machines. It was built by Matteo to make distributed state transitions in HBase resilient in the face of process failures. Previous to Pv2, state transition handling was spread about the codebase with implementation varying by transition-type and context. Pv2 was inspired by link:https://accumulo.apache.org/1.8/accumulo_user_manual.html#_fault_tolerant_executor_fate[FATE], of Apache Accumulo. + + +Early Pv2 aspects have been shipping in HBase for a good while now but it has continued to evolve as it takes on more involved scenarios. What we have now is powerful but intricate in operation and incomplete, in need of cleanup and hardening. In this doc we give an overview of the system so you can make use of it (and help with its polishing).
+ +This system has the awkward name of Pv2 because HBase already had the notion of a Procedure used in snapshots (see hbase-server _org.apache.hadoop.hbase.procedure_ as opposed to hbase-procedure _org.apache.hadoop.hbase.procedure2_). Pv2 supersedes and is to replace Procedure. + +== Procedures + +A Procedure is a transform made on an HBase entity. Examples of HBase entities would be Regions and Tables. + +Procedures are run by a ProcedureExecutor instance. Procedure current state is kept in the ProcedureStore. + +The ProcedureExecutor has but a primitive view on what goes on inside a Procedure. From its PoV, Procedures are submitted and then the ProcedureExecutor keeps calling _#execute(Object)_ until the Procedure is done. Execute may be called multiple times in the case of failure or restart, so Procedure code must be idempotent, yielding the same result each time it runs. Procedure code can also implement _rollback_ so steps can be undone on failure. A call to _execute()_ can result in one of the following possibilities: + +* _execute()_ returns +** _null_: indicates we are done. +** _this_: indicates there is more to do, so persist the current procedure state and re-_execute()_. +** _Array_ of sub-procedures: indicates a set of procedures needed to be run to completion before we can proceed (after which we expect the framework to call our execute again). +* _execute()_ throws exception +** _suspend_: indicates execution of procedure is suspended and can be resumed due to some external event. The procedure state is persisted. +** _yield_: procedure is added back to scheduler. The procedure state is not persisted. +** _interrupted_: currently same as _yield_. +** Any _exception_ not listed above: Procedure _state_ is changed to _FAILED_ (after which we expect the framework will attempt rollback). + +The ProcedureExecutor stamps the framework's notions of Procedure State into the Procedure itself; e.g. it marks Procedures as INITIALIZING on submit.
It moves the state to RUNNABLE when it goes to execute. When done, a Procedure gets marked FAILED or SUCCESS depending on the outcome. Here is the list of all states as of this writing: + +* *_INITIALIZING_* Procedure in construction, not yet added to the executor +* *_RUNNABLE_* Procedure added to the executor, and ready to be executed. +* *_WAITING_* The procedure is waiting on children (subprocedures) to be completed +* *_WAITING_TIMEOUT_* The procedure is waiting on a timeout or an external event +* *_ROLLEDBACK_* The procedure failed and was rolled back. +* *_SUCCESS_* The procedure execution completed successfully. +* *_FAILED_* The procedure execution failed, may need to rollback. + +After each execute, the Procedure state is persisted to the ProcedureStore. Hooks are invoked on Procedures so they can preserve custom state. Post-fault, the ProcedureExecutor re-hydrates its pre-crash state by replaying the content of the ProcedureStore. This makes the Procedure Framework resilient against process failure. + +=== Implementation + +In implementation, Procedures tend to divide transforms into finer-grained tasks and while some of these work items are handed off to sub-procedures, +the bulk are done as processing _steps_ in-Procedure; each invocation of the execute is used to perform a single step, and then the Procedure relinquishes control, returning to the framework. The Procedure does its own tracking of where it is in the processing. + +What comprises a sub-task, or _step_ in the execution is up to the Procedure author but generally it is a small piece of work that cannot be further decomposed and that moves the processing forward toward its end state. Having procedures made of many small steps rather than a few large ones allows the Procedure framework to give insight into where we are in the processing. It also allows the framework to be fairer in its execution. As stated above, each step may be called multiple times (failure/restart) so steps must be idempotent.
+ +It is easy to confuse the state that the Procedure itself is keeping with that of the Framework itself. Try to keep them distinct. + + +=== Rollback + +Rollback is called when the procedure or one of the sub-procedures has failed. The rollback step is supposed to clean up the resources created during the execute() step. In case of failure and restart, rollback() may be called multiple times, so again the code must be idempotent. + +=== Metrics + +There are hooks for collecting metrics on submit of the procedure and on finish. + +* updateMetricsOnSubmit() +* updateMetricsOnFinish() + +Individual procedures can override these methods to collect procedure-specific metrics. The default implementations of these methods try to get an object implementing an interface ProcedureMetrics which encapsulates the following set of generic metrics: + +* SubmittedCount (Counter): Total number of procedure instances submitted of a type. +* Time (Histogram): Histogram of runtime for procedure instances. +* FailedCount (Counter): Total number of failed procedure instances. + +Individual procedures can implement this object and define this generic set of metrics. + +=== Baggage + +Procedures can carry baggage. One example is the _step_ the procedure last attained (see previous section); procedures persist the enum that marks where they are currently. Other examples might be the Region or Server name the Procedure is currently working on. After each call to execute, the Procedure#serializeStateData is called. Procedures can persist whatever they need. + +=== Result/State and Queries + +(From Matteo’s https://issues.apache.org/jira/secure/attachment/12693273/Procedurev2Notification-Bus.pdf[ProcedureV2 and Notification Bus] doc) + +In the case of asynchronous operations, the result must be kept around until the client asks for it. Once we receive a “get” of the result we can schedule the delete of the record. For some operations the result may be “unnecessary” especially in case of failure (e.g.
if the create table fails, we can query the operation result or we can just do a list table to see if it was created) so in some cases we can schedule the delete after a timeout. On the client side the operation will return a “Procedure ID”, this ID can be used to wait until the procedure is completed and get the result/exception. + + +[source] +---- +Admin.doOperation() { long procId = master.doOperation(); master.waitCompletion(procId); } + +---- + +If the master goes down while performing the operation the backup master will pick up the half in-progress operation and complete it. The client will not notice the failure. + +== Subprocedures + +Subprocedures are _Procedure_ instances created and returned by _#execute(Object)_ method of a procedure instance (parent procedure). As subprocedures are of type _Procedure_, they can instantiate their own subprocedures. As it's recursive, a procedure stack is maintained by the framework. The framework makes sure that the parent procedure does not proceed till all sub-procedures and their subprocedures in a procedure stack are successfully finished. + +== ProcedureExecutor + +_ProcedureExecutor_ uses _ProcedureStore_ and _ProcedureScheduler_ and executes procedures submitted to it. Some of the basic operations supported are: + +* _abort(procId)_: aborts specified procedure if it's not finished +* _submit(Procedure)_: submits procedure for execution +* _retrieve:_ list of get methods to get _Procedure_ instances and results +* _register/ unregister_ listeners: for listening on Procedure related notifications + +When _ProcedureExecutor_ starts it loads procedure instances persisted in _ProcedureStore_ from previous run. All unfinished procedures are resumed from the last stored state. + +== Nonces + +You can pass the nonce that came in with the RPC to the Procedure on submit at the executor. This nonce will then be serialized along w/ the Procedure on persist.
If a crash, on reload, the nonce will be put back into a map of nonces to pid in case a client tries to run same procedure for a second time (it will be rejected). See the base Procedure and how nonce is a base data member. + +== Wait/Wake/Suspend/Yield + +‘suspend’ means stop processing a procedure because we can make no more progress until a condition changes; i.e. we sent RPC and need to wait on response. The way this works is that a Procedure throws a suspend exception from down in its guts as a GOTO the end-of-the-current-processing step. Suspend also puts the Procedure back on the scheduler. Problematic is we do some accounting on our way out even on suspend making it so it can take time exiting (We have to update state in the WAL). + +RegionTransitionProcedure#reportTransition is called on receipt of a report from a RS. For Assign and Unassign, this event response from the server we sent an RPC wakes up suspended Assign/Unassigns. + +== Locking + +Procedure Locks are not about concurrency! They are about giving a Procedure read/write access to an HBase Entity such as a Table or Region so that is possible to shut out other Procedures from making modifications to an HBase Entity state while the current one is running. + +Locking is optional, up to the Procedure implementor but if an entity is being operated on by a Procedure, all transforms need to be done via Procedures using the same locking scheme else havoc. + +Two ProcedureExecutor Worker threads can actually end up both processing the same Procedure instance. If it happens, the threads are meant to be running different parts of the one Procedure -- changes that do not stamp on each other (This gets awkward around the procedure frameworks notion of ‘suspend’. More on this below). + +Locks optionally may be held for the life of a Procedure. For example, if moving a Region, you probably want to have exclusive access to the HBase Region until the Region completes (or fails). 
This is used in conjunction with _#holdLock(Object)_. If _#holdLock(Object)_ returns true, the procedure executor will call acquireLock() once and thereafter not call _#releaseLock(Object)_ until the Procedure is done (Normally, it calls release/acquire around each invocation of _#execute(Object)_). + +Locks also may live the life of a procedure; i.e. once an Assign Procedure starts, we do not want another procedure meddling w/ the region under assignment. Procedures that hold the lock for the life of the procedure set Procedure#holdLock to true. AssignProcedure does this as do Split and Move (If in the middle of a Region move, you do not want it Splitting). + +Locking can be for life of Procedure. + +Some locks have a hierarchy. For example, taking a region lock also takes (read) lock on its containing table and namespace to prevent another Procedure obtaining an exclusive lock on the hosting table (or namespace). + +== Procedure Types + +=== StateMachineProcedure + +One can consider each call to _#execute(Object)_ method as transitioning from one state to another in a state machine. Abstract class _StateMachineProcedure_ is a wrapper around base _Procedure_ class which provides constructs for implementing a state machine as a _Procedure_. After each state transition current state is persisted so that, in case of crash/ restart, the state transition can be resumed from the previous state of a procedure before crash/ restart. Individual procedures need to define initial and terminus states and hooks _executeFromState()_ and _setNextState()_ are provided for state transitions. + +=== RemoteProcedureDispatcher + +A new RemoteProcedureDispatcher (+ subclass RSProcedureDispatcher) primitive takes care of running the Procedure-based Assignments ‘remote’ component. This dispatcher knows about ‘servers’. It does aggregation of assignments by time on a time/count basis so can send procedures in batches rather than one per RPC.
Procedure status comes back on the back of the RegionServer heartbeat reporting online/offline regions (No more notifications via ZK). The response is passed to the AMv2 to ‘process’. It will check against the in-memory state. If there is a mismatch, it fences out the RegionServer on the assumption that something went wrong on the RS side. Timeouts trigger retries (Not Yet Implemented!). The Procedure machine ensures only one operation at a time on any one Region/Table using entity _locking_ and smarts about what is serial and what can be run concurrently (Locking was zk-based -- you’d put a znode in zk for a table -- but now has been converted to be procedure-based as part of this project). + +== References + +* Matteo had a slide deck on what the Procedure Framework would look like and the problems it addresses initially link:https://issues.apache.org/jira/secure/attachment/12845124/ProcedureV2b.pdf[attached to the Pv2 issue.] +* link:https://issues.apache.org/jira/secure/attachment/12693273/Procedurev2Notification-Bus.pdf[A good doc by Matteo] on problem and how Pv2 addresses it w/ roadmap (from the Pv2 JIRA). We should go back to the roadmap to do the Notification Bus, conversion of log splitting to Pv2, etc. diff --git a/src/main/asciidoc/_chapters/schema_design.adoc b/src/main/asciidoc/_chapters/schema_design.adoc index 4cd7656ad62..b7a69363056 100644 --- a/src/main/asciidoc/_chapters/schema_design.adoc +++ b/src/main/asciidoc/_chapters/schema_design.adoc @@ -504,11 +504,9 @@ Deleted cells are still subject to TTL and there will never be more than "maximu A new "raw" scan options returns all deleted rows and the delete markers. .Change the Value of `KEEP_DELETED_CELLS` Using HBase Shell -==== ---- hbase> hbase> alter ‘t1′, NAME => ‘f1′, KEEP_DELETED_CELLS => true ---- -==== .Change the Value of `KEEP_DELETED_CELLS` Using the API ==== @@ -1148,16 +1146,41 @@ Detect regionserver failure as fast as reasonable.
Set the following parameters: - `dfs.namenode.avoid.read.stale.datanode = true` - `dfs.namenode.avoid.write.stale.datanode = true` +[[shortcircuit.reads]] === Optimize on the Server Side for Low Latency +Skip the network for local blocks when the RegionServer goes to read from HDFS by exploiting HDFS's +link:https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html[Short-Circuit Local Reads] facility. +Note how setup must be done both at the datanode and on the dfsclient ends of the connection -- i.e. at the RegionServer +and how both ends need to have loaded the hadoop native `.so` library. +After configuring your hadoop setting _dfs.client.read.shortcircuit_ to _true_ and configuring +the _dfs.domain.socket.path_ path for the datanode and dfsclient to share and restarting, next configure +the regionserver/dfsclient side. -* Skip the network for local blocks. In `hbase-site.xml`, set the following parameters: +* In `hbase-site.xml`, set the following parameters: - `dfs.client.read.shortcircuit = true` -- `dfs.client.read.shortcircuit.buffer.size = 131072` (Important to avoid OOME) +- `dfs.client.read.shortcircuit.skip.checksum = true` so we don't double checksum (HBase does its own checksumming to save on i/os. See <<hbase.regionserver.checksum.verify.performance>> for more on this.) +- `dfs.domain.socket.path` to match what was set for the datanodes. +- `dfs.client.read.shortcircuit.buffer.size = 131072` Important to avoid OOME -- hbase has a default it uses if unset, see `hbase.dfs.client.read.shortcircuit.buffer.size`; its default is 131072. * Ensure data locality. In `hbase-site.xml`, set `hbase.hstore.min.locality.to.skip.major.compact = 0.7` (Meaning that 0.7 \<= n \<= 1) * Make sure DataNodes have enough handlers for block transfers. In `hdfs-site.xml`, set the following parameters: - `dfs.datanode.max.xcievers >= 8192` - `dfs.datanode.handler.count =` number of spindles +Check the RegionServer logs after restart. You should only see complaints if there is a misconfiguration.
+Otherwise, shortcircuit read operates quietly in background. It does not provide metrics so +no optics on how effective it is but read latencies should show a marked improvement, especially if +good data locality, lots of random reads, and dataset is larger than available cache. + +Other advanced configurations that you might play with, especially if shortcircuit functionality +is complaining in the logs, include `dfs.client.read.shortcircuit.streams.cache.size` and +`dfs.client.socketcache.capacity`. Documentation is sparse on these options. You'll have to +read source code. + +For more on short-circuit reads, see Colin's old blog on rollout, +link:http://blog.cloudera.com/blog/2013/08/how-improved-short-circuit-local-reads-bring-better-performance-and-security-to-hadoop/[How Improved Short-Circuit Local Reads Bring Better Performance and Security to Hadoop]. +The link:https://issues.apache.org/jira/browse/HDFS-347[HDFS-347] issue also makes for an +interesting read showing the HDFS community at its best (caveat a few comments). + === JVM Tuning ==== Tune JVM GC for low collection latencies diff --git a/src/main/asciidoc/_chapters/security.adoc b/src/main/asciidoc/_chapters/security.adoc index ef7d6c46b5c..dae6c539523 100644 --- a/src/main/asciidoc/_chapters/security.adoc +++ b/src/main/asciidoc/_chapters/security.adoc @@ -662,6 +662,7 @@ You also need to enable the DataBlockEncoder for the column family, for encoding You can enable compression of each tag in the WAL, if WAL compression is also enabled, by setting the value of `hbase.regionserver.wal.tags.enablecompression` to `true` in _hbase-site.xml_. Tag compression uses dictionary encoding. +Coprocessors that run server-side on RegionServers can perform get and set operations on cell Tags. Tags are stripped out at the RPC layer before the read response is sent back, so clients do not see these tags. Tag compression is not supported when using WAL encryption. 
[[hbase.accesscontrol.configuration]] @@ -1086,7 +1087,6 @@ public static void revokeFromTable(final HBaseTestingUtility util, final String . Showing a User's Effective Permissions + .HBase Shell -==== ---- hbase> user_permission 'user' @@ -1094,7 +1094,6 @@ hbase> user_permission '.*' hbase> user_permission JAVA_REGEX ---- -==== .API ==== @@ -1234,11 +1233,9 @@ Refer to the official API for usage instructions. . Define the List of Visibility Labels + .HBase Shell -==== ---- hbase> add_labels [ 'admin', 'service', 'developer', 'test' ] ---- -==== + .Java API ==== @@ -1265,7 +1262,6 @@ public static void addLabels() throws Exception { . Associate Labels with Users + .HBase Shell -==== ---- hbase> set_auths 'service', [ 'service' ] ---- @@ -1281,7 +1277,6 @@ hbase> set_auths 'qa', [ 'test', 'developer' ] ---- hbase> set_auths '@qagroup', [ 'test' ] ---- -==== + .Java API ==== @@ -1305,7 +1300,6 @@ public void testSetAndGetUserAuths() throws Throwable { . Clear Labels From Users + .HBase Shell -==== ---- hbase> clear_auths 'service', [ 'service' ] ---- @@ -1321,7 +1315,6 @@ hbase> clear_auths 'qa', [ 'test', 'developer' ] ---- hbase> clear_auths '@qagroup', [ 'test', 'developer' ] ---- -==== + .Java API ==== @@ -1345,7 +1338,6 @@ The label is only applied when data is written. The label is associated with a given version of the cell. + .HBase Shell -==== ---- hbase> set_visibility 'user', 'admin|service|developer', { COLUMNS => 'i' } ---- @@ -1357,7 +1349,6 @@ hbase> set_visibility 'user', 'admin|service', { COLUMNS => 'pii' } ---- hbase> set_visibility 'user', 'test', { COLUMNS => [ 'i', 'pii' ], FILTER => "(PrefixFilter ('test'))" } ---- -==== + NOTE: HBase Shell support for applying labels or permissions to cells is for testing and verification support, and should not be employed for production use because it won't apply the labels to cells that don't exist yet. The correct way to apply cell level labels is to do so in the application code when storing the values. 
@@ -1408,12 +1399,10 @@ set as an additional filter. It will further filter your results, rather than giving you additional authorization. .HBase Shell -==== ---- hbase> get_auths 'myUser' hbase> scan 'table1', AUTHORIZATIONS => ['private'] ---- -==== .Java API ==== diff --git a/src/main/asciidoc/_chapters/shell.adoc b/src/main/asciidoc/_chapters/shell.adoc index 13b8dd1b808..5612e1dab46 100644 --- a/src/main/asciidoc/_chapters/shell.adoc +++ b/src/main/asciidoc/_chapters/shell.adoc @@ -145,7 +145,6 @@ For instance, if your script creates a table, but returns a non-zero exit value, You can enter HBase Shell commands into a text file, one command per line, and pass that file to the HBase Shell. .Example Command File -==== ---- create 'test', 'cf' list 'test' @@ -158,7 +157,6 @@ get 'test', 'row1' disable 'test' enable 'test' ---- -==== .Directing HBase Shell to Execute the Commands ==== @@ -227,7 +225,7 @@ The table reference can be used to perform data read write operations such as pu For example, previously you would always specify a table name: ---- -hbase(main):000:0> create ‘t’, ‘f’ +hbase(main):000:0> create 't', 'f' 0 row(s) in 1.0970 seconds hbase(main):001:0> put 't', 'rold', 'f', 'v' 0 row(s) in 0.0080 seconds @@ -291,7 +289,7 @@ hbase(main):012:0> tab = get_table 't' 0 row(s) in 0.0010 seconds => Hbase::Table - t -hbase(main):013:0> tab.put ‘r1’ ,’f’, ‘v’ +hbase(main):013:0> tab.put 'r1' ,'f', 'v' 0 row(s) in 0.0100 seconds hbase(main):014:0> tab.scan ROW COLUMN+CELL @@ -305,7 +303,7 @@ You can then use jruby to script table operations based on these names. The list_snapshots command also acts similarly. 
---- -hbase(main):016 > tables = list(‘t.*’) +hbase(main):016 > tables = list('t.*') TABLE t 1 row(s) in 0.1040 seconds diff --git a/src/main/asciidoc/_chapters/tracing.adoc b/src/main/asciidoc/_chapters/tracing.adoc index 8bd1962bd5c..7305aa8ea02 100644 --- a/src/main/asciidoc/_chapters/tracing.adoc +++ b/src/main/asciidoc/_chapters/tracing.adoc @@ -30,8 +30,10 @@ :icons: font :experimental: -link:https://issues.apache.org/jira/browse/HBASE-6449[HBASE-6449] added support for tracing requests through HBase, using the open source tracing library, link:https://htrace.incubator.apache.org/[HTrace]. -Setting up tracing is quite simple, however it currently requires some very minor changes to your client code (it would not be very difficult to remove this requirement). +HBase includes facilities for tracing requests using the open source tracing library, link:https://htrace.incubator.apache.org/[Apache HTrace]. +Setting up tracing is quite simple, however it currently requires some very minor changes to your client code (this requirement may be removed in the future). + +Support for this feature using HTrace 3 in HBase was added in link:https://issues.apache.org/jira/browse/HBASE-6449[HBASE-6449]. Starting with HBase 2.0, there was a non-compatible update to HTrace 4 via link:https://issues.apache.org/jira/browse/HBASE-18601[HBASE-18601]. The examples provided in this section will be using HTrace 4 package names, syntax, and conventions. For older examples, please consult previous versions of this guide. [[tracing.spanreceivers]] === SpanReceivers diff --git a/src/main/asciidoc/_chapters/troubleshooting.adoc b/src/main/asciidoc/_chapters/troubleshooting.adoc index eb62b338c11..0340105a89f 100644 --- a/src/main/asciidoc/_chapters/troubleshooting.adoc +++ b/src/main/asciidoc/_chapters/troubleshooting.adoc @@ -102,9 +102,9 @@ To disable, set the logging level back to `INFO` level. 
=== JVM Garbage Collection Logs [NOTE] ----- +==== All example Garbage Collection logs in this section are based on Java 8 output. The introduction of Unified Logging in Java 9 and newer will result in very different looking logs. ----- +==== HBase is memory intensive, and using the default GC you can see long pauses in all threads including the _Juliet Pause_ aka "GC of Death". To help debug this or confirm this is happening GC logging can be turned on in the Java virtual machine. @@ -806,10 +806,12 @@ The HDFS directory structure of HBase tables in the cluster is... ---- /hbase - /<Table> (Tables in the cluster) - /<Region> (Regions for the table) - /<ColumnFamily> (ColumnFamilies for the Region for the table) - /<StoreFile> (StoreFiles for the ColumnFamily for the Regions for the table) + /data + /<Namespace> (Namespaces in the cluster) + /<Table>
(Tables in the cluster) + /<Region> (Regions for the table) + /<ColumnFamily> (ColumnFamilies for the Region for the table) + /<StoreFile> (StoreFiles for the ColumnFamily for the Regions for the table) ---- The HDFS directory structure of HBase WAL is.. @@ -817,7 +819,7 @@ The HDFS directory structure of HBase WAL is.. ---- /hbase - /.logs + /WALs /<RegionServer> (RegionServers) /<WAL> (WAL files for the RegionServer) ---- @@ -827,7 +829,7 @@ See the link:https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hd [[trouble.namenode.0size.hlogs]] ==== Zero size WALs with data in them -Problem: when getting a listing of all the files in a RegionServer's _.logs_ directory, one file has a size of 0 but it contains data. +Problem: when getting a listing of all the files in a RegionServer's _WALs_ directory, one file has a size of 0 but it contains data. Answer: It's an HDFS quirk. A file that's currently being written to will appear to have a size of 0 but once it's closed it will show its true size @@ -941,6 +943,96 @@ java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path \... then there is a path issue with the compression libraries. See the Configuration section on link:[LZO compression configuration]. +[[trouble.rs.startup.hsync]] +==== RegionServer aborts due to lack of hsync for filesystem + +In order to provide data durability for writes to the cluster HBase relies on the ability to durably save state in a write ahead log. When using a version of Apache Hadoop Common's filesystem API that supports checking on the availability of needed calls, HBase will proactively abort the cluster if it finds it can't operate safely. + +For RegionServer roles, the failure will show up in logs like this: + +---- +2018-04-05 11:36:22,785 ERROR [regionserver/192.168.1.123:16020] wal.AsyncFSWALProvider: The RegionServer async write ahead log provider relies on the ability to call hflush and hsync for proper operation during component failures, but the current FileSystem does not support doing so.
Please check the config value of 'hbase.wal.dir' and ensure it points to a FileSystem mount that has suitable capabilities for output streams. +2018-04-05 11:36:22,799 ERROR [regionserver/192.168.1.123:16020] regionserver.HRegionServer: ***** ABORTING region server 192.168.1.123,16020,1522946074234: Unhandled: cannot get log writer ***** +java.io.IOException: cannot get log writer + at org.apache.hadoop.hbase.wal.AsyncFSWALProvider.createAsyncWriter(AsyncFSWALProvider.java:112) + at org.apache.hadoop.hbase.regionserver.wal.AsyncFSWAL.createWriterInstance(AsyncFSWAL.java:612) + at org.apache.hadoop.hbase.regionserver.wal.AsyncFSWAL.createWriterInstance(AsyncFSWAL.java:124) + at org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.rollWriter(AbstractFSWAL.java:759) + at org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.rollWriter(AbstractFSWAL.java:489) + at org.apache.hadoop.hbase.regionserver.wal.AsyncFSWAL.<init>(AsyncFSWAL.java:251) + at org.apache.hadoop.hbase.wal.AsyncFSWALProvider.createWAL(AsyncFSWALProvider.java:69) + at org.apache.hadoop.hbase.wal.AsyncFSWALProvider.createWAL(AsyncFSWALProvider.java:44) + at org.apache.hadoop.hbase.wal.AbstractFSWALProvider.getWAL(AbstractFSWALProvider.java:138) + at org.apache.hadoop.hbase.wal.AbstractFSWALProvider.getWAL(AbstractFSWALProvider.java:57) + at org.apache.hadoop.hbase.wal.WALFactory.getWAL(WALFactory.java:252) + at org.apache.hadoop.hbase.regionserver.HRegionServer.getWAL(HRegionServer.java:2105) + at org.apache.hadoop.hbase.regionserver.HRegionServer.buildServerLoad(HRegionServer.java:1326) + at org.apache.hadoop.hbase.regionserver.HRegionServer.tryRegionServerReport(HRegionServer.java:1191) + at org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:1007) + at java.lang.Thread.run(Thread.java:745) +Caused by: org.apache.hadoop.hbase.util.CommonFSUtils$StreamLacksCapabilityException: hflush and hsync + at
org.apache.hadoop.hbase.io.asyncfs.AsyncFSOutputHelper.createOutput(AsyncFSOutputHelper.java:69) + at org.apache.hadoop.hbase.regionserver.wal.AsyncProtobufLogWriter.initOutput(AsyncProtobufLogWriter.java:168) + at org.apache.hadoop.hbase.regionserver.wal.AbstractProtobufLogWriter.init(AbstractProtobufLogWriter.java:167) + at org.apache.hadoop.hbase.wal.AsyncFSWALProvider.createAsyncWriter(AsyncFSWALProvider.java:99) + ... 15 more + +---- + +If you are attempting to run in standalone mode and see this error, please walk back through the section <<quickstart>> and ensure you have included *all* the given configuration settings. + +[[trouble.rs.startup.asyncfs]] +==== RegionServer aborts due to can not initialize access to HDFS + +We will try to use _AsyncFSWAL_ for HBase-2.x as it has better performance while consuming fewer resources. But the problem for _AsyncFSWAL_ is that it hacks into the internals of the DFSClient implementation, so it will easily be broken when upgrading hadoop, even for a simple patch release. + +If you do not specify the wal provider, we will try to fall back to the old _FSHLog_ if we fail to initialize _AsyncFSWAL_, but it may not always work. The failure will show up in logs like this: + +---- +18/07/02 18:51:06 WARN concurrent.DefaultPromise: An exception was +thrown by org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputHelper$13.operationComplete() +java.lang.Error: Couldn't properly initialize access to HDFS +internals. Please update your WAL Provider to not make use of the +'asyncfs' provider. See HBASE-16110 for more information.
+ at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputSaslHelper.<clinit>(FanOutOneBlockAsyncDFSOutputSaslHelper.java:268) + at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputHelper.initialize(FanOutOneBlockAsyncDFSOutputHelper.java:661) + at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputHelper.access$300(FanOutOneBlockAsyncDFSOutputHelper.java:118) + at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputHelper$13.operationComplete(FanOutOneBlockAsyncDFSOutputHelper.java:720) + at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputHelper$13.operationComplete(FanOutOneBlockAsyncDFSOutputHelper.java:715) + at org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:507) + at org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:500) + at org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:479) + at org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:420) + at org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:104) + at org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:82) + at org.apache.hbase.thirdparty.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.fulfillConnectPromise(AbstractEpollChannel.java:638) + at org.apache.hbase.thirdparty.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.finishConnect(AbstractEpollChannel.java:676) + at org.apache.hbase.thirdparty.io.netty.channel.epoll.AbstractEpollChannel$AbstractEpollUnsafe.epollOutReady(AbstractEpollChannel.java:552) + at org.apache.hbase.thirdparty.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:394) + at org.apache.hbase.thirdparty.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:304)
+ at org.apache.hbase.thirdparty.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858) + at org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138) + at java.lang.Thread.run(Thread.java:748) + Caused by: java.lang.NoSuchMethodException: +org.apache.hadoop.hdfs.DFSClient.decryptEncryptedDataEncryptionKey(org.apache.hadoop.fs.FileEncryptionInfo) + at java.lang.Class.getDeclaredMethod(Class.java:2130) + at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputSaslHelper.createTransparentCryptoHelper(FanOutOneBlockAsyncDFSOutputSaslHelper.java:232) + at org.apache.hadoop.hbase.io.asyncfs.FanOutOneBlockAsyncDFSOutputSaslHelper.<clinit>(FanOutOneBlockAsyncDFSOutputSaslHelper.java:262) + ... 18 more +---- + +If you hit this error, please specify _FSHLog_, i.e., _filesystem_, explicitly in your config file. + +[source,xml] +---- +<property> + <name>hbase.wal.provider</name> + <value>filesystem</value> +</property> +---- + +And do not forget to send an email to user@hbase.apache.org or dev@hbase.apache.org to report the failure along with your hadoop version; we will try to fix the problem ASAP in the next release. + [[trouble.rs.runtime]] === Runtime Errors @@ -1127,6 +1219,29 @@ Sure fire solution is to just use Hadoop dfs to delete the HBase root and let HB If you have many regions on your cluster and you see an error like that reported above in this sections title in your logs, see link:https://issues.apache.org/jira/browse/HBASE-4246[HBASE-4246 Cluster with too many regions cannot withstand some master failover scenarios]. +[[trouble.master.startup.hsync]] +==== Master fails to become active due to lack of hsync for filesystem + +HBase's internal framework for cluster operations requires the ability to durably save state in a write ahead log.
When using a version of Apache Hadoop Common's filesystem API that supports checking on the availability of needed calls, HBase will proactively abort the cluster if it finds it can't operate safely. + +For Master roles, the failure will show up in logs like this: + +---- +2018-04-05 11:18:44,653 ERROR [Thread-21] master.HMaster: Failed to become active master +java.lang.IllegalStateException: The procedure WAL relies on the ability to hsync for proper operation during component failures, but the underlying filesystem does not support doing so. Please check the config value of 'hbase.procedure.store.wal.use.hsync' to set the desired level of robustness and ensure the config value of 'hbase.wal.dir' points to a FileSystem mount that can provide it. + at org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore.rollWriter(WALProcedureStore.java:1034) + at org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore.recoverLease(WALProcedureStore.java:374) + at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.start(ProcedureExecutor.java:530) + at org.apache.hadoop.hbase.master.HMaster.startProcedureExecutor(HMaster.java:1267) + at org.apache.hadoop.hbase.master.HMaster.startServiceThreads(HMaster.java:1173) + at org.apache.hadoop.hbase.master.HMaster.finishActiveMasterInitialization(HMaster.java:881) + at org.apache.hadoop.hbase.master.HMaster.startActiveMasterManager(HMaster.java:2048) + at org.apache.hadoop.hbase.master.HMaster.lambda$run$0(HMaster.java:568) + at java.lang.Thread.run(Thread.java:745) +---- + +If you are attempting to run in standalone mode and see this error, please walk back through the section <<quickstart>> and ensure you have included *all* the given configuration settings.
+ [[trouble.master.shutdown]] === Shutdown Errors diff --git a/src/main/asciidoc/_chapters/unit_testing.adoc b/src/main/asciidoc/_chapters/unit_testing.adoc index e503f816cae..3329a75b68c 100644 --- a/src/main/asciidoc/_chapters/unit_testing.adoc +++ b/src/main/asciidoc/_chapters/unit_testing.adoc @@ -327,7 +327,5 @@ A record is inserted, a Get is performed from the same table, and the insertion NOTE: Starting the mini-cluster takes about 20-30 seconds, but that should be appropriate for integration testing. -To use an HBase mini-cluster on Microsoft Windows, you need to use a Cygwin environment. - See the paper at link:http://blog.sematext.com/2010/08/30/hbase-case-study-using-hbasetestingutility-for-local-testing-development/[HBase Case-Study: Using HBaseTestingUtility for Local Testing and Development] (2010) for more information about HBaseTestingUtility. diff --git a/src/main/asciidoc/_chapters/upgrading.adoc b/src/main/asciidoc/_chapters/upgrading.adoc index ef20c7d90b6..bc2ec1c103b 100644 --- a/src/main/asciidoc/_chapters/upgrading.adoc +++ b/src/main/asciidoc/_chapters/upgrading.adoc @@ -314,6 +314,411 @@ Quitting... == Upgrade Paths +[[upgrade2.0]] +=== Upgrading from 1.x to 2.x + +In this section we will first call out significant changes compared to the prior stable HBase release and then go over the upgrade process. Be sure to read the former with care so you avoid surprises. + +==== Changes of Note! + +First we'll cover deployment / operational changes that you might hit when upgrading to HBase 2.0+. After that we'll call out changes for downstream applications. Please note that Coprocessors are covered in the operational section. Also note that this section is not meant to convey information about new features that may be of interest to you. For a complete summary of changes, please see the CHANGES.txt file in the source release artifact for the version you are planning to upgrade to.
+ +[[upgrade2.0.basic.requirements]] +.Update to basic prerequisite minimums in HBase 2.0+ +As noted in the section <<basic.prerequisites>>, HBase 2.0+ requires a minimum of Java 8 and Hadoop 2.6. The HBase community recommends ensuring you have already completed any needed upgrades in prerequisites prior to upgrading your HBase version. + +[[upgrade2.0.hbck]] +.HBCK must match HBase server version +You *must not* use an HBase 1.x version of HBCK against an HBase 2.0+ cluster. HBCK is strongly tied to the HBase server version. Using the HBCK tool from an earlier release against an HBase 2.0+ cluster will destructively alter said cluster in unrecoverable ways. + +As of HBase 2.0, HBCK is a read-only tool that can report the status of some non-public system internals. You should not rely on the format nor content of these internals to remain consistent across HBase releases. + +//// +Link to a ref guide section on HBCK in 2.0 that explains use and calls out the inability of clients and server sides to detect version of each other. +//// + +[[upgrade2.0.removed.configs]] +.Configuration settings no longer in HBase 2.0+ + +The following configuration settings are no longer applicable or available. For details, please see the detailed release notes. + +* hbase.config.read.zookeeper.config (see <<upgrade2.0.zkconfig>> for migration details) +* hbase.zookeeper.useMulti (HBase now always uses ZK's multi functionality) +* hbase.rpc.client.threads.max +* hbase.rpc.client.nativetransport +* hbase.fs.tmp.dir +// These next two seem worth a call out section? +* hbase.bucketcache.combinedcache.enabled +* hbase.bucketcache.ioengine no longer supports the 'heap' value. +* hbase.bulkload.staging.dir +* hbase.balancer.tablesOnMaster wasn't removed, strictly speaking, but its meaning has fundamentally changed and users should not set it. See the section <<upgrade2.0.regions.on.master>> for details.
+* hbase.master.distributed.log.replay See the section <<upgrade2.0.distributed.log.replay>> for details +* hbase.regionserver.disallow.writes.when.recovering See the section <<upgrade2.0.distributed.log.replay>> for details +* hbase.regionserver.wal.logreplay.batch.size See the section <<upgrade2.0.distributed.log.replay>> for details +* hbase.master.catalog.timeout +* hbase.regionserver.catalog.timeout +* hbase.metrics.exposeOperationTimes +* hbase.metrics.showTableName +* hbase.online.schema.update.enable (HBase now always supports this) +* hbase.thrift.htablepool.size.max + +[[upgrade2.0.renamed.configs]] +.Configuration properties that were renamed in HBase 2.0+ + +The following properties have been renamed. Attempts to set the old property will be ignored at run time. + +.Renamed properties +[options="header"] +|============================================================================================================ +|Old name |New name +|hbase.rpc.server.nativetransport |hbase.netty.nativetransport +|hbase.netty.rpc.server.worker.count |hbase.netty.worker.count +|hbase.hfile.compactions.discharger.interval |hbase.hfile.compaction.discharger.interval +|hbase.hregion.percolumnfamilyflush.size.lower.bound |hbase.hregion.percolumnfamilyflush.size.lower.bound.min +|============================================================================================================ + +[[upgrade2.0.changed.defaults]] +.Configuration settings with different defaults in HBase 2.0+ + +The following configuration settings changed their default value. Where applicable, the value to set to restore the behavior of HBase 1.2 is given. + +* hbase.security.authorization now defaults to false. Set to true to restore the same behavior as the previous default. +* hbase.client.retries.number is now set to 10. Previously it was 35. Downstream users are advised to use client timeouts as described in section <> instead. +* hbase.client.serverside.retries.multiplier is now set to 3. Previously it was 10. Downstream users are advised to use client timeouts as described in section <> instead.
+* hbase.master.fileSplitTimeout is now set to 10 minutes. Previously it was 30 seconds. +* hbase.regionserver.logroll.multiplier is now set to 0.5. Previously it was 0.95. This change is tied with the following doubling of block size. Combined, these two configuration changes should make for WALs of about the same size as those in hbase-1.x but there should be less incidence of small blocks because we fail to roll the WAL before we hit the blocksize threshold. See link:https://issues.apache.org/jira/browse/HBASE-19148[HBASE-19148] for discussion. +* hbase.regionserver.hlog.blocksize defaults to 2x the HDFS default block size for the WAL dir. Previously it was equal to the HDFS default block size for the WAL dir. +* hbase.client.start.log.errors.counter changed to 5. Previously it was 9. +* hbase.ipc.server.callqueue.type changed to 'fifo'. In HBase versions 1.0 - 1.2 it was 'deadline'. In prior and later 1.x versions it already defaults to 'fifo'. +* hbase.hregion.memstore.chunkpool.maxsize is 1.0 by default. Previously it was 0.0. Effectively, this means previously we would not use a chunk pool when our memstore is onheap and now we will. See the section <> for more information about the MSLAB chunk pool. +* hbase.master.cleaner.interval is now set to 10 minutes. Previously it was 1 minute. +* hbase.master.procedure.threads will now default to 1/4 of the number of available CPUs, but not less than 16 threads. Previously it would be number of threads equal to number of CPUs. +* hbase.hstore.blockingStoreFiles is now 16. Previously it was 10. +* hbase.http.max.threads is now 16. Previously it was 10. +* hbase.client.max.perserver.tasks is now 2. Previously it was 5. +* hbase.normalizer.period is now 5 minutes. Previously it was 30 minutes. +* hbase.regionserver.region.split.policy is now SteppingSplitPolicy. Previously it was IncreasingToUpperBoundRegionSplitPolicy. +* replication.source.ratio is now 0.5. Previously it was 0.1.
+ +[[upgrade2.0.regions.on.master]] +."Master hosting regions" feature broken and unsupported + +The feature "Master acts as region server" and associated follow-on work available in HBase 1.y is non-functional in HBase 2.y and should not be used in a production setting due to deadlock on Master initialization. Downstream users are advised to treat related configuration settings as experimental and the feature as inappropriate for production settings. + +A brief summary of related changes: + +* Master no longer carries regions by default +* hbase.balancer.tablesOnMaster is a boolean, default false (if it holds an HBase 1.x list of tables, will default to false) +* hbase.balancer.tablesOnMaster.systemTablesOnly is boolean to keep user tables off master. default false +* those wishing to replicate old list-of-servers config should deploy a stand-alone RegionServer process and then rely on Region Server Groups + +[[upgrade2.0.distributed.log.replay]] +."Distributed Log Replay" feature broken and removed + +The Distributed Log Replay feature was broken and has been removed from HBase 2.y+. As a consequence all related configs, metrics, RPC fields, and logging have also been removed. Note that this feature was found to be unreliable in the run up to HBase 1.0, defaulted to being unused, and was effectively removed in HBase 1.2.0 when we started ignoring the config that turns it on (link:https://issues.apache.org/jira/browse/HBASE-14465[HBASE-14465]). If you are currently using the feature, be sure to perform a clean shutdown, ensure all DLR work is complete, and disable the feature prior to upgrading. + +[[upgrade2.0.prefix-tree.removed]] +._prefix-tree_ encoding removed + +The prefix-tree encoding was removed from HBase 2.0.0 (link:https://issues.apache.org/jira/browse/HBASE-19179[HBASE-19179]). +It was (late!) deprecated in hbase-1.2.7, hbase-1.4.0, and hbase-1.3.2. + +This feature was removed because it was not being actively maintained.
If interested in reviving this +sweet facility which improved random read latencies at the expense of slowed writes, +write the HBase developers list at _dev at hbase dot apache dot org_. + +The prefix-tree encoding needs to be removed from all tables before upgrading to HBase 2.0+. +To do that first you need to change the encoding from PREFIX_TREE to something else that is supported in HBase 2.0. +After that you have to major compact the tables that were using PREFIX_TREE encoding before. +To check which column families are using incompatible data block encoding you can use <>. + +[[upgrade2.0.metrics]] +.Changed metrics + +The following metrics have changed names: + +* Metrics previously published under the name "AssignmentManger" [sic] are now published under the name "AssignmentManager" + +The following metrics have changed their meaning: + +* The metric 'blockCacheEvictionCount' published on a per-region server basis no longer includes blocks removed from the cache due to the invalidation of the hfiles they are from (e.g. via compaction). +* The metric 'totalRequestCount' increments once per request; previously it incremented by the number of `Actions` carried in the request; e.g. if a request was a `multi` made of four Gets and two Puts, we'd increment 'totalRequestCount' by six; now we increment by one regardless. Expect to see lower values for this metric in hbase-2.0.0. +* The 'readRequestCount' now counts reads that return a non-empty row where in older hbases, we'd increment 'readRequestCount' whether a Result or not. This change will flatten the profile of the read-requests graphs if there are requests for non-existent rows. A YCSB read-heavy workload can do this dependent on how the database was loaded. + +The following metrics have been removed: + +* Metrics related to the Distributed Log Replay feature are no longer present. They were previously found in the region server context under the name 'replay'. See the section <<upgrade2.0.distributed.log.replay>> for details.
+ +The following metrics have been added: + +* 'totalRowActionRequestCount' is a count of region row actions summing reads and writes. + +[[upgrade2.0.logging]] +.Changed logging +HBase-2.0.0 now uses link:https://www.slf4j.org/[slf4j] as its logging frontend. +Previously, we used link:http://logging.apache.org/log4j/1.2/[log4j (1.2)]. +For most the transition should be seamless; slf4j does a good job interpreting +_log4j.properties_ logging configuration files such that you should not notice +any difference in your log system emissions. + +That said, your _log4j.properties_ may need freshening. See link:https://issues.apache.org/jira/browse/HBASE-20351[HBASE-20351] +for example, where a stale log configuration file manifests as netty configuration +being dumped at DEBUG level as preamble on every shell command invocation. + +[[upgrade2.0.zkconfig]] +.ZooKeeper configs no longer read from zoo.cfg + +HBase no longer optionally reads the 'zoo.cfg' file for ZooKeeper related configuration settings. If you previously relied on the 'hbase.config.read.zookeeper.config' config for this functionality, you should migrate any needed settings to the hbase-site.xml file while adding the prefix 'hbase.zookeeper.property.' to each property name. + +[[upgrade2.0.permissions]] +.Changes in permissions +The following permission related changes either altered semantics or defaults: + +* Permissions granted to a user now merge with existing permissions for that user, rather than over-writing them. (see link:https://issues.apache.org/jira/browse/HBASE-17472[the release note on HBASE-17472] for details) +* Region Server Group commands (added in 1.4.0) now require admin privileges. + +[[upgrade2.0.admin.commands]] +.Most Admin APIs don't work against an HBase 2.0+ cluster from pre-HBase 2.0 clients + +A number of admin commands are known to not work when used from a pre-HBase 2.0 client. This includes an HBase Shell that has the library jars from pre-HBase 2.0.
You will need to plan for an outage of use of admin APIs and commands until you can also update to the needed client version. + +The following client operations do not work against HBase 2.0+ cluster when executed from a pre-HBase 2.0 client: + +* list_procedures +* split +* merge_region +* list_quotas +* enable_table_replication +* disable_table_replication +* Snapshot related commands + +.Deprecated in 1.0 admin commands have been removed. + +The following commands that were deprecated in 1.0 have been removed. Where applicable the replacement command is listed. + +* The 'hlog' command has been removed. Downstream users should rely on the 'wal' command instead. + +[[upgrade2.0.memory]] +.Region Server memory consumption changes. + +Users upgrading from versions prior to HBase 1.4 should read the instructions in section <<upgrade1.4.memory>>. + +Additionally, HBase 2.0 has changed how memstore memory is tracked for flushing decisions. Previously, both the data size and overhead for storage were used to calculate utilization against the flush threshold. Now, only data size is used to make these per-region decisions. Globally the addition of the storage overhead is used to make decisions about forced flushes. + +[[upgrade2.0.ui.splitmerge.by.row]] +.Web UI for splitting and merging operate on row prefixes + +Previously, the Web UI included functionality on table status pages to merge or split based on an encoded region name. In HBase 2.0, instead this functionality works by taking a row prefix. + +[[upgrade2.0.replication]] +.Special upgrading for Replication users from pre-HBase 1.4 + +Users running versions of HBase prior to the 1.4.0 release that make use of replication should be sure to read the instructions in the section <<upgrade1.4.replication>>. + +[[upgrade2.0.shell]] +.HBase shell changes + +The HBase shell command relies on a bundled JRuby instance. This bundled JRuby has been updated from version 1.6.8 to version 9.1.10.0.
This represents a change from Ruby 1.8 to Ruby 2.3.3, which introduces non-compatible language changes for user scripts. + +The HBase shell command now ignores the '--return-values' flag that was present in early HBase 1.4 releases. Instead the shell always behaves as though that flag were passed. If you wish to avoid having expression results printed in the console you should alter your IRB configuration as noted in the section <>. + +[[upgrade2.0.coprocessors]] +.Coprocessor APIs have changed in HBase 2.0+ + +All Coprocessor APIs have been refactored to improve supportability around binary API compatibility for future versions of HBase. If you or applications you rely on have custom HBase coprocessors, you should read link:https://issues.apache.org/jira/browse/HBASE-18169[the release notes for HBASE-18169] for details of changes you will need to make prior to upgrading to HBase 2.0+. + +For example, if you had a BaseRegionObserver in HBase 1.2 then at a minimum you will need to update it to implement both RegionObserver and RegionCoprocessor and add the method + +[source,java] +---- +... + @Override + public Optional<RegionObserver> getRegionObserver() { + return Optional.of(this); + } +... +---- + +//// +This would be a good place to link to a coprocessor migration guide +//// + +[[upgrade2.0.hfile3.only]] +.HBase 2.0+ can no longer write HFile v2 files. + +HBase has simplified our internal HFile handling. As a result, we can no longer write HFile versions earlier than the default of version 3. Upgrading users should ensure that hfile.format.version is not set to 2 in hbase-site.xml before upgrading. Failing to do so will cause Region Server failure. HBase can still read HFiles written in the older version 2 format. + +[[upgrade2.0.pb.wal.only]] +.HBase 2.0+ can no longer read Sequence File based WAL file. + +HBase can no longer read the deprecated WAL files written in the Apache Hadoop Sequence File format.
The hbase.regionserver.hlog.reader.impl and hbase.regionserver.hlog.writer.impl configuration entries should be set to use the Protobuf based WAL reader / writer classes. This implementation has been the default since HBase 0.96, so legacy WAL files should not be a concern for most downstream users. + +A clean cluster shutdown should ensure there are no WAL files. If you are unsure of a given WAL file's format you can use the `hbase wal` command to parse files while the HBase cluster is offline. In HBase 2.0+, this command will not be able to read a Sequence File based WAL. For more information on the tool see the section <>. + +[[upgrade2.0.filters]] +.Change in behavior for filters + +The Filter ReturnCode NEXT_ROW has been redefined as skipping to next row in current family, not to next row in all families. It’s more reasonable, because ReturnCode is a concept in store level, not in region level. + +[[upgrade2.0.shaded.client.preferred]] +.Downstream HBase 2.0+ users should use the shaded client +Downstream users are strongly urged to rely on the Maven coordinates org.apache.hbase:hbase-shaded-client for their runtime use. This artifact contains all the needed implementation details for talking to an HBase cluster while minimizing the number of third party dependencies exposed. + +Note that this artifact exposes some classes in the org.apache.hadoop package space (e.g. o.a.h.configuration.Configuration) so that we can maintain source compatibility with our public API. Those classes are included so that they can be altered to use the same relocated third party dependencies as the rest of the HBase client code. In the event that you need to *also* use Hadoop in your code, you should ensure all Hadoop related jars precede the HBase client jar in your classpath.
+ +[[upgrade2.0.mapreduce.module]] +.Downstream HBase 2.0+ users of MapReduce must switch to new artifact +Downstream users of HBase's integration for Apache Hadoop MapReduce must switch to relying on the org.apache.hbase:hbase-shaded-mapreduce module for their runtime use. Historically, downstream users relied on either the org.apache.hbase:hbase-server or org.apache.hbase:hbase-shaded-server artifacts for these classes. Both uses are no longer supported and in the vast majority of cases will fail at runtime. + +Note that this artifact exposes some classes in the org.apache.hadoop package space (e.g. o.a.h.configuration.Configuration) so that we can maintain source compatibility with our public API. Those classes are included so that they can be altered to use the same relocated third party dependencies as the rest of the HBase client code. In the event that you need to *also* use Hadoop in your code, you should ensure all Hadoop related jars precede the HBase client jar in your classpath. + +[[upgrade2.0.dependencies]] +.Significant changes to runtime classpath +A number of internal dependencies for HBase were updated or removed from the runtime classpath. Downstream client users who do not follow the guidance in <<upgrade2.0.shaded.client.preferred>> will have to examine the set of dependencies Maven pulls in for impact. Downstream users of LimitedPrivate Coprocessor APIs will need to examine the runtime environment for impact. For details on our new handling of third party libraries that have historically been a problem with respect to harmonizing compatible runtime versions, see the reference guide section <>. + +[[upgrade2.0.public.api]] +.Multiple breaking changes to source and binary compatibility for client API +The Java client API for HBase has a number of changes that break both source and binary compatibility. For details, see the Compatibility Check Report for the release you'll be upgrading to.
+ +[[upgrade2.0.tracing]] +.Tracing implementation changes +The backing implementation of HBase's tracing features was updated from Apache HTrace 3 to HTrace 4, which includes several breaking changes. While HTrace 3 and 4 can coexist in the same runtime, they will not integrate with each other, leading to disjoint trace information. + +The internal changes to HBase during this upgrade were sufficient for compilation, but it has not been confirmed that there are no regressions in tracing functionality. Please consider this feature experimental for the immediate future. + +If you previously relied on client side tracing integrated with HBase operations, it is recommended that you upgrade your usage to HTrace 4 as well. + +[[upgrade2.0.perf]] +.Performance + +You will likely see a change in the performance profile on upgrade to hbase-2.0.0 given +read and write paths have undergone significant change. On release, writes may be +slower with reads about the same or much better, dependent on context. Be prepared +to spend time re-tuning (See <>). +Performance is also an area that is now under active review so look forward to +improvement in coming releases (See +link:https://issues.apache.org/jira/browse/HBASE-20188[HBASE-20188 TESTING Performance]). + +//// +This would be a good place to link to an appendix on migrating applications +//// + +[[upgrade2.0.coprocessors]] +==== Upgrading Coprocessors to 2.0 +Coprocessors have changed substantially in 2.0 ranging from top level design changes in class +hierarchies to changed/removed methods, interfaces, etc. +(Parent jira: link:https://issues.apache.org/jira/browse/HBASE-18169[HBASE-18169 Coprocessor fix +and cleanup before 2.0.0 release]). Some of the reasons for such widespread changes: + +. Pass Interfaces instead of Implementations; e.g.
TableDescriptor instead of HTableDescriptor and +Region instead of HRegion (link:https://issues.apache.org/jira/browse/HBASE-18241[HBASE-18241] +Change client.Table and client.Admin to not use HTableDescriptor). +. Design refactor so implementers need to fill out less boilerplate and so we can do more +compile-time checking (link:https://issues.apache.org/jira/browse/HBASE-17732[HBASE-17732]) +. Purge Protocol Buffers from Coprocessor API +(link:https://issues.apache.org/jira/browse/HBASE-18859[HBASE-18859], +link:https://issues.apache.org/jira/browse/HBASE-16769[HBASE-16769], etc) +. Cut back on what we expose to Coprocessors removing hooks on internals that were too private to + expose (e.g. link:https://issues.apache.org/jira/browse/HBASE-18453[HBASE-18453] + CompactionRequest should not be exposed to user directly; + link:https://issues.apache.org/jira/browse/HBASE-18298[HBASE-18298] RegionServerServices Interface + cleanup for CP expose; etc) + +To use coprocessors in 2.0, they should be rebuilt against new API otherwise they will fail to +load and HBase processes will die. + +Suggested order of changes to upgrade the coprocessors: + +. Directly implement observer interfaces instead of extending Base*Observer classes. Change + `Foo extends BaseXXXObserver` to `Foo implements XXXObserver`. + (link:https://issues.apache.org/jira/browse/HBASE-17312[HBASE-17312]). +. Adapt to design change from Inheritance to Composition + (link:https://issues.apache.org/jira/browse/HBASE-17732[HBASE-17732]) by following + link:https://github.com/apache/hbase/blob/master/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc#migrating-existing-cps-to-new-design[this + example]. +. getTable() has been removed from the CoprocessorEnvironment, coprocessors should self-manage + Table instances.
+ +Some examples of writing coprocessors with new API can be found in hbase-examples module +link:https://github.com/apache/hbase/tree/branch-2.0/hbase-examples/src/main/java/org/apache/hadoop/hbase/coprocessor/example[here] . + +Lastly, if an api has been changed/removed that breaks you in an irreparable way, and if there's a +good justification to add it back, bring it to our notice (dev@hbase.apache.org). + +[[upgrade2.0.rolling.upgrades]] +==== Rolling Upgrade from 1.x to 2.x + +Rolling upgrades are currently an experimental feature. +They have had limited testing. There are likely corner +cases as yet uncovered in our +limited experience so you should be careful if you go this +route. The stop/upgrade/start described in the next section, +<<upgrade2.0.process>>, is the safest route. + +That said, the below is a prescription for a +rolling upgrade of a 1.4 cluster. + +.Pre-Requirements +* Upgrade to the latest 1.4.x release. Pre 1.4 releases may also work but are not tested, so please upgrade to 1.4.3+ before upgrading to 2.x, unless you are an expert and familiar with the region assignment and crash processing. See the section <<upgrade1.4>> on how to upgrade to 1.4.x. +* Make sure that the zk-less assignment is enabled, i.e., set `hbase.assignment.usezk` to `false`. This is the most important thing. It allows the 1.x master to assign/unassign regions to/from 2.x region servers. See the release note section of link:https://issues.apache.org/jira/browse/HBASE-11059[HBASE-11059] on how to migrate from zk based assignment to zk less assignment. +* We have tested rolling upgrading from 1.4.3 to 2.1.0, but it should also work if you want to upgrade to 2.0.x. + +.Instructions +. Unload a region server and upgrade it to 2.1.0. With link:https://issues.apache.org/jira/browse/HBASE-17931[HBASE-17931] in place, the meta region and regions for other system tables will be moved to this region server immediately. If not, please move them manually to the new region server.
This is very important because +** The schema of meta region is hard coded, if meta is on an old region server, then the new region servers can not access it as it does not have some families, for example, table state. +** Client with lower version can communicate with server with higher version, but not vice versa. If the meta region is on an old region server, the new region server will use a client with higher version to communicate with a server with lower version, this may introduce strange problems. +. Rolling upgrade all other region servers. +. Upgrading masters. + +It is OK that during the rolling upgrading there are region server crashes. The 1.x master can assign regions to both 1.x and 2.x region servers, and link:https://issues.apache.org/jira/browse/HBASE-19166[HBASE-19166] fixed a problem so that 1.x region server can also read the WALs written by 2.x region server and split them. + +NOTE: please read the <> section carefully before rolling upgrading. Make sure that you do not use the removed features in 2.0, for example, the prefix-tree encoding, the old hfile format, etc. They could both fail the upgrading and leave the cluster in an intermediate state and hard to recover. + +NOTE: If you have success running this prescription, please notify the dev list with a note on your experience and/or update the above with any deviations you may have taken so others going this route can benefit from your efforts. + +[[upgrade2.0.process]] +==== Upgrade process from 1.x to 2.x + +To upgrade an existing HBase 1.x cluster, you should: + +* Clean shutdown of existing 1.x cluster +* Update coprocessors +* Upgrade Master roles first +* Upgrade RegionServers +* (Eventually) Upgrade Clients + +[[upgrade1.4]] +=== Upgrading from pre-1.4 to 1.4+ + +[[upgrade1.4.memory]] +==== Region Server memory consumption changes. 
+ +Users upgrading from versions prior to HBase 1.4 should be aware that the estimates of heap usage by the memstore objects (KeyValue, object and array header sizes, etc) have been made more accurate for heap sizes up to 32G (using CompressedOops), resulting in them dropping by 10-50% in practice. This also results in fewer flushes and compactions due to "fatter" flushes. YMMV. As a result, the actual heap usage of the memstore before being flushed may increase by up to 100%. If configured memory limits for the region server had been tuned based on observed usage, this change could result in worse GC behavior or even OutOfMemory errors. Set the environment property (not hbase-site.xml) "hbase.memorylayout.use.unsafe" to false to disable. + + +[[upgrade1.4.replication]] +==== Replication peer's TableCFs config + +Before 1.4, the table name can't include namespace for replication peer's TableCFs config. It was fixed by adding TableCFs to ReplicationPeerConfig which was stored on Zookeeper. So when upgrading to 1.4, you have to update the original ReplicationPeerConfig data on Zookeeper first. There are four steps to upgrade when your cluster has a replication peer with TableCFs config. + +* Disable the replication peer. +* If master has permission to write replication peer znode, then rolling update master directly. If not, use TableCFsUpdater tool to update the replication peer's config. +[source,bash] +---- +$ bin/hbase org.apache.hadoop.hbase.replication.master.TableCFsUpdater update +---- +* Rolling update regionservers. +* Enable the replication peer. + +Notes: + +* Can't use the old client(before 1.4) to change the replication peer's config. Because the client will write config to Zookeeper directly, the old client will miss TableCFs config. And the old client writes TableCFs config to the old tablecfs znode, it will not work for new version regionserver.
+ +[[upgrade1.4.rawscan]] +==== Raw scan now ignores TTL + +Doing a raw scan will now return results that have expired according to TTL settings. + [[upgrade1.0]] === Upgrading to 1.x diff --git a/src/main/asciidoc/book.adoc b/src/main/asciidoc/book.adoc index 0a21e7bf05c..764d7b4abc9 100644 --- a/src/main/asciidoc/book.adoc +++ b/src/main/asciidoc/book.adoc @@ -63,7 +63,6 @@ include::_chapters/security.adoc[] include::_chapters/architecture.adoc[] include::_chapters/hbase_mob.adoc[] include::_chapters/inmemory_compaction.adoc[] -include::_chapters/backup_restore.adoc[] include::_chapters/hbase_apis.adoc[] include::_chapters/external_apis.adoc[] include::_chapters/thrift_filter_language.adoc[] @@ -75,6 +74,8 @@ include::_chapters/ops_mgt.adoc[] include::_chapters/developer.adoc[] include::_chapters/unit_testing.adoc[] include::_chapters/protobuf.adoc[] +include::_chapters/pv2.adoc[] +include::_chapters/amv2.adoc[] include::_chapters/zookeeper.adoc[] include::_chapters/community.adoc[] @@ -94,3 +95,4 @@ include::_chapters/asf.adoc[] include::_chapters/orca.adoc[] include::_chapters/tracing.adoc[] include::_chapters/rpc.adoc[] +include::_chapters/appendix_hbase_incompatibilities.adoc[] diff --git a/src/main/asciidoc/images b/src/main/asciidoc/images index 06d04d0edbb..02e8e9402df 120000 --- a/src/main/asciidoc/images +++ b/src/main/asciidoc/images @@ -1 +1 @@ -../site/resources/images \ No newline at end of file +../../site/resources/images/ \ No newline at end of file diff --git a/src/main/site/asciidoc/acid-semantics.adoc b/src/site/asciidoc/acid-semantics.adoc similarity index 99% rename from src/main/site/asciidoc/acid-semantics.adoc rename to src/site/asciidoc/acid-semantics.adoc index 00389017943..0b56aa8e136 100644 --- a/src/main/site/asciidoc/acid-semantics.adoc +++ b/src/site/asciidoc/acid-semantics.adoc @@ -115,4 +115,4 @@ All of the above guarantees must be possible within Apache HBase. 
For users who == More Information -For more information, see the link:book.html#client[client architecture] and link:book.html#datamodel[data model] sections in the Apache HBase Reference Guide. +For more information, see the link:book.html#client[client architecture] and link:book.html#datamodel[data model] sections in the Apache HBase Reference Guide. diff --git a/src/main/site/asciidoc/bulk-loads.adoc b/src/site/asciidoc/bulk-loads.adoc similarity index 99% rename from src/main/site/asciidoc/bulk-loads.adoc rename to src/site/asciidoc/bulk-loads.adoc index fc320d88fde..8fc9a1a1a5e 100644 --- a/src/main/site/asciidoc/bulk-loads.adoc +++ b/src/site/asciidoc/bulk-loads.adoc @@ -20,4 +20,3 @@ under the License. = Bulk Loads in Apache HBase (TM) This page has been retired. The contents have been moved to the link:book.html#arch.bulk.load[Bulk Loading] section in the Reference Guide. - diff --git a/src/main/site/asciidoc/cygwin.adoc b/src/site/asciidoc/cygwin.adoc similarity index 99% rename from src/main/site/asciidoc/cygwin.adoc rename to src/site/asciidoc/cygwin.adoc index 11c4df4103d..5b6d5b4a587 100644 --- a/src/main/site/asciidoc/cygwin.adoc +++ b/src/site/asciidoc/cygwin.adoc @@ -30,10 +30,10 @@ This document explains the *intricacies* of running Apache HBase on Windows usin == Installation -For running Apache HBase on Windows, 3 technologies are required: +For running Apache HBase on Windows, 3 technologies are required: * Java * Cygwin -* SSH +* SSH The following paragraphs detail the installation of each of the aforementioned technologies. @@ -104,7 +104,7 @@ LN -s /cygdrive/c/Program\ Files/Java/*_jre name_*/usr/local/*_jre name_* ---- . Test your java installation by changing directories to your Java folder `CD /usr/local/_jre name_` and issueing the command `./bin/java -version`. This should output your version of the chosen JRE. 
-=== SSH +=== SSH Configuring *SSH *is quite elaborate, but primarily a question of launching it by default as a* Windows service*. @@ -194,4 +194,3 @@ put 'test', 'row3', 'data:3', 'value3' == Conclusion Now your *HBase *server is running, *start coding* and build that next killer app on this particular, but scalable datastore! - diff --git a/src/main/site/asciidoc/export_control.adoc b/src/site/asciidoc/export_control.adoc similarity index 97% rename from src/main/site/asciidoc/export_control.adoc rename to src/site/asciidoc/export_control.adoc index 1bbefb50a2a..f6e5e181838 100644 --- a/src/main/site/asciidoc/export_control.adoc +++ b/src/site/asciidoc/export_control.adoc @@ -29,11 +29,11 @@ encryption software, to see if this is permitted. See the link:http://www.wassenaar.org/[Wassenaar Arrangement] for more information. -The U.S. Government Department of Commerce, Bureau of Industry and Security -(BIS), has classified this software as Export Commodity Control Number (ECCN) -5D002.C.1, which includes information security software using or performing +The U.S. Government Department of Commerce, Bureau of Industry and Security +(BIS), has classified this software as Export Commodity Control Number (ECCN) +5D002.C.1, which includes information security software using or performing cryptographic functions with asymmetric algorithms. The form and manner of this -Apache Software Foundation distribution makes it eligible for export under the +Apache Software Foundation distribution makes it eligible for export under the License Exception ENC Technology Software Unrestricted (TSU) exception (see the BIS Export Administration Regulations, Section 740.13) for both object code and source code. 
diff --git a/src/main/site/asciidoc/index.adoc b/src/site/asciidoc/index.adoc similarity index 100% rename from src/main/site/asciidoc/index.adoc rename to src/site/asciidoc/index.adoc diff --git a/src/main/site/asciidoc/metrics.adoc b/src/site/asciidoc/metrics.adoc similarity index 98% rename from src/main/site/asciidoc/metrics.adoc rename to src/site/asciidoc/metrics.adoc index be7d9a52b6e..41db2a05b88 100644 --- a/src/main/site/asciidoc/metrics.adoc +++ b/src/site/asciidoc/metrics.adoc @@ -41,9 +41,9 @@ The _jvm_ context is useful for long-term stats on running hbase jvms -- memory == Using with JMX -In addition to the standard output contexts supported by the Hadoop -metrics package, you can also export HBase metrics via Java Management -Extensions (JMX). This will allow viewing HBase stats in JConsole or +In addition to the standard output contexts supported by the Hadoop +metrics package, you can also export HBase metrics via Java Management +Extensions (JMX). This will allow viewing HBase stats in JConsole or any other JMX client. === Enable HBase stats collection @@ -67,7 +67,7 @@ rpc.period=60 === Setup JMX Remote Access For remote access, you will need to configure JMX remote passwords and access profiles. Create the files: -`$HBASE_HOME/conf/jmxremote.passwd` (set permissions +`$HBASE_HOME/conf/jmxremote.passwd` (set permissions to 600):: + ---- monitorRole monitorpass @@ -98,5 +98,4 @@ After restarting the processes you want to monitor, you should now be able to ru == Understanding HBase Metrics -For more information on understanding HBase metrics, see the link:book.html#hbase_metrics[metrics section] in the Apache HBase Reference Guide. - +For more information on understanding HBase metrics, see the link:book.html#hbase_metrics[metrics section] in the Apache HBase Reference Guide. 
diff --git a/src/main/site/asciidoc/old_news.adoc b/src/site/asciidoc/old_news.adoc similarity index 99% rename from src/main/site/asciidoc/old_news.adoc rename to src/site/asciidoc/old_news.adoc index fd0e2558486..75179e0114a 100644 --- a/src/main/site/asciidoc/old_news.adoc +++ b/src/site/asciidoc/old_news.adoc @@ -97,7 +97,7 @@ October 12th, 2010:: HBase-related presentations by core contributors and users October 11th, 2010:: link:http://www.meetup.com/hbaseusergroup/calendar/14606174/[HUG-NYC: HBase User Group NYC Edition] (Night before Hadoop World) June 30th, 2010:: link:http://www.meetup.com/hbaseusergroup/calendar/13562846/[Apache HBase Contributor Workshop] (Day after Hadoop Summit) -May 10th, 2010:: Apache HBase graduates from Hadoop sub-project to Apache Top Level Project +May 10th, 2010:: Apache HBase graduates from Hadoop sub-project to Apache Top Level Project April 19, 2010:: Signup for link:http://www.meetup.com/hbaseusergroup/calendar/12689490/[HBase User Group Meeting, HUG10] hosted by Trend Micro @@ -118,4 +118,3 @@ June, 2009:: HBase at HadoopSummit2009 and at NOSQL: See the link:https://hbase March 3rd, 2009 :: HUG6 -- link:http://www.meetup.com/hbaseusergroup/calendar/9764004/[HBase User Group 6] January 30th, 2009:: LA Hbackathon: link:http://www.meetup.com/hbasela/calendar/9450876/[HBase January Hackathon Los Angeles] at link:http://streamy.com[Streamy] in Manhattan Beach - diff --git a/src/main/site/asciidoc/pseudo-distributed.adoc b/src/site/asciidoc/pseudo-distributed.adoc similarity index 99% rename from src/main/site/asciidoc/pseudo-distributed.adoc rename to src/site/asciidoc/pseudo-distributed.adoc index d13c63b0836..ec6f53de74b 100644 --- a/src/main/site/asciidoc/pseudo-distributed.adoc +++ b/src/site/asciidoc/pseudo-distributed.adoc @@ -20,4 +20,3 @@ under the License. = Running Apache HBase (TM) in pseudo-distributed mode This page has been retired. 
The contents have been moved to the link:book.html#distributed[Distributed Operation: Pseudo- and Fully-distributed modes] section in the Reference Guide. - diff --git a/src/main/site/asciidoc/replication.adoc b/src/site/asciidoc/replication.adoc similarity index 100% rename from src/main/site/asciidoc/replication.adoc rename to src/site/asciidoc/replication.adoc diff --git a/src/main/site/asciidoc/resources.adoc b/src/site/asciidoc/resources.adoc similarity index 99% rename from src/main/site/asciidoc/resources.adoc rename to src/site/asciidoc/resources.adoc index fef217e4287..5f2d5d4a28f 100644 --- a/src/main/site/asciidoc/resources.adoc +++ b/src/site/asciidoc/resources.adoc @@ -24,4 +24,3 @@ HBase: The Definitive Guide:: link:http://shop.oreilly.com/product/0636920014348 HBase In Action:: link:http://www.manning.com/dimidukkhurana[HBase In Action] By Nick Dimiduk and Amandeep Khurana. Publisher: Manning, MEAP Began: January 2012, Softbound print: Fall 2012, Pages: 350. HBase Administration Cookbook:: link:http://www.packtpub.com/hbase-administration-for-optimum-database-performance-cookbook/book[HBase Administration Cookbook] by Yifeng Jiang. Publisher: PACKT Publishing, Release: Expected August 2012, Pages: 335. - diff --git a/src/main/site/asciidoc/sponsors.adoc b/src/site/asciidoc/sponsors.adoc similarity index 97% rename from src/main/site/asciidoc/sponsors.adoc rename to src/site/asciidoc/sponsors.adoc index 4d7ebf38832..bf93557b9c7 100644 --- a/src/main/site/asciidoc/sponsors.adoc +++ b/src/site/asciidoc/sponsors.adoc @@ -23,7 +23,7 @@ First off, thanks to link:http://www.apache.org/foundation/thanks.html[all who s The below companies have been gracious enough to provide their commerical tool offerings free of charge to the Apache HBase(TM) project. -* The crew at link:http://www.ej-technologies.com/[ej-technologies] have been letting us use link:http://www.ej-technologies.com/products/jprofiler/overview.html[JProfiler] for years now. 
+* The crew at link:http://www.ej-technologies.com/[ej-technologies] have been letting us use link:http://www.ej-technologies.com/products/jprofiler/overview.html[JProfiler] for years now. * The lads at link:http://headwaysoftware.com/[headway software] have given us a license for link:http://headwaysoftware.com/products/?code=Restructure101[Restructure101] so we can untangle our interdependency mess. @@ -33,4 +33,3 @@ The below companies have been gracious enough to provide their commerical tool o == Sponsoring the Apache Software Foundation"> To contribute to the Apache Software Foundation, a good idea in our opinion, see the link:http://www.apache.org/foundation/sponsorship.html[ASF Sponsorship] page. - diff --git a/src/main/site/custom/project-info-report.properties b/src/site/custom/project-info-report.properties similarity index 100% rename from src/main/site/custom/project-info-report.properties rename to src/site/custom/project-info-report.properties diff --git a/src/main/site/resources/.htaccess b/src/site/resources/.htaccess similarity index 100% rename from src/main/site/resources/.htaccess rename to src/site/resources/.htaccess diff --git a/src/main/site/resources/book/.empty b/src/site/resources/book/.empty similarity index 100% rename from src/main/site/resources/book/.empty rename to src/site/resources/book/.empty diff --git a/src/main/site/resources/css/site.css b/src/site/resources/css/site.css similarity index 100% rename from src/main/site/resources/css/site.css rename to src/site/resources/css/site.css diff --git a/src/main/site/resources/doap_Hbase.rdf b/src/site/resources/doap_Hbase.rdf similarity index 98% rename from src/main/site/resources/doap_Hbase.rdf rename to src/site/resources/doap_Hbase.rdf index 46082a16d06..86e22bdbe6e 100644 --- a/src/main/site/resources/doap_Hbase.rdf +++ b/src/site/resources/doap_Hbase.rdf @@ -1,8 +1,8 @@