HBASE-7372 Check in the generated website so can point apache infrastructure at what to publish as our hbase.apache.org
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1423766 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e717a51bf
commit
17a99e939f
|
@ -1,7 +0,0 @@
|
|||
The content of this directory is generated. Do not manually edit.
|
||||
To regenerate the content, run:
|
||||
|
||||
$ mvn site site:deploy -DstagingDirectory=${basedir}/hbase.apache.org
|
||||
|
||||
Committing the content of this directory will cause the commit
|
||||
to show at http://hbase.apache.org.
|
|
@ -1,396 +0,0 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
|
||||
<!-- Generated by Apache Maven Doxia at Dec 18, 2012 -->
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>HBase -
|
||||
|
||||
Apache HBase (TM) ACID Properties
|
||||
</title>
|
||||
<style type="text/css" media="all">
|
||||
@import url("./css/maven-base.css");
|
||||
@import url("./css/maven-theme.css");
|
||||
@import url("./css/site.css");
|
||||
</style>
|
||||
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
|
||||
<link rel="shortcut icon" href="/images/favicon.ico" />
|
||||
<meta name="Date-Revision-yyyymmdd" content="20121218" />
|
||||
<meta http-equiv="Content-Language" content="en" />
|
||||
<!--Google Analytics-->
|
||||
<script type="text/javascript">
|
||||
|
||||
var _gaq = _gaq || [];
|
||||
_gaq.push(['_setAccount', 'UA-30210968-1']);
|
||||
_gaq.push(['_trackPageview']);
|
||||
|
||||
(function() {
|
||||
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||
})();
|
||||
|
||||
</script>
|
||||
</head>
|
||||
<body class="composite">
|
||||
<div id="banner">
|
||||
<a href="./" id="bannerLeft">
|
||||
<img src="images/hbase_logo.png" alt="Apache HBase" />
|
||||
</a>
|
||||
<!-- Commented out since we do not use it. St.Ack 20110906
|
||||
-->
|
||||
<div class="clear">
|
||||
<hr/>
|
||||
</div>
|
||||
</div>
|
||||
<div id="breadcrumbs">
|
||||
<div class="xright" style="padding-left: 8px; margin-top: -4px;">
|
||||
<form method="GET" action="http://search-hadoop.com/">
|
||||
<input type="text" style="width: 192px; height: 15px; font-size: inherit; border: 1px solid darkgray" name="q" value="Search wiki, mailing lists &amp; more" onfocus="this.value=''"/>
|
||||
<input type="hidden" name="fc_project" value="HBase"/>
|
||||
<button style="height: 20px; width: 60px;">Search</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="clear">
|
||||
<hr/>
|
||||
</div>
|
||||
</div>
|
||||
<div id="leftColumn">
|
||||
<div id="navcolumn">
|
||||
|
||||
|
||||
<h5>Apache HBase Project</h5>
|
||||
<ul>
|
||||
<li class="none">
|
||||
<a href="index.html" title="Overview">Overview</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="license.html" title="License">License</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="http://www.apache.org/dyn/closer.cgi/hbase/" class="externalLink" title="Downloads">Downloads</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="https://issues.apache.org/jira/browse/HBASE?report=com.atlassian.jira.plugin.system.project:changelog-panel#selectedTab=com.atlassian.jira.plugin.system.project%3Achangelog-panel" class="externalLink" title="Release Notes">Release Notes</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="issue-tracking.html" title="Issue Tracking">Issue Tracking</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="mail-lists.html" title="Mailing Lists">Mailing Lists</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="source-repository.html" title="Source Repository">Source Repository</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="https://reviews.apache.org" class="externalLink" title="ReviewBoard">ReviewBoard</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="team-list.html" title="Team">Team</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="sponsors.html" title="Thanks">Thanks</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="http://blogs.apache.org/hbase/" class="externalLink" title="Blog">Blog</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="resources.html" title="Other resources">Other resources</a>
|
||||
</li>
|
||||
</ul>
|
||||
<h5>Documentation</h5>
|
||||
<ul>
|
||||
<li class="none">
|
||||
<a href="book/quickstart.html" title="Getting Started">Getting Started</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="apidocs/index.html" title="API">API</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="book/book.html" title="Ref Guide (multi-page)">Ref Guide (multi-page)</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="book.html" title="Ref Guide (single-page)">Ref Guide (single-page)</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="book/faq.html" title="FAQ">FAQ</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="book.html#other.info" title="Videos/Presentations">Videos/Presentations</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="http://wiki.apache.org/hadoop/Hbase" class="externalLink" title="Wiki">Wiki</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<strong>ACID Semantics</strong>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="book.html#arch.bulk.load" title="Bulk Loads">Bulk Loads</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="metrics.html" title="Metrics">Metrics</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="cygwin.html" title="HBase on Windows">HBase on Windows</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="replication.html" title="Cluster replication">Cluster replication</a>
|
||||
</li>
|
||||
</ul>
|
||||
<h5>ASF</h5>
|
||||
<ul>
|
||||
<li class="none">
|
||||
<a href="http://www.apache.org/foundation/" class="externalLink" title="Apache Software Foundation">Apache Software Foundation</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="http://www.apache.org/foundation/how-it-works.html" class="externalLink" title="How Apache Works">How Apache Works</a>
|
||||
</li>
|
||||
<li class="none">
|
||||
<a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsoring Apache">Sponsoring Apache</a>
|
||||
</li>
|
||||
</ul>
|
||||
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
|
||||
<img class="poweredBy" alt="Built by Maven" src="./images/logos/maven-feather.png" />
|
||||
</a>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div id="bodyColumn">
|
||||
<div id="contentBox">
|
||||
<!-- Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. -->
|
||||
|
||||
<div class="section"><h2>About this Document<a name="About_this_Document"></a></h2>
|
||||
<p>Apache HBase (TM) is not an ACID compliant database. However, it does guarantee certain specific
|
||||
properties.</p>
|
||||
<p>This specification enumerates the ACID properties of HBase.</p>
|
||||
</div>
|
||||
<div class="section"><h2>Definitions<a name="Definitions"></a></h2>
|
||||
<p>For the sake of common vocabulary, we define the following terms:</p>
|
||||
<dl>
|
||||
<dt>Atomicity</dt>
|
||||
<dd>an operation is atomic if it either completes entirely or not at all</dd>
|
||||
|
||||
<dt>Consistency</dt>
|
||||
<dd>
|
||||
all actions cause the table to transition from one valid state directly to another
|
||||
(e.g. a row will not disappear during an update, etc.)
|
||||
</dd>
|
||||
|
||||
<dt>Isolation</dt>
|
||||
<dd>
|
||||
an operation is isolated if it appears to complete independently of any other concurrent transaction
|
||||
</dd>
|
||||
|
||||
<dt>Durability</dt>
|
||||
<dd>any update that reports "successful" to the client will not be lost</dd>
|
||||
|
||||
<dt>Visibility</dt>
|
||||
<dd>an update is considered visible if any subsequent read will see the update as having been committed</dd>
|
||||
</dl>
|
||||
<p>
|
||||
The terms <i>must</i> and <i>may</i> are used as specified by RFC 2119.
|
||||
In short, the word "must" implies that, if some case exists where the statement
|
||||
is not true, it is a bug. The word "may" implies that, even if the guarantee
|
||||
is provided in a current release, users should not rely on it.
|
||||
</p>
|
||||
</div>
|
||||
<div class="section"><h2>APIs to consider<a name="APIs_to_consider"></a></h2>
|
||||
<ul>
|
||||
<li>Read APIs
|
||||
<ul>
|
||||
<li>get</li>
|
||||
<li>scan</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Write APIs
|
||||
<ul>
|
||||
<li>put</li>
|
||||
<li>batch put</li>
|
||||
<li>delete</li>
|
||||
</ul></li>
|
||||
<li>Combination (read-modify-write) APIs
|
||||
<ul>
|
||||
<li>incrementColumnValue</li>
|
||||
<li>checkAndPut</li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="section"><h2>Guarantees Provided<a name="Guarantees_Provided"></a></h2>
|
||||
|
||||
<div class="section"><h2>Atomicity<a name="Atomicity"></a></h2>
|
||||
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>All mutations are atomic within a row. Any put will either wholly succeed or wholly fail.[3]</li>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>An operation that returns a "success" code has completely succeeded.</li>
|
||||
<li>An operation that returns a "failure" code has completely failed.</li>
|
||||
<li>An operation that times out may have succeeded and may have failed. However,
|
||||
it will not have partially succeeded or failed.</li>
|
||||
</ol>
|
||||
<li> This is true even if the mutation crosses multiple column families within a row.</li>
|
||||
<li> APIs that mutate several rows will _not_ be atomic across the multiple rows.
|
||||
For example, a multiput that operates on rows 'a','b', and 'c' may return having
|
||||
mutated some but not all of the rows. In such cases, these APIs will return a list
|
||||
of success codes, each of which may be succeeded, failed, or timed out as described above.</li>
|
||||
<li> The checkAndPut API happens atomically like the typical compareAndSet (CAS) operation
|
||||
found in many hardware architectures.</li>
|
||||
<li> The order of mutations is seen to happen in a well-defined order for each row, with no
|
||||
interleaving. For example, if one writer issues the mutation "a=1,b=1,c=1" and
|
||||
another writer issues the mutation "a=2,b=2,c=2", the row must either
|
||||
be "a=1,b=1,c=1" or "a=2,b=2,c=2" and must <i>not</i> be something
|
||||
like "a=1,b=2,c=1".</li>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>Please note that this is not true _across rows_ for multirow batch mutations.</li>
|
||||
</ol>
|
||||
</ol>
|
||||
</div>
|
||||
<div class="section"><h2>Consistency and Isolation<a name="Consistency_and_Isolation"></a></h2>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>All rows returned via any access API will consist of a complete row that existed at
|
||||
some point in the table's history.</li>
|
||||
<li>This is true across column families - i.e. a get of a full row that occurs concurrently
|
||||
with some mutations 1,2,3,4,5 will return a complete row that existed at some point in time
|
||||
between mutation i and i+1 for some i between 1 and 5.</li>
|
||||
<li>The state of a row will only move forward through the history of edits to it.</li>
|
||||
</ol>
|
||||
|
||||
<div class="section"><h2>Consistency of Scans<a name="Consistency_of_Scans"></a></h2>
|
||||
<p>
|
||||
A scan is <b>not</b> a consistent view of a table. Scans do
|
||||
<b>not</b> exhibit <i>snapshot isolation</i>.
|
||||
</p>
|
||||
<p>
|
||||
Rather, scans have the following properties:
|
||||
</p>
|
||||
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>
|
||||
Any row returned by the scan will be a consistent view (i.e. that version
|
||||
of the complete row existed at some point in time) [1]
|
||||
</li>
|
||||
<li>
|
||||
A scan will always reflect a view of the data <i>at least as new as</i>
|
||||
the beginning of the scan. This satisfies the visibility guarantees
|
||||
enumerated below.</li>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>For example, if client A writes data X and then communicates via a side
|
||||
channel to client B, any scans started by client B will contain data at least
|
||||
as new as X.</li>
|
||||
<li>A scan _must_ reflect all mutations committed prior to the construction
|
||||
of the scanner, and _may_ reflect some mutations committed subsequent to the
|
||||
construction of the scanner.</li>
|
||||
<li>Scans must include <i>all</i> data written prior to the scan (except in
|
||||
the case where data is subsequently mutated, in which case it _may_ reflect
|
||||
the mutation)</li>
|
||||
</ol>
|
||||
</ol>
|
||||
<p>
|
||||
Those familiar with relational databases will recognize this isolation level as "read committed".
|
||||
</p>
|
||||
<p>
|
||||
Please note that the guarantees listed above regarding scanner consistency
|
||||
are referring to "transaction commit time", not the "timestamp"
|
||||
field of each cell. That is to say, a scanner started at time <i>t</i> may see edits
|
||||
with a timestamp value greater than <i>t</i>, if those edits were committed with a
|
||||
"forward dated" timestamp before the scanner was constructed.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section"><h2>Visibility<a name="Visibility"></a></h2>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li> When a client receives a "success" response for any mutation, that
|
||||
mutation is immediately visible to both that client and any client with whom it
|
||||
later communicates through side channels. [3]</li>
|
||||
<li> A row must never exhibit so-called "time-travel" properties. That
|
||||
is to say, if a series of mutations moves a row sequentially through a series of
|
||||
states, any sequence of concurrent reads will return a subsequence of those states.</li>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li>For example, if a row's cells are mutated using the "incrementColumnValue"
|
||||
API, a client must never see the value of any cell decrease.</li>
|
||||
<li>This is true regardless of which read API is used to read back the mutation.</li>
|
||||
</ol>
|
||||
<li> Any version of a cell that has been returned to a read operation is guaranteed to
|
||||
be durably stored.</li>
|
||||
</ol>
|
||||
|
||||
</div>
|
||||
<div class="section"><h2>Durability<a name="Durability"></a></h2>
|
||||
<ol style="list-style-type: decimal">
|
||||
<li> All visible data is also durable data. That is to say, a read will never return
|
||||
data that has not been made durable on disk[2]</li>
|
||||
<li> Any operation that returns a "success" code (e.g. does not throw an exception)
|
||||
will be made durable.[3]</li>
|
||||
<li> Any operation that returns a "failure" code will not be made durable
|
||||
(subject to the Atomicity guarantees above)</li>
|
||||
<li> All reasonable failure scenarios will not affect any of the guarantees of this document.</li>
|
||||
|
||||
</ol>
|
||||
</div>
|
||||
<div class="section"><h2>Tunability<a name="Tunability"></a></h2>
|
||||
<p>All of the above guarantees must be possible within Apache HBase. For users who would like to trade
|
||||
off some guarantees for performance, HBase may offer several tuning options. For example:</p>
|
||||
<ul>
|
||||
<li>Visibility may be tuned on a per-read basis to allow stale reads or time travel.</li>
|
||||
<li>Durability may be tuned to only flush data to disk on a periodic basis</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section"><h2>More Information<a name="More_Information"></a></h2>
|
||||
<p>
|
||||
For more information, see the <a href="book.html#client">client architecture</a> or <a href="book.html#datamodel">data model</a> sections in the Apache HBase Reference Guide.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="section"><h2>Footnotes<a name="Footnotes"></a></h2>
|
||||
<p>[1] A consistent view is not guaranteed intra-row scanning -- i.e. fetching a portion of
|
||||
a row in one RPC then going back to fetch another portion of the row in a subsequent RPC.
|
||||
Intra-row scanning happens when you set a limit on how many values to return per Scan#next
|
||||
(See <a class="externalLink" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html#setBatch(int)">Scan#setBatch(int)</a>).
|
||||
</p>
|
||||
|
||||
<p>[2] In the context of Apache HBase, "durably on disk" implies an hflush() call on the transaction
|
||||
log. This does not actually imply an fsync() to magnetic media, but rather just that the data has been
|
||||
written to the OS cache on all replicas of the log. In the case of a full datacenter power loss, it is
|
||||
possible that the edits are not truly durable.</p>
|
||||
<p>[3] Puts will either wholly succeed or wholly fail, provided that they are actually sent
|
||||
to the RegionServer. If the writebuffer is used, Puts will not be sent until the writebuffer is filled
|
||||
or it is explicitly flushed.</p>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="clear">
|
||||
<hr/>
|
||||
</div>
|
||||
<div id="footer">
|
||||
<div class="xright">
|
||||
|
||||
<span id="publishDate">Last Published: 2012-12-18</span>
|
||||
| <span id="projectVersion">Version: 0.95-SNAPSHOT</span>
|
||||
|
||||
</div>
|
||||
<div class="xright">Copyright © 2012
|
||||
<a href="http://www.apache.org/">The Apache Software Foundation</a>.
|
||||
All Rights Reserved. Apache Hadoop, Hadoop, HDFS, HBase and the HBase project logo are trademarks of the Apache Software Foundation.
|
||||
</div>
|
||||
<div class="clear">
|
||||
<hr/>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because one or more lines are too long
|
@ -1,41 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>B.2. Inconsistencies</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="hbck.in.depth.html" title="Appendix B. hbck In Depth"><link rel="prev" href="hbck.in.depth.html" title="Appendix B. hbck In Depth"><link rel="next" href="apbs03.html" title="B.3. Localized repairs"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">B.2. Inconsistencies</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hbck.in.depth.html">Prev</a> </td><th width="60%" align="center">Appendix B. hbck In Depth</th><td width="20%" align="right"> <a accesskey="n" href="apbs03.html">Next</a></td></tr></table><hr></div><div class="section" title="B.2. Inconsistencies"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11399"></a>B.2. Inconsistencies</h2></div></div></div><p>
|
||||
If after several runs, inconsistencies continue to be reported, you may have encountered a
|
||||
corruption. These should be rare, but in the event they occur newer versions of HBase include
|
||||
the hbck tool enabled with automatic repair options.
|
||||
</p><p>
|
||||
There are two invariants that when violated create inconsistencies in HBase:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">HBase’s region consistency invariant is satisfied if every region is assigned and
|
||||
deployed on exactly one region server, and all places where this state is kept are in
|
||||
accordance.
|
||||
</li><li class="listitem">HBase’s table integrity invariant is satisfied if for each table, every possible row key
|
||||
resolves to exactly one region.
|
||||
</li></ul></div><p>
|
||||
Repairs generally work in three phases -- a read-only information gathering phase that identifies
|
||||
inconsistencies, a table integrity repair phase that restores the table integrity invariant, and then
|
||||
finally a region consistency repair phase that restores the region consistency invariant.
|
||||
Starting from version 0.90.0, hbck could detect region consistency problems and report on a subset
|
||||
of possible table integrity problems. It also included the ability to automatically fix the most
|
||||
common inconsistency, region assignment and deployment consistency problems. This repair
|
||||
could be done by using the <code class="code">-fix</code> command line option. This option closes regions if they are
|
||||
open on the wrong server or on multiple region servers and also assigns regions to region
|
||||
servers if they are not open.
|
||||
</p><p>
|
||||
Starting from HBase versions 0.90.7, 0.92.2 and 0.94.0, several new command line options are
|
||||
introduced to aid repairing a corrupted HBase. This hbck sometimes goes by the nickname
|
||||
“uberhbck”. Each particular version of uberhbck is compatible with HBase releases of the same
|
||||
major version (0.90.7 uberhbck can repair a 0.90.4). However, versions &lt;=0.90.6 and versions
|
||||
&lt;=0.92.1 may require restarting the master or failing over to a backup master.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = '';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hbck.in.depth.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="hbck.in.depth.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="apbs03.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix B. hbck In Depth </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> B.3. Localized repairs</td></tr></table></div></body></html>
|
|
@ -1,51 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>B.3. Localized repairs</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="hbck.in.depth.html" title="Appendix B. hbck In Depth"><link rel="prev" href="apbs02.html" title="B.2. Inconsistencies"><link rel="next" href="apbs04.html" title="B.4. Region Overlap Repairs"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">B.3. Localized repairs</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="apbs02.html">Prev</a> </td><th width="60%" align="center">Appendix B. hbck In Depth</th><td width="20%" align="right"> <a accesskey="n" href="apbs04.html">Next</a></td></tr></table><hr></div><div class="section" title="B.3. Localized repairs"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11418"></a>B.3. Localized repairs</h2></div></div></div><p>
|
||||
When repairing a corrupted HBase, it is best to repair the lowest risk inconsistencies first.
|
||||
These are generally region consistency repairs -- localized single region repairs, that only modify
|
||||
in-memory data, ephemeral zookeeper data, or patch holes in the META table.
|
||||
Region consistency requires that the HBase instance has the state of the region’s data in HDFS
|
||||
(.regioninfo files), the region’s row in the .META. table, and the region’s deployment/assignments on
|
||||
region servers and the master in accordance. Options for repairing region consistency include:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">-fixAssignments</code> (equivalent to the 0.90 <code class="code">-fix</code> option) repairs unassigned, incorrectly
|
||||
assigned or multiply assigned regions.
|
||||
</li><li class="listitem"><code class="code">-fixMeta</code> which removes meta rows when corresponding regions are not present in
|
||||
HDFS and adds new meta rows if the regions are present in HDFS while not in META.
|
||||
</li></ul></div><p>
|
||||
To fix deployment and assignment problems you can run this command:
|
||||
</p><pre class="programlisting">
|
||||
$ ./bin/hbase hbck -fixAssignments
|
||||
</pre>
|
||||
To fix deployment and assignment problems as well as repairing incorrect meta rows you can
|
||||
run this command:
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase hbck -fixAssignments -fixMeta
|
||||
</pre>
|
||||
There are a few classes of table integrity problems that are low risk repairs. The first two are
|
||||
degenerate (startkey == endkey) regions and backwards regions (startkey > endkey). These are
|
||||
automatically handled by sidelining the data to a temporary directory (/hbck/xxxx).
|
||||
The third low-risk class is hdfs region holes. This can be repaired by using the:
|
||||
<div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">-fixHdfsHoles</code> option for fabricating new empty regions on the file system.
|
||||
If holes are detected you can use -fixHdfsHoles and should include -fixMeta and -fixAssignments to make the new region consistent.
|
||||
</li></ul></div><pre class="programlisting">
|
||||
$ ./bin/hbase hbck -fixAssignments -fixMeta -fixHdfsHoles
|
||||
</pre>
|
||||
Since this is a common operation, we’ve added the <code class="code">-repairHoles</code> flag that is equivalent to the
|
||||
previous command:
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase hbck -repairHoles
|
||||
</pre>
|
||||
If inconsistencies still remain after these steps, you most likely have table integrity problems
|
||||
related to orphaned or overlapping regions.
|
||||
</div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = '';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="apbs02.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="hbck.in.depth.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="apbs04.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">B.2. Inconsistencies </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> B.4. Region Overlap Repairs</td></tr></table></div></body></html>
|
|
@ -1,87 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>B.4. Region Overlap Repairs</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="hbck.in.depth.html" title="Appendix B. hbck In Depth"><link rel="prev" href="apbs03.html" title="B.3. Localized repairs"><link rel="next" href="compression.html" title="Appendix C. Compression In HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">B.4. Region Overlap Repairs</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="apbs03.html">Prev</a> </td><th width="60%" align="center">Appendix B. hbck In Depth</th><td width="20%" align="right"> <a accesskey="n" href="compression.html">Next</a></td></tr></table><hr></div><div class="section" title="B.4. Region Overlap Repairs"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11456"></a>B.4. Region Overlap Repairs</h2></div></div></div>
|
||||
Table integrity problems can require repairs that deal with overlaps. This is a riskier operation
|
||||
because it requires modifications to the file system, requires some decision making, and may
|
||||
require some manual steps. For these repairs it is best to analyze the output of a <code class="code">hbck -details</code>
|
||||
run so that you isolate repair attempts only upon problems the checks identify. Because this is
|
||||
riskier, there are safeguards that should be used to limit the scope of the repairs.
|
||||
WARNING: This is relatively new and has only been tested on online but idle HBase instances
|
||||
(no reads/writes). Use at your own risk in an active production environment!
|
||||
The options for repairing table integrity violations include:
|
||||
<div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">-fixHdfsOrphans</code> option for “adopting” a region directory that is missing a region
|
||||
metadata file (the .regioninfo file).
|
||||
</li><li class="listitem"><code class="code">-fixHdfsOverlaps</code> ability for fixing overlapping regions
|
||||
</li></ul></div>
|
||||
When repairing overlapping regions, a region’s data can be modified on the file system in two
|
||||
ways: 1) by merging regions into a larger region or 2) by sidelining regions by moving data to
|
||||
a “sideline” directory where data could be restored later. Merging a large number of regions is
|
||||
technically correct but could result in an extremely large region that requires a series of costly
|
||||
compactions and splitting operations. In these cases, it is probably better to sideline the regions
|
||||
that overlap with the most other regions (likely the largest ranges) so that merges can happen on
|
||||
a more reasonable scale. Since these sidelined regions are already laid out in HBase’s native
|
||||
directory and HFile format, they can be restored by using HBase’s bulk load mechanism.
|
||||
The default safeguard thresholds are conservative. These options let you override the default
|
||||
thresholds and enable the large region sidelining feature.
|
||||
<div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">-maxMerge &lt;n&gt;</code> maximum number of overlapping regions to merge
|
||||
</li><li class="listitem"><code class="code">-sidelineBigOverlaps</code> if more than maxMerge regions are overlapping, attempt
|
||||
to sideline the regions overlapping with the most other regions.
|
||||
</li><li class="listitem"><code class="code">-maxOverlapsToSideline &lt;n&gt;</code> if sidelining large overlapping regions, sideline at most n
|
||||
regions.
|
||||
</li></ul></div>
|
||||
|
||||
Since often times you would just want to get the tables repaired, you can use this option to turn
|
||||
on all repair options:
|
||||
<div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">-repair</code> includes all the region consistency options and only the hole repairing table
|
||||
integrity options.
|
||||
</li></ul></div>
|
||||
Finally, there are safeguards to limit repairs to only specific tables. For example the following
|
||||
command would only attempt to check and repair table TableFoo and TableBar.
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase hbck -repair TableFoo TableBar
|
||||
</pre><div class="section" title="B.4.1. Special cases: Meta is not properly assigned"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11495"></a>B.4.1. Special cases: Meta is not properly assigned</h3></div></div></div>
|
||||
There are a few special cases that hbck can handle as well.
|
||||
Sometimes the meta table’s only region is inconsistently assigned or deployed. In this case
|
||||
there is a special <code class="code">-fixMetaOnly</code> option that can try to fix meta assignments.
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase hbck -fixMetaOnly -fixAssignments
|
||||
</pre></div><div class="section" title="B.4.2. Special cases: HBase version file is missing"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11504"></a>B.4.2. Special cases: HBase version file is missing</h3></div></div></div>
|
||||
HBase’s data on the file system requires a version file in order to start. If this file is missing, you
|
||||
can use the <code class="code">-fixVersionFile</code> option to fabricate a new HBase version file. This assumes that
|
||||
the version of hbck you are running is the appropriate version for the HBase cluster.
|
||||
</div><div class="section" title="B.4.3. Special case: Root and META are corrupt."><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11511"></a>B.4.3. Special case: Root and META are corrupt.</h3></div></div></div>
|
||||
The most drastic corruption scenario is the case where the ROOT or META is corrupted and
|
||||
HBase will not start. In this case you can use the OfflineMetaRepair tool to create new ROOT
|
||||
and META regions and tables.
|
||||
This tool assumes that HBase is offline. It then marches through the existing HBase home
|
||||
directory, loads as much information from region metadata files (.regioninfo files) as possible
|
||||
from the file system. If the region metadata has proper table integrity, it sidelines the original root
|
||||
and meta table directories, and builds new ones with pointers to the region directories and their
|
||||
data.
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase org.apache.hadoop.hbase.util.OfflineMetaRepair
|
||||
</pre>
|
||||
NOTE: This tool is not as clever as uberhbck but can be used to bootstrap repairs that uberhbck
|
||||
can complete.
|
||||
If the tool succeeds you should be able to start hbase and run online repairs if necessary.
|
||||
</div><div class="section" title="B.4.4. Special cases: Offline split parent"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11518"></a>B.4.4. Special cases: Offline split parent</h3></div></div></div><p>
|
||||
Once a region is split, the offline parent will be cleaned up automatically. Sometimes, daughter regions
|
||||
are split again before their parents are cleaned up. HBase can clean up parents in the right order. However,
|
||||
there could be some lingering offline split parents sometimes. They are in META, in HDFS, and not deployed.
|
||||
But HBase can't clean them up. In this case, you can use the <code class="code">-fixSplitParents</code> option to reset
|
||||
them in META to be online and not split. Therefore, hbck can merge them with other regions if fixing
|
||||
overlapping regions option is used.
|
||||
</p><p>
|
||||
This option should not normally be used, and it is not in <code class="code">-fixAll</code>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = '';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="apbs03.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="hbck.in.depth.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="compression.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">B.3. Localized repairs </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix C. Compression In HBase</td></tr></table></div></body></html>
|
|
@ -1,14 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="changing.compression.html" title="C.6. Changing Compression Schemes"><link rel="next" href="hfilev2.html" title="Appendix E. HFile format version 2"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="changing.compression.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="hfilev2.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase"><div class="titlepage"><div><div><h2 class="title"><a name="d2121e11657"></a>Appendix D. <a class="link" href="https://github.com/brianfrankcooper/YCSB/" target="_top">YCSB: The Yahoo! Cloud Serving Benchmark</a> and HBase</h2></div></div></div><p>TODO: Describe how YCSB is poor for putting up a decent cluster load.</p><p>TODO: Describe setup of YCSB for HBase</p><p>Ted Dunning redid YCSB so it's mavenized and added facility for verifying workloads. See <a class="link" href="https://github.com/tdunning/YCSB" target="_top">Ted Dunning's YCSB</a>.</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = '';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="changing.compression.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="hfilev2.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">C.6. Changing Compression Schemes </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix E. HFile format version 2</td></tr></table></div></body></html>
|
|
@ -1,22 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>E.2. HFile format version 1 overview</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="hfilev2.html" title="Appendix E. HFile format version 2"><link rel="prev" href="hfilev2.html" title="Appendix E. HFile format version 2"><link rel="next" href="apes03.html" title="E.3. HBase file format with inline blocks (version 2)"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">E.2. HFile format version 1 overview </th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hfilev2.html">Prev</a> </td><th width="60%" align="center">Appendix E. HFile format version 2</th><td width="20%" align="right"> <a accesskey="n" href="apes03.html">Next</a></td></tr></table><hr></div><div class="section" title="E.2. HFile format version 1 overview"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11687"></a>E.2. HFile format version 1 overview </h2></div></div></div><p>As we will be discussing the changes we are making to the HFile format, it is useful to give a short overview of the previous (HFile version 1) format. An HFile in the existing format is structured as follows:
|
||||
<span class="inlinemediaobject"><img src="../images/hfile.png" align="middle" alt="HFile Version 1"></span>
|
||||
<sup>[<a name="d2121e11702" href="#ftn.d2121e11702" class="footnote">36</a>]</sup>
|
||||
</p><div class="section" title="E.2.1. Block index format in version 1"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11709"></a>E.2.1. Block index format in version 1 </h3></div></div></div><p>The block index in version 1 is very straightforward. For each entry, it contains: </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Offset (long)</p></li><li class="listitem"><p>Uncompressed size (int)</p></li><li class="listitem"><p>Key (a serialized byte array written using Bytes.writeByteArray) </p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>Key length as a variable-length integer (VInt)
|
||||
</p></li><li class="listitem"><p>
|
||||
Key bytes
|
||||
</p></li></ol></div></li></ol></div><p>The number of entries in the block index is stored in the fixed file trailer, and has to be passed in to the method that reads the block index. One of the limitations of the block index in version 1 is that it does not provide the compressed size of a block, which turns out to be necessary for decompression. Therefore, the HFile reader has to infer this compressed size from the offset difference between blocks. We fix this limitation in version 2, where we store on-disk block size instead of uncompressed size, and get uncompressed size from the block header.</p></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e11702" href="#d2121e11702" class="para">36</a>] </sup>Image courtesy of Lars George, <a class="link" href="http://www.larsgeorge.com/2009/10/hbase-architecture-101-storage.html" target="_top">hbase-architecture-101-storage.html</a>.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = '';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hfilev2.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="hfilev2.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="apes03.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix E. HFile format version 2 </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> E.3.
|
||||
HBase file format with inline blocks (version 2)
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,146 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>E.3. HBase file format with inline blocks (version 2)</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="hfilev2.html" title="Appendix E. HFile format version 2"><link rel="prev" href="apes02.html" title="E.2. HFile format version 1 overview"><link rel="next" href="other.info.html" title="Appendix F. Other Information About HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">E.3.
|
||||
HBase file format with inline blocks (version 2)
|
||||
</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="apes02.html">Prev</a> </td><th width="60%" align="center">Appendix E. HFile format version 2</th><td width="20%" align="right"> <a accesskey="n" href="other.info.html">Next</a></td></tr></table><hr></div><div class="section" title="E.3. HBase file format with inline blocks (version 2)"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11733"></a>E.3.
|
||||
HBase file format with inline blocks (version 2)
|
||||
</h2></div></div></div><div class="section" title="E.3.1. Overview"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11736"></a>E.3.1. Overview</h3></div></div></div><p>The version of HBase introducing the above features reads both version 1 and 2 HFiles, but only writes version 2 HFiles. A version 2 HFile is structured as follows:
|
||||
<span class="inlinemediaobject"><img src="../images/hfilev2.png" align="middle" alt="HFile Version 2"></span>
|
||||
|
||||
</p></div><div class="section" title="E.3.2. Unified version 2 block format"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11751"></a>E.3.2. Unified version 2 block format</h3></div></div></div><p>In the version 2 every block in the data section contains the following fields: </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>8 bytes: Block type, a sequence of bytes equivalent to version 1's "magic records". Supported block types are: </p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>DATA – data blocks
|
||||
</p></li><li class="listitem"><p>
|
||||
LEAF_INDEX – leaf-level index blocks in a multi-level block index
|
||||
</p></li><li class="listitem"><p>
|
||||
BLOOM_CHUNK – Bloom filter chunks
|
||||
</p></li><li class="listitem"><p>
|
||||
META – meta blocks (not used for Bloom filters in version 2 anymore)
|
||||
</p></li><li class="listitem"><p>
|
||||
INTERMEDIATE_INDEX – intermediate-level index blocks in a multi-level block index
|
||||
</p></li><li class="listitem"><p>
|
||||
ROOT_INDEX – root-level index blocks in a multi-level block index
|
||||
</p></li><li class="listitem"><p>
|
||||
FILE_INFO – the “file info” block, a small key-value map of metadata
|
||||
</p></li><li class="listitem"><p>
|
||||
BLOOM_META – a Bloom filter metadata block in the load-on-open section
|
||||
</p></li><li class="listitem"><p>
|
||||
TRAILER – a fixed-size file trailer. As opposed to the above, this is not an
|
||||
HFile v2 block but a fixed-size (for each HFile version) data structure
|
||||
</p></li><li class="listitem"><p>
|
||||
INDEX_V1 – this block type is only used for legacy HFile v1 blocks
|
||||
</p></li></ol></div></li><li class="listitem"><p>Compressed size of the block's data, not including the header (int).
|
||||
</p><p>
|
||||
Can be used for skipping the current data block when scanning HFile data.
|
||||
</p></li><li class="listitem"><p>Uncompressed size of the block's data, not including the header (int)</p><p>
|
||||
This is equal to the compressed size if the compression algorithm is NONE
|
||||
</p></li><li class="listitem"><p>File offset of the previous block of the same type (long)</p><p>
|
||||
Can be used for seeking to the previous data/index block
|
||||
</p></li><li class="listitem"><p>Compressed data (or uncompressed data if the compression algorithm is NONE).</p></li></ol></div><p>The above format of blocks is used in the following HFile sections:</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Scanned block section. The section is named so because it contains all data blocks that need to be read when an HFile is scanned sequentially. Also contains leaf block index and Bloom chunk blocks. </p></li><li class="listitem"><p>Non-scanned block section. This section still contains unified-format v2 blocks but it does not have to be read when doing a sequential scan. This section contains “meta” blocks and intermediate-level index blocks.
|
||||
</p></li></ol></div><p>We are supporting “meta” blocks in version 2 the same way they were supported in version 1, even though we do not store Bloom filter data in these blocks anymore. </p></div><div class="section" title="E.3.3. Block index in version 2"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11820"></a>E.3.3. Block index in version 2</h3></div></div></div><p>There are three types of block indexes in HFile version 2, stored in two different formats (root and non-root): </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Data index — version 2 multi-level block index, consisting of:</p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>
|
||||
Version 2 root index, stored in the data block index section of the file
|
||||
</p></li><li class="listitem"><p>
|
||||
Optionally, version 2 intermediate levels, stored in the non-root format in the data index section of the file. Intermediate levels can only be present if leaf level blocks are present
|
||||
</p></li><li class="listitem"><p>
|
||||
Optionally, version 2 leaf levels, stored in the non-root format inline with data blocks
|
||||
</p></li></ol></div></li><li class="listitem"><p>Meta index — version 2 root index format only, stored in the meta index section of the file</p></li><li class="listitem"><p>Bloom index — version 2 root index format only, stored in the “load-on-open” section as part of Bloom filter metadata.</p></li></ol></div></div><div class="section" title="E.3.4. Root block index format in version 2"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11845"></a>E.3.4.
|
||||
Root block index format in version 2</h3></div></div></div><p>This format applies to:</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Root level of the version 2 data index</p></li><li class="listitem"><p>Entire meta and Bloom indexes in version 2, which are always single-level. </p></li></ol></div><p>A version 2 root index block is a sequence of entries of the following format, similar to entries of a version 1 block index, but storing on-disk size instead of uncompressed size. </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Offset (long) </p><p>
|
||||
This offset may point to a data block or to a deeper-level index block.
|
||||
</p></li><li class="listitem"><p>On-disk size (int) </p></li><li class="listitem"><p>Key (a serialized byte array stored using Bytes.writeByteArray) </p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>Key length (VInt)
|
||||
</p></li><li class="listitem"><p>Key bytes
|
||||
</p></li></ol></div></li></ol></div><p>A single-level version 2 block index consists of just a single root index block. To read a root index block of version 2, one needs to know the number of entries. For the data index and the meta index the number of entries is stored in the trailer, and for the Bloom index it is stored in the compound Bloom filter metadata.</p><p>For a multi-level block index we also store the following fields in the root index block in the load-on-open section of the HFile, in addition to the data structure described above:</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Middle leaf index block offset</p></li><li class="listitem"><p>Middle leaf block on-disk size (meaning the leaf index block containing the reference to the “middle” data block of the file) </p></li><li class="listitem"><p>The index of the mid-key (defined below) in the middle leaf-level block.</p></li></ol></div><p></p><p>These additional fields are used to efficiently retrieve the mid-key of the HFile used in HFile splits, which we define as the first key of the block with a zero-based index of (n – 1) / 2, if the total number of blocks in the HFile is n. This definition is consistent with how the mid-key was determined in HFile version 1, and is reasonable in general, because blocks are likely to be the same size on average, but we don’t have any estimates on individual key/value pair sizes. </p><p></p><p>When writing a version 2 HFile, the total number of data blocks pointed to by every leaf-level index block is kept track of. When we finish writing and the total number of leaf-level blocks is determined, it is clear which leaf-level block contains the mid-key, and the fields listed above are computed. 
When reading the HFile and the mid-key is requested, we retrieve the middle leaf index block (potentially from the block cache) and get the mid-key value from the appropriate position inside that leaf block.</p></div><div class="section" title="E.3.5. Non-root block index format in version 2"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11898"></a>E.3.5.
|
||||
Non-root block index format in version 2</h3></div></div></div><p>This format applies to intermediate-level and leaf index blocks of a version 2 multi-level data block index. Every non-root index block is structured as follows. </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>numEntries: the number of entries (int). </p></li><li class="listitem"><p>entryOffsets: the “secondary index” of offsets of entries in the block, to facilitate a quick binary search on the key (numEntries + 1 int values). The last value is the total length of all entries in this index block. For example, in a non-root index block with entry sizes 60, 80, 50 the “secondary index” will contain the following int array: {0, 60, 140, 190}.</p></li><li class="listitem"><p>Entries. Each entry contains: </p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>
|
||||
Offset of the block referenced by this entry in the file (long)
|
||||
</p></li><li class="listitem"><p>
|
||||
On-disk size of the referenced block (int)
|
||||
</p></li><li class="listitem"><p>
|
||||
Key. The length can be calculated from entryOffsets.
|
||||
</p></li></ol></div></li></ol></div></div><div class="section" title="E.3.6. Bloom filters in version 2"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11923"></a>E.3.6.
|
||||
Bloom filters in version 2</h3></div></div></div><p>In contrast with version 1, in a version 2 HFile Bloom filter metadata is stored in the load-on-open section of the HFile for quick startup. </p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>A compound Bloom filter. </p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>
|
||||
Bloom filter version = 3 (int). There used to be a DynamicByteBloomFilter class that had the Bloom filter version number 2
|
||||
</p></li><li class="listitem"><p>
|
||||
The total byte size of all compound Bloom filter chunks (long)
|
||||
</p></li><li class="listitem"><p>
|
||||
Number of hash functions (int)
|
||||
</p></li><li class="listitem"><p>
|
||||
Type of hash functions (int)
|
||||
</p></li><li class="listitem"><p>
|
||||
The total key count inserted into the Bloom filter (long)
|
||||
</p></li><li class="listitem"><p>
|
||||
The maximum total number of keys in the Bloom filter (long)
|
||||
</p></li><li class="listitem"><p>
|
||||
The number of chunks (int)
|
||||
</p></li><li class="listitem"><p>
|
||||
Comparator class used for Bloom filter keys, a UTF-8 encoded string stored using Bytes.writeByteArray
|
||||
</p></li><li class="listitem"><p>
|
||||
Bloom block index in the version 2 root block index format
|
||||
</p></li></ol></div></li></ol></div></div><div class="section" title="E.3.7. File Info format in versions 1 and 2"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e11960"></a>E.3.7. File Info format in versions 1 and 2</h3></div></div></div><p>The file info block is a serialized <a class="ulink" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/io/HbaseMapWritable.html" target="_top">HbaseMapWritable</a> (essentially a map from byte arrays to byte arrays) with the following keys, among others. StoreFile-level logic adds more keys to this.</p><div class="informaltable"><table border="1"><colgroup><col><col></colgroup><tbody><tr><td>
|
||||
<p>hfile.LASTKEY </p>
|
||||
</td><td>
|
||||
<p>The last key of the file (byte array) </p>
|
||||
</td></tr><tr><td>
|
||||
<p>hfile.AVG_KEY_LEN </p>
|
||||
</td><td>
|
||||
<p>The average key length in the file (int) </p>
|
||||
</td></tr><tr><td>
|
||||
<p>hfile.AVG_VALUE_LEN </p>
|
||||
</td><td>
|
||||
<p>The average value length in the file (int) </p>
|
||||
</td></tr></tbody></table></div><p>File info format did not change in version 2. However, we moved the file info to the final section of the file, which can be loaded as one block at the time the HFile is being opened. Also, we do not store comparator in the version 2 file info anymore. Instead, we store it in the fixed file trailer. This is because we need to know the comparator at the time of parsing the load-on-open section of the HFile.</p></div><div class="section" title="E.3.8. Fixed file trailer format differences between versions 1 and 2"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e12006"></a>E.3.8.
|
||||
Fixed file trailer format differences between versions 1 and 2</h3></div></div></div><p>The following table shows common and different fields between fixed file trailers in versions 1 and 2. Note that the size of the trailer is different depending on the version, so it is “fixed” only within one version. However, the version is always stored as the last four-byte integer in the file. </p><p></p><div class="informaltable"><table border="1"><colgroup><col class="c1"><col class="c2"></colgroup><tbody><tr><td>
|
||||
<p>Version 1 </p>
|
||||
</td><td>
|
||||
<p>Version 2 </p>
|
||||
</td></tr><tr><td colspan="2" align="center">
|
||||
<p>File info offset (long) </p>
|
||||
</td></tr><tr><td>
|
||||
<p>Data index offset (long) </p>
|
||||
</td><td>
|
||||
<p>loadOnOpenOffset (long)</p>
|
||||
<p><span class="emphasis"><em>The offset of the section that we need to load when opening the file.</em></span></p>
|
||||
</td></tr><tr><td colspan="2" align="center">
|
||||
<p>Number of data index entries (int) </p>
|
||||
</td></tr><tr><td>
|
||||
<p>metaIndexOffset (long)</p>
|
||||
<p>This field is not being used by the version 1 reader, so we removed it from version 2.</p>
|
||||
</td><td>
|
||||
<p>uncompressedDataIndexSize (long)</p>
|
||||
<p>The total uncompressed size of the whole data block index, including root-level, intermediate-level, and leaf-level blocks.</p>
|
||||
</td></tr><tr><td colspan="2" align="center">
|
||||
<p>Number of meta index entries (int) </p>
|
||||
</td></tr><tr><td colspan="2" align="center">
|
||||
<p>Total uncompressed bytes (long) </p>
|
||||
</td></tr><tr><td>
|
||||
<p>numEntries (int) </p>
|
||||
</td><td>
|
||||
<p>numEntries (long) </p>
|
||||
</td></tr><tr><td colspan="2" align="center">
|
||||
<p>Compression codec: 0 = LZO, 1 = GZ, 2 = NONE (int) </p>
|
||||
</td></tr><tr><td>
|
||||
<p></p>
|
||||
</td><td>
|
||||
<p>The number of levels in the data block index (int) </p>
|
||||
</td></tr><tr><td>
|
||||
<p></p>
|
||||
</td><td>
|
||||
<p>firstDataBlockOffset (long)</p>
|
||||
<p>The offset of the first data block. Used when scanning. </p>
|
||||
</td></tr><tr><td>
|
||||
<p></p>
|
||||
</td><td>
|
||||
<p>lastDataBlockEnd (long)</p>
|
||||
<p>The offset of the first byte after the last key/value data block. We don't need to go beyond this offset when scanning. </p>
|
||||
</td></tr><tr><td>
|
||||
<p>Version: 1 (int) </p>
|
||||
</td><td>
|
||||
<p>Version: 2 (int) </p>
|
||||
</td></tr></tbody></table></div><p></p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = '';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="apes02.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="hfilev2.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="other.info.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">E.2. HFile format version 1 overview </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix F. Other Information About HBase</td></tr></table></div></body></html>
|
|
@ -1,91 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.8. Bulk Loading</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="regions.arch.html" title="9.7. Regions"><link rel="next" href="arch.hdfs.html" title="9.9. HDFS"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.8. Bulk Loading</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="regions.arch.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="arch.hdfs.html">Next</a></td></tr></table><hr></div><div class="section" title="9.8. Bulk Loading"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="arch.bulk.load"></a>9.8. Bulk Loading</h2></div></div></div><div class="section" title="9.8.1. Overview"><div class="titlepage"><div><div><h3 class="title"><a name="arch.bulk.load.overview"></a>9.8.1. Overview</h3></div></div></div><p>
|
||||
HBase includes several methods of loading data into tables.
|
||||
The most straightforward method is to either use the <code class="code">TableOutputFormat</code>
|
||||
class from a MapReduce job, or use the normal client APIs; however,
|
||||
these are not always the most efficient methods.
|
||||
</p><p>
|
||||
The bulk load feature uses a MapReduce job to output table data in HBase's internal
|
||||
data format, and then directly loads the generated StoreFiles into a running
|
||||
cluster. Using bulk load will use less CPU and network resources than
|
||||
simply using the HBase API.
|
||||
</p></div><div class="section" title="9.8.2. Bulk Load Architecture"><div class="titlepage"><div><div><h3 class="title"><a name="arch.bulk.load.arch"></a>9.8.2. Bulk Load Architecture</h3></div></div></div><p>
|
||||
The HBase bulk load process consists of two main steps.
|
||||
</p><div class="section" title="9.8.2.1. Preparing data via a MapReduce job"><div class="titlepage"><div><div><h4 class="title"><a name="arch.bulk.load.prep"></a>9.8.2.1. Preparing data via a MapReduce job</h4></div></div></div><p>
|
||||
The first step of a bulk load is to generate HBase data files (StoreFiles) from
|
||||
a MapReduce job using <code class="code">HFileOutputFormat</code>. This output format writes
|
||||
out data in HBase's internal storage format so that they can be
|
||||
later loaded very efficiently into the cluster.
|
||||
</p><p>
|
||||
In order to function efficiently, <code class="code">HFileOutputFormat</code> must be
|
||||
configured such that each output HFile fits within a single region.
|
||||
In order to do this, jobs whose output will be bulk loaded into HBase
|
||||
use Hadoop's <code class="code">TotalOrderPartitioner</code> class to partition the map output
|
||||
into disjoint ranges of the key space, corresponding to the key
|
||||
ranges of the regions in the table.
|
||||
</p><p>
|
||||
<code class="code">HFileOutputFormat</code> includes a convenience function,
|
||||
<code class="code">configureIncrementalLoad()</code>, which automatically sets up
|
||||
a <code class="code">TotalOrderPartitioner</code> based on the current region boundaries of a
|
||||
table.
|
||||
</p></div><div class="section" title="9.8.2.2. Completing the data load"><div class="titlepage"><div><div><h4 class="title"><a name="arch.bulk.load.complete"></a>9.8.2.2. Completing the data load</h4></div></div></div><p>
|
||||
After the data has been prepared using
|
||||
<code class="code">HFileOutputFormat</code>, it is loaded into the cluster using
|
||||
<code class="code">completebulkload</code>. This command line tool iterates
|
||||
through the prepared data files, and for each one determines the
|
||||
region the file belongs to. It then contacts the appropriate Region
|
||||
Server which adopts the HFile, moving it into its storage directory
|
||||
and making the data available to clients.
|
||||
</p><p>
|
||||
If the region boundaries have changed during the course of bulk load
|
||||
preparation, or between the preparation and completion steps, the
|
||||
<code class="code">completebulkload</code> utility will automatically split the
|
||||
data files into pieces corresponding to the new boundaries. This
|
||||
process is not optimally efficient, so users should take care to
|
||||
minimize the delay between preparing a bulk load and importing it
|
||||
into the cluster, especially if other clients are simultaneously
|
||||
loading data through other means.
|
||||
</p></div></div><div class="section" title="9.8.3. Importing the prepared data using the completebulkload tool"><div class="titlepage"><div><div><h3 class="title"><a name="arch.bulk.load.import"></a>9.8.3. Importing the prepared data using the completebulkload tool</h3></div></div></div><p>
|
||||
After a data import has been prepared, either by using the
|
||||
<code class="code">importtsv</code> tool with the
|
||||
"<code class="code">importtsv.bulk.output</code>" option or by some other MapReduce
|
||||
job using the <code class="code">HFileOutputFormat</code>, the
|
||||
<code class="code">completebulkload</code> tool is used to import the data into the
|
||||
running cluster.
|
||||
</p><p>
|
||||
The <code class="code">completebulkload</code> tool simply takes the output path
|
||||
where <code class="code">importtsv</code> or your MapReduce job put its results, and
|
||||
the table name to import into. For example:
|
||||
</p><code class="code">$ hadoop jar hbase-VERSION.jar completebulkload [-c /path/to/hbase/config/hbase-site.xml] /user/todd/myoutput mytable</code><p>
|
||||
The <code class="code">-c config-file</code> option can be used to specify a file
|
||||
containing the appropriate hbase parameters (e.g., hbase-site.xml) if
|
||||
not supplied already on the CLASSPATH (In addition, the CLASSPATH must
|
||||
contain the directory that has the zookeeper configuration file if
|
||||
zookeeper is NOT managed by HBase).
|
||||
</p><p>
|
||||
Note: If the target table does not already exist in HBase, this
|
||||
tool will create the table automatically.</p><p>
|
||||
This tool will run quickly, after which point the new data will be visible in
|
||||
the cluster.
|
||||
</p></div><div class="section" title="9.8.4. See Also"><div class="titlepage"><div><div><h3 class="title"><a name="arch.bulk.load.also"></a>9.8.4. See Also</h3></div></div></div><p>For more information about the referenced utilities, see <a class="xref" href="ops_mgt.html#importtsv" title="14.1.9. ImportTsv">Section 14.1.9, “ImportTsv”</a> and <a class="xref" href="ops_mgt.html#completebulkload" title="14.1.10. CompleteBulkLoad">Section 14.1.10, “CompleteBulkLoad”</a>.
|
||||
</p></div><div class="section" title="9.8.5. Advanced Usage"><div class="titlepage"><div><div><h3 class="title"><a name="arch.bulk.load.adv"></a>9.8.5. Advanced Usage</h3></div></div></div><p>
|
||||
Although the <code class="code">importtsv</code> tool is useful in many cases, advanced users may
|
||||
want to generate data programmatically, or import data from other formats. To get
|
||||
started doing so, dig into <code class="code">ImportTsv.java</code> and check the JavaDoc for
|
||||
HFileOutputFormat.
|
||||
</p><p>
|
||||
The import step of the bulk load can also be done programmatically. See the
|
||||
<code class="code">LoadIncrementalHFiles</code> class for more information.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'arch.bulk.load';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="regions.arch.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="arch.hdfs.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.7. Regions </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.9. HDFS</td></tr></table></div></body></html>
|
|
@ -1,38 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.2. Catalog Tables</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="architecture.html" title="Chapter 9. Architecture"><link rel="next" href="client.html" title="9.3. Client"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.2. Catalog Tables</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="architecture.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="client.html">Next</a></td></tr></table><hr></div><div class="section" title="9.2. Catalog Tables"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="arch.catalog"></a>9.2. Catalog Tables</h2></div></div></div><p>The catalog tables -ROOT- and .META. exist as HBase tables. They are filtered out
|
||||
of the HBase shell's <code class="code">list</code> command, but they are in fact tables just like any other.
|
||||
</p><div class="section" title="9.2.1. ROOT"><div class="titlepage"><div><div><h3 class="title"><a name="arch.catalog.root"></a>9.2.1. ROOT</h3></div></div></div><p>-ROOT- keeps track of where the .META. table is. The -ROOT- table structure is as follows:
|
||||
</p><p>Key:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">.META. region key (<code class="code">.META.,,1</code>)</li></ul></div><p>
|
||||
</p><p>Values:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">info:regioninfo</code> (serialized <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HRegionInfo.html" target="_top">HRegionInfo</a>
|
||||
instance of .META.)</li><li class="listitem"><code class="code">info:server</code> (server:port of the RegionServer holding .META.)</li><li class="listitem"><code class="code">info:serverstartcode</code> (start-time of the RegionServer process holding .META.)</li></ul></div><p>
|
||||
</p></div><div class="section" title="9.2.2. META"><div class="titlepage"><div><div><h3 class="title"><a name="arch.catalog.meta"></a>9.2.2. META</h3></div></div></div><p>The .META. table keeps a list of all regions in the system. The .META. table structure is as follows:
|
||||
</p><p>Key:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Region key of the format (<code class="code">[table],[region start key],[region id]</code>)</li></ul></div><p>
|
||||
</p><p>Values:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">info:regioninfo</code> (serialized <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HRegionInfo.html" target="_top">
|
||||
HRegionInfo</a> instance for this region)
|
||||
</li><li class="listitem"><code class="code">info:server</code> (server:port of the RegionServer containing this region)</li><li class="listitem"><code class="code">info:serverstartcode</code> (start-time of the RegionServer process containing this region)</li></ul></div><p>
|
||||
</p><p>When a table is in the process of splitting, two other columns will be created, <code class="code">info:splitA</code> and <code class="code">info:splitB</code>
|
||||
which represent the two daughter regions. The values for these columns are also serialized HRegionInfo instances.
|
||||
After the region has been split, this row will eventually be deleted.
|
||||
</p><p>Notes on HRegionInfo: the empty key is used to denote table start and table end. A region with an empty start key
|
||||
is the first region in a table. If a region has both an empty start and an empty end key, it's the only region in the table.
|
||||
</p><p>In the (hopefully unlikely) event that programmatic processing of catalog metadata is required, see the
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/util/Writables.html#getHRegionInfo%28byte[]%29" target="_top">Writables</a> utility.
|
||||
</p></div><div class="section" title="9.2.3. Startup Sequencing"><div class="titlepage"><div><div><h3 class="title"><a name="arch.catalog.startup"></a>9.2.3. Startup Sequencing</h3></div></div></div><p>The META location is set in ROOT first. Then META is updated with server and startcode values.
|
||||
</p><p>For information on region-RegionServer assignment, see <a class="xref" href="regions.arch.html#regions.arch.assignment" title="9.7.2. Region-RegionServer Assignment">Section 9.7.2, “Region-RegionServer Assignment”</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'arch.catalog';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="architecture.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="client.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 9. Architecture </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.3. Client</td></tr></table></div></body></html>
|
|
@ -1,23 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.9. HDFS</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="arch.bulk.load.html" title="9.8. Bulk Loading"><link rel="next" href="external_apis.html" title="Chapter 10. Apache HBase (TM) External APIs"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.9. HDFS</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="arch.bulk.load.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="external_apis.html">Next</a></td></tr></table><hr></div><div class="section" title="9.9. HDFS"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="arch.hdfs"></a>9.9. HDFS</h2></div></div></div><p>As HBase runs on HDFS (and each StoreFile is written as a file on HDFS),
|
||||
it is important to have an understanding of the HDFS Architecture
|
||||
especially in terms of how it stores files, handles failovers, and replicates blocks.
|
||||
</p><p>See the Hadoop documentation on <a class="link" href="http://hadoop.apache.org/common/docs/current/hdfs_design.html" target="_top">HDFS Architecture</a>
|
||||
for more information.
|
||||
</p><div class="section" title="9.9.1. NameNode"><div class="titlepage"><div><div><h3 class="title"><a name="arch.hdfs.nn"></a>9.9.1. NameNode</h3></div></div></div><p>The NameNode is responsible for maintaining the filesystem metadata. See the above HDFS Architecture link
|
||||
for more information.
|
||||
</p></div><div class="section" title="9.9.2. DataNode"><div class="titlepage"><div><div><h3 class="title"><a name="arch.hdfs.dn"></a>9.9.2. DataNode</h3></div></div></div><p>The DataNodes are responsible for storing HDFS blocks. See the above HDFS Architecture link
|
||||
for more information.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'arch.hdfs';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="arch.bulk.load.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="external_apis.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.8. Bulk Loading </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 10. Apache HBase (TM) External APIs</td></tr></table></div></body></html>
|
File diff suppressed because one or more lines are too long
|
@ -1,18 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix H. HBase and the Apache Software Foundation</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="hbase.history.html" title="Appendix G. HBase History"><link rel="next" href="asf.reporting.html" title="H.2. ASF Board Reporting"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix H. HBase and the Apache Software Foundation</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hbase.history.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="asf.reporting.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix H. HBase and the Apache Software Foundation"><div class="titlepage"><div><div><h2 class="title"><a name="asf"></a>Appendix H. HBase and the Apache Software Foundation</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="asf.html#asf.devprocess">H.1. ASF Development Process</a></span></dt><dt><span class="section"><a href="asf.reporting.html">H.2. ASF Board Reporting</a></span></dt></dl></div><p>HBase is a project in the Apache Software Foundation and as such there are responsibilities to the ASF to ensure
|
||||
a healthy project.</p><div class="section" title="H.1. ASF Development Process"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="asf.devprocess"></a>H.1. ASF Development Process</h2></div></div></div><p>See the <a class="link" href="http://www.apache.org/dev/#committers" target="_top">Apache Development Process page</a>
|
||||
for all sorts of information, from how the ASF is structured (e.g., PMC, committers, contributors) to tips on contributing
|
||||
and getting involved, and how open-source works at ASF.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'asf';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hbase.history.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="asf.reporting.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix G. HBase History </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> H.2. ASF Board Reporting</td></tr></table></div></body></html>
|
|
@ -1,16 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>H.2. ASF Board Reporting</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="asf.html" title="Appendix H. HBase and the Apache Software Foundation"><link rel="prev" href="asf.html" title="Appendix H. HBase and the Apache Software Foundation"><link rel="next" href="tracing.html" title="Appendix I. Enabling Dapper-like Tracing in HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">H.2. ASF Board Reporting</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="asf.html">Prev</a> </td><th width="60%" align="center">Appendix H. HBase and the Apache Software Foundation</th><td width="20%" align="right"> <a accesskey="n" href="tracing.html">Next</a></td></tr></table><hr></div><div class="section" title="H.2. ASF Board Reporting"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="asf.reporting"></a>H.2. ASF Board Reporting</h2></div></div></div><p>Once a quarter, each project in the ASF portfolio submits a report to the ASF board. This is done by the HBase project
|
||||
lead and the committers. See <a class="link" href="http://www.apache.org/foundation/board/reporting" target="_top">ASF board reporting</a> for more information.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'asf.reporting';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="asf.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="asf.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="tracing.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix H. HBase and the Apache Software Foundation </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix I. Enabling Dapper-like Tracing in HBase</td></tr></table></div></body></html>
|
File diff suppressed because one or more lines are too long
|
@ -1,22 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Index</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="tracing.client.modifications.html" title="I.2. Client Modifications"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Index</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="tracing.client.modifications.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> </td></tr></table><hr></div><div class="index" title="Index"><div class="titlepage"><div><div><h2 class="title"><a name="book_index"></a>Index</h2></div></div></div><div class="index"><div class="indexdiv"><h3>C</h3><dl><dt>Cells, <a class="indexterm" href="cells.html">Cells</a></dt><dt>Column Family, <a class="indexterm" href="columnfamily.html">Column Family</a></dt><dt>Column Family Qualifier, <a class="indexterm" href="columnfamily.html">Column Family</a></dt><dt>Compression, <a class="indexterm" href="compression.html">Compression In HBase</a></dt></dl></div><div class="indexdiv"><h3>H</h3><dl><dt>Hadoop, <a class="indexterm" href="configuration.html#hadoop">Hadoop</a></dt></dl></div><div class="indexdiv"><h3>I</h3><dl><dt>IntegrationTests, <a class="indexterm" href="hbase.tests.html#hbase.unittests.integration">Integration Tests</a></dt></dl></div><div class="indexdiv"><h3>L</h3><dl><dt>LargeTests, <a class="indexterm" href="hbase.tests.html#hbase.unittests.large">Large Tests</a></dt></dl></div><div class="indexdiv"><h3>M</h3><dl><dt>MediumTests, <a class="indexterm" href="hbase.tests.html#hbase.unittests.medium">Medium Tests</a></dt><dt>MSLAB, <a class="indexterm" 
href="jvm.html#gcpause">Long GC pauses</a></dt></dl></div><div class="indexdiv"><h3>N</h3><dl><dt>nproc, <a class="indexterm" href="configuration.html#ulimit">
|
||||
ulimit
|
||||
and
|
||||
nproc
|
||||
</a></dt></dl></div><div class="indexdiv"><h3>S</h3><dl><dt>SmallTests, <a class="indexterm" href="hbase.tests.html#hbase.unittests.small">Small Tests</a></dt></dl></div><div class="indexdiv"><h3>T</h3><dl><dt>Test Resource Checker, <a class="indexterm" href="hbase.tests.html#hbase.unittests.resource.checker">Test Resource Checker</a></dt></dl></div><div class="indexdiv"><h3>U</h3><dl><dt>ulimit, <a class="indexterm" href="configuration.html#ulimit">
|
||||
ulimit
|
||||
and
|
||||
nproc
|
||||
</a></dt></dl></div><div class="indexdiv"><h3>V</h3><dl><dt>Versions, <a class="indexterm" href="versions.html">Versions</a></dt></dl></div><div class="indexdiv"><h3>X</h3><dl><dt>xcievers, <a class="indexterm" href="configuration.html#dfs.datanode.max.xcievers">dfs.datanode.max.xcievers</a></dt></dl></div><div class="indexdiv"><h3>Z</h3><dl><dt>ZooKeeper, <a class="indexterm" href="zookeeper.html">ZooKeeper</a></dt></dl></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'book_index';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="tracing.client.modifications.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> </td></tr><tr><td width="40%" align="left" valign="top">I.2. Client Modifications </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> </td></tr></table></div></body></html>
|
|
@ -1,31 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.3. Building Apache HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="ides.html" title="15.2. IDEs"><link rel="next" href="mvn_repo.html" title="15.4. Adding an Apache HBase release to Apache's Maven Repository"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.3. Building Apache HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="ides.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="mvn_repo.html">Next</a></td></tr></table><hr></div><div class="section" title="15.3. Building Apache HBase"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="build"></a>15.3. Building Apache HBase</h2></div></div></div><div class="section" title="15.3.1. Basic Compile"><div class="titlepage"><div><div><h3 class="title"><a name="build.basic"></a>15.3.1. Basic Compile</h3></div></div></div><p>Thanks to maven, building HBase is pretty easy. You can read about the various maven commands in <a class="xref" href="maven.build.commands.html" title="15.7. Maven Build Commands">Section 15.7, “Maven Build Commands”</a>, but the simplest command to compile HBase from its java source code is:
|
||||
</p><pre class="programlisting">
|
||||
mvn package -DskipTests
|
||||
</pre><p>
|
||||
Or, to clean up before compiling:
|
||||
</p><pre class="programlisting">
|
||||
mvn clean package -DskipTests
|
||||
</pre><p>
|
||||
With Eclipse set up as explained above in <a class="xref" href="ides.html#eclipse" title="15.2.1. Eclipse">Section 15.2.1, “Eclipse”</a>, you can also simply use the build command in Eclipse. To create the full installable HBase package takes a little bit more work, so read on.
|
||||
</p></div><div class="section" title="15.3.2. Building in snappy compression support"><div class="titlepage"><div><div><h3 class="title"><a name="build.snappy"></a>15.3.2. Building in snappy compression support</h3></div></div></div><p>Pass <code class="code">-Dsnappy</code> to trigger the snappy maven profile for building
|
||||
snappy native libs into hbase. See also <a class="xref" href="snappy.compression.html" title="C.5. SNAPPY">Section C.5, “
|
||||
SNAPPY
|
||||
”</a></p></div><div class="section" title="15.3.3. Building the HBase tarball"><div class="titlepage"><div><div><h3 class="title"><a name="build.tgz"></a>15.3.3. Building the HBase tarball</h3></div></div></div><p>Do the following to build the HBase tarball.
|
||||
Passing -Prelease will generate javadoc and run the RAT plugin to verify licenses on source.
|
||||
</p><pre class="programlisting">% MAVEN_OPTS="-Xmx2g" mvn clean site install assembly:assembly -DskipTests -Prelease</pre><p>
|
||||
</p></div><div class="section" title="15.3.4. Build Gotchas"><div class="titlepage"><div><div><h3 class="title"><a name="build.gotchas"></a>15.3.4. Build Gotchas</h3></div></div></div><p>If you see <code class="code">Unable to find resource 'VM_global_library.vm'</code>, ignore it.
|
||||
It's not an error. It is <a class="link" href="http://jira.codehaus.org/browse/MSITE-286" target="_top">officially ugly</a> though.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'build';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="ides.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="mvn_repo.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.2. IDEs </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.4. Adding an Apache HBase release to Apache's Maven Repository</td></tr></table></div></body></html>
|
|
@ -1,15 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>10.4. C/C++ Apache HBase Client</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="external_apis.html" title="Chapter 10. Apache HBase (TM) External APIs"><link rel="prev" href="thrift.html" title="10.3. Thrift"><link rel="next" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">10.4. C/C++ Apache HBase Client</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="thrift.html">Prev</a> </td><th width="60%" align="center">Chapter 10. Apache HBase (TM) External APIs</th><td width="20%" align="right"> <a accesskey="n" href="performance.html">Next</a></td></tr></table><hr></div><div class="section" title="10.4. C/C++ Apache HBase Client"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="c"></a>10.4. C/C++ Apache HBase Client</h2></div></div></div><p>FB's Chip Turner wrote a pure C/C++ client. <a class="link" href="https://github.com/facebook/native-cpp-hbase-client" target="_top">Check it out</a>.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'c';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="thrift.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="external_apis.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="performance.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">10.3. Thrift </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 11. Apache HBase (TM) Performance Tuning</td></tr></table></div></body></html>
|
|
@ -1,16 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 13. Apache HBase (TM) Case Studies</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="trouble.casestudy.html" title="12.14. Case Studies"><link rel="next" href="casestudies.schema.html" title="13.2. Schema Design"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 13. Apache HBase (TM) Case Studies</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="trouble.casestudy.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="casestudies.schema.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 13. Apache HBase (TM) Case Studies"><div class="titlepage"><div><div><h2 class="title"><a name="casestudies"></a>Chapter 13. Apache HBase (TM) Case Studies</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="casestudies.html#casestudies.overview">13.1. Overview</a></span></dt><dt><span class="section"><a href="casestudies.schema.html">13.2. Schema Design</a></span></dt><dd><dl><dt><span class="section"><a href="casestudies.schema.html#casestudies.schema.listdata">13.2.1. List Data</a></span></dt></dl></dd><dt><span class="section"><a href="casestudies.perftroub.html">13.3. Performance/Troubleshooting</a></span></dt><dd><dl><dt><span class="section"><a href="casestudies.perftroub.html#casestudies.slownode">13.3.1. Case Study #1 (Performance Issue On A Single Node)</a></span></dt><dt><span class="section"><a href="casestudies.perftroub.html#casestudies.perf.1">13.3.2. 
Case Study #2 (Performance Research 2012)</a></span></dt><dt><span class="section"><a href="casestudies.perftroub.html#casestudies.perf.2">13.3.3. Case Study #3 (Performance Research 2010)</a></span></dt><dt><span class="section"><a href="casestudies.perftroub.html#casestudies.xceivers">13.3.4. Case Study #4 (xcievers Config)</a></span></dt></dl></dd></dl></div><div class="section" title="13.1. Overview"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="casestudies.overview"></a>13.1. Overview</h2></div></div></div><p>This chapter will describe a variety of performance and troubleshooting case studies that can
|
||||
provide a useful blueprint on diagnosing Apache HBase (TM) cluster issues.</p><p>For more information on Performance and Troubleshooting, see <a class="xref" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning">Chapter 11, <i>Apache HBase (TM) Performance Tuning</i></a> and <a class="xref" href="trouble.html" title="Chapter 12. Troubleshooting and Debugging Apache HBase (TM)">Chapter 12, <i>Troubleshooting and Debugging Apache HBase (TM)</i></a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'casestudies';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="trouble.casestudy.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="casestudies.schema.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">12.14. Case Studies </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 13.2. Schema Design</td></tr></table></div></body></html>
|
|
@ -1,98 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>13.3. Performance/Troubleshooting</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="casestudies.html" title="Chapter 13. Apache HBase (TM) Case Studies"><link rel="prev" href="casestudies.schema.html" title="13.2. Schema Design"><link rel="next" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">13.3. Performance/Troubleshooting</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="casestudies.schema.html">Prev</a> </td><th width="60%" align="center">Chapter 13. Apache HBase (TM) Case Studies</th><td width="20%" align="right"> <a accesskey="n" href="ops_mgt.html">Next</a></td></tr></table><hr></div><div class="section" title="13.3. Performance/Troubleshooting"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="casestudies.perftroub"></a>13.3. Performance/Troubleshooting</h2></div></div></div><div class="section" title="13.3.1. Case Study #1 (Performance Issue On A Single Node)"><div class="titlepage"><div><div><h3 class="title"><a name="casestudies.slownode"></a>13.3.1. Case Study #1 (Performance Issue On A Single Node)</h3></div></div></div><div class="section" title="13.3.1.1. Scenario"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e8443"></a>13.3.1.1. Scenario</h4></div></div></div><p>Following a scheduled reboot, one data node began exhibiting unusual behavior. Routine MapReduce
|
||||
jobs run against HBase tables which regularly completed in five or six minutes began taking 30 or 40 minutes
|
||||
to finish. These jobs were consistently found to be waiting on map and reduce tasks assigned to the troubled data node
|
||||
(e.g., the slow map tasks all had the same Input Split).
|
||||
The situation came to a head during a distributed copy, when the copy was severely prolonged by the lagging node.
|
||||
</p></div><div class="section" title="13.3.1.2. Hardware"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e8448"></a>13.3.1.2. Hardware</h4></div></div></div><p>Datanodes:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Two 12-core processors</li><li class="listitem">Six Enterprise SATA disks</li><li class="listitem">24GB of RAM</li><li class="listitem">Two bonded gigabit NICs</li></ul></div><p>
|
||||
</p><p>Network:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">10 Gigabit top-of-rack switches</li><li class="listitem">20 Gigabit bonded interconnects between racks.</li></ul></div><p>
|
||||
</p></div><div class="section" title="13.3.1.3. Hypotheses"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e8471"></a>13.3.1.3. Hypotheses</h4></div></div></div><div class="section" title="13.3.1.3.1. HBase &quot;Hot Spot&quot; Region"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e8474"></a>13.3.1.3.1. HBase "Hot Spot" Region</h5></div></div></div><p>We hypothesized that we were experiencing a familiar point of pain: a "hot spot" region in an HBase table,
|
||||
where uneven key-space distribution can funnel a huge number of requests to a single HBase region, bombarding the RegionServer
|
||||
process and causing slow response time. Examination of the HBase Master status page showed that the number of HBase requests to the
|
||||
troubled node was almost zero. Further, examination of the HBase logs showed that there were no region splits, compactions, or other region transitions
|
||||
in progress. This effectively ruled out a "hot spot" as the root cause of the observed slowness.
|
||||
</p></div><div class="section" title="13.3.1.3.2. HBase Region With Non-Local Data"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e8479"></a>13.3.1.3.2. HBase Region With Non-Local Data</h5></div></div></div><p>Our next hypothesis was that one of the MapReduce tasks was requesting data from HBase that was not local to the datanode, thus
|
||||
forcing HDFS to request data blocks from other servers over the network. Examination of the datanode logs showed that there were very
|
||||
few blocks being requested over the network, indicating that the HBase region was correctly assigned, and that the majority of the necessary
|
||||
data was located on the node. This ruled out the possibility of non-local data causing a slowdown.
|
||||
</p></div><div class="section" title="13.3.1.3.3. Excessive I/O Wait Due To Swapping Or An Over-Worked Or Failing Hard Disk"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e8484"></a>13.3.1.3.3. Excessive I/O Wait Due To Swapping Or An Over-Worked Or Failing Hard Disk</h5></div></div></div><p>After concluding that Hadoop and HBase were not likely to be the culprits, we moved on to troubleshooting the datanode's hardware.
|
||||
Java, by design, will periodically scan its entire memory space to do garbage collection. If system memory is heavily overcommitted, the Linux
|
||||
kernel may enter a vicious cycle, using up all of its resources swapping Java heap back and forth from disk to RAM as Java tries to run garbage
|
||||
collection. Further, a failing hard disk will often retry reads and/or writes many times before giving up and returning an error. This can manifest
|
||||
as high iowait, as running processes wait for reads and writes to complete. Finally, a disk nearing the upper edge of its performance envelope will
|
||||
begin to cause iowait as it informs the kernel that it cannot accept any more data, and the kernel queues incoming data into the dirty write pool in memory.
|
||||
However, using <code class="code">vmstat(1)</code> and <code class="code">free(1)</code>, we could see that no swap was being used, and the amount of disk IO was only a few kilobytes per second.
|
||||
</p></div><div class="section" title="13.3.1.3.4. Slowness Due To High Processor Usage"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e8495"></a>13.3.1.3.4. Slowness Due To High Processor Usage</h5></div></div></div><p>Next, we checked to see whether the system was performing slowly simply due to very high computational load. <code class="code">top(1)</code> showed that the system load
|
||||
was higher than normal, but <code class="code">vmstat(1)</code> and <code class="code">mpstat(1)</code> showed that the amount of processor being used for actual computation was low.
|
||||
</p></div><div class="section" title="13.3.1.3.5. Network Saturation (The Winner)"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e8509"></a>13.3.1.3.5. Network Saturation (The Winner)</h5></div></div></div><p>Since neither the disks nor the processors were being utilized heavily, we moved on to the performance of the network interfaces. The datanode had two
|
||||
gigabit ethernet adapters, bonded to form an active-standby interface. <code class="code">ifconfig(8)</code> showed some unusual anomalies, namely interface errors, overruns, framing errors.
|
||||
While not unheard of, these kinds of errors are exceedingly rare on modern hardware which is operating as it should:
|
||||
</p><pre class="programlisting">
|
||||
$ /sbin/ifconfig bond0
|
||||
bond0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
|
||||
inet addr:10.x.x.x Bcast:10.x.x.255 Mask:255.255.255.0
|
||||
UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
|
||||
RX packets:2990700159 errors:12 dropped:0 overruns:1 frame:6 <--- Look Here! Errors!
|
||||
TX packets:3443518196 errors:0 dropped:0 overruns:0 carrier:0
|
||||
collisions:0 txqueuelen:0
|
||||
RX bytes:2416328868676 (2.4 TB) TX bytes:3464991094001 (3.4 TB)
|
||||
</pre><p>
|
||||
</p><p>These errors immediately led us to suspect that one or more of the ethernet interfaces might have negotiated the wrong line speed. This was confirmed both by running an ICMP ping
|
||||
from an external host and observing round-trip-time in excess of 700ms, and by running <code class="code">ethtool(8)</code> on the members of the bond interface and discovering that the active interface
|
||||
was operating at 100Mb/s, full duplex.
|
||||
</p><pre class="programlisting">
|
||||
$ sudo ethtool eth0
|
||||
Settings for eth0:
|
||||
Supported ports: [ TP ]
|
||||
Supported link modes: 10baseT/Half 10baseT/Full
|
||||
100baseT/Half 100baseT/Full
|
||||
1000baseT/Full
|
||||
Supports auto-negotiation: Yes
|
||||
Advertised link modes: 10baseT/Half 10baseT/Full
|
||||
100baseT/Half 100baseT/Full
|
||||
1000baseT/Full
|
||||
Advertised pause frame use: No
|
||||
Advertised auto-negotiation: Yes
|
||||
Link partner advertised link modes: Not reported
|
||||
Link partner advertised pause frame use: No
|
||||
Link partner advertised auto-negotiation: No
|
||||
Speed: 100Mb/s <--- Look Here! Should say 1000Mb/s!
|
||||
Duplex: Full
|
||||
Port: Twisted Pair
|
||||
PHYAD: 1
|
||||
Transceiver: internal
|
||||
Auto-negotiation: on
|
||||
MDI-X: Unknown
|
||||
Supports Wake-on: umbg
|
||||
Wake-on: g
|
||||
Current message level: 0x00000003 (3)
|
||||
Link detected: yes
|
||||
</pre><p>
|
||||
</p><p>In normal operation, the ICMP ping round trip time should be around 20ms, and the interface speed and duplex should read, "1000Mb/s", and, "Full", respectively.
|
||||
</p></div></div><div class="section" title="13.3.1.4. Resolution"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e8530"></a>13.3.1.4. Resolution</h4></div></div></div><p>After determining that the active ethernet adapter was at the incorrect speed, we used the <code class="code">ifenslave(8)</code> command to make the standby interface
|
||||
the active interface, which yielded an immediate improvement in MapReduce performance, and a 10 times improvement in network throughput.
|
||||
</p><p>On the next trip to the datacenter, we determined that the line speed issue was ultimately caused by a bad network cable, which was replaced.
|
||||
</p></div></div><div class="section" title="13.3.2. Case Study #2 (Performance Research 2012)"><div class="titlepage"><div><div><h3 class="title"><a name="casestudies.perf.1"></a>13.3.2. Case Study #2 (Performance Research 2012)</h3></div></div></div><p>Investigation results of a self-described "we're not sure what's wrong, but it seems slow" problem.
|
||||
<a class="link" href="http://gbif.blogspot.com/2012/03/hbase-performance-evaluation-continued.html" target="_top">http://gbif.blogspot.com/2012/03/hbase-performance-evaluation-continued.html</a>
|
||||
</p></div><div class="section" title="13.3.3. Case Study #3 (Performance Research 2010)"><div class="titlepage"><div><div><h3 class="title"><a name="casestudies.perf.2"></a>13.3.3. Case Study #3 (Performance Research 2010)</h3></div></div></div><p>
|
||||
Investigation results of general cluster performance from 2010. Although this research is on an older version of the codebase, this writeup
|
||||
is still very useful in terms of approach.
|
||||
<a class="link" href="http://hstack.org/hbase-performance-testing/" target="_top">http://hstack.org/hbase-performance-testing/</a>
|
||||
</p></div><div class="section" title="13.3.4. Case Study #4 (xcievers Config)"><div class="titlepage"><div><div><h3 class="title"><a name="casestudies.xceivers"></a>13.3.4. Case Study #4 (xcievers Config)</h3></div></div></div><p>Case study of configuring <code class="code">xceivers</code>, and diagnosing errors from mis-configurations.
|
||||
<a class="link" href="http://www.larsgeorge.com/2012/03/hadoop-hbase-and-xceivers.html" target="_top">http://www.larsgeorge.com/2012/03/hadoop-hbase-and-xceivers.html</a>
|
||||
</p><p>See also <a class="xref" href="configuration.html#dfs.datanode.max.xcievers" title="2.1.3.2. dfs.datanode.max.xcievers">Section 2.1.3.2, “<code class="varname">dfs.datanode.max.xcievers</code>”</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'casestudies.perftroub';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="casestudies.schema.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="casestudies.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="ops_mgt.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">13.2. Schema Design </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 14. Apache HBase (TM) Operational Management</td></tr></table></div></body></html>
|
|
@ -1,122 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>13.2. Schema Design</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="casestudies.html" title="Chapter 13. Apache HBase (TM) Case Studies"><link rel="prev" href="casestudies.html" title="Chapter 13. Apache HBase (TM) Case Studies"><link rel="next" href="casestudies.perftroub.html" title="13.3. Performance/Troubleshooting"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">13.2. Schema Design</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="casestudies.html">Prev</a> </td><th width="60%" align="center">Chapter 13. Apache HBase (TM) Case Studies</th><td width="20%" align="right"> <a accesskey="n" href="casestudies.perftroub.html">Next</a></td></tr></table><hr></div><div class="section" title="13.2. Schema Design"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="casestudies.schema"></a>13.2. Schema Design</h2></div></div></div><div class="section" title="13.2.1. List Data"><div class="titlepage"><div><div><h3 class="title"><a name="casestudies.schema.listdata"></a>13.2.1. List Data</h3></div></div></div><p>The following is an exchange from the user dist-list regarding a fairly common question:
|
||||
how to handle per-user list data in Apache HBase.
|
||||
</p><p>*** QUESTION ***</p><p>
|
||||
We're looking at how to store a large amount of (per-user) list data in
|
||||
HBase, and we were trying to figure out what kind of access pattern made
|
||||
the most sense. One option is store the majority of the data in a key, so
|
||||
we could have something like:
|
||||
</p><pre class="programlisting">
|
||||
&lt;FixedWidthUserName&gt;&lt;FixedWidthValueId1&gt;:"" (no value)
|
||||
&lt;FixedWidthUserName&gt;&lt;FixedWidthValueId2&gt;:"" (no value)
|
||||
&lt;FixedWidthUserName&gt;&lt;FixedWidthValueId3&gt;:"" (no value)
|
||||
</pre>
|
||||
|
||||
The other option we had was to do this entirely using:
|
||||
<pre class="programlisting">
|
||||
&lt;FixedWidthUserName&gt;&lt;FixedWidthPageNum0&gt;:&lt;FixedWidthLength&gt;&lt;FixedIdNextPageNum&gt;&lt;ValueId1&gt;&lt;ValueId2&gt;&lt;ValueId3&gt;...
|
||||
&lt;FixedWidthUserName&gt;&lt;FixedWidthPageNum1&gt;:&lt;FixedWidthLength&gt;&lt;FixedIdNextPageNum&gt;&lt;ValueId1&gt;&lt;ValueId2&gt;&lt;ValueId3&gt;...
|
||||
</pre><p>
|
||||
where each row would contain multiple values.
|
||||
So in one case reading the first thirty values would be:
|
||||
</p><pre class="programlisting">
|
||||
scan { STARTROW => 'FixedWidthUsername' LIMIT => 30}
|
||||
</pre>
|
||||
And in the second case it would be
|
||||
<pre class="programlisting">
|
||||
get 'FixedWidthUserName\x00\x00\x00\x00'
|
||||
</pre><p>
|
||||
The general usage pattern would be to read only the first 30 values of
|
||||
these lists, with infrequent access reading deeper into the lists. Some
|
||||
users would have <= 30 total values in these lists, and some users would
|
||||
have millions (i.e. power-law distribution)
|
||||
</p><p>
|
||||
The single-value format seems like it would take up more space on HBase,
|
||||
but would offer some improved retrieval / pagination flexibility. Would
|
||||
there be any significant performance advantages to be able to paginate via
|
||||
gets vs paginating with scans?
|
||||
</p><p>
|
||||
My initial understanding was that doing a scan should be faster if our
|
||||
paging size is unknown (and caching is set appropriately), but that gets
|
||||
should be faster if we'll always need the same page size. I've ended up
|
||||
hearing different people tell me opposite things about performance. I
|
||||
assume the page sizes would be relatively consistent, so for most use cases
|
||||
we could guarantee that we only wanted one page of data in the
|
||||
fixed-page-length case. I would also assume that we would have infrequent
|
||||
updates, but may have inserts into the middle of these lists (meaning we'd
|
||||
need to update all subsequent rows).
|
||||
</p><p>
|
||||
Thanks for help / suggestions / follow-up questions.
|
||||
</p><p>*** ANSWER ***</p><p>
|
||||
If I understand you correctly, you're ultimately trying to store
|
||||
triples in the form "user, valueid, value", right? E.g., something
|
||||
like:
|
||||
</p><pre class="programlisting">
|
||||
"user123, firstname, Paul",
|
||||
"user234, lastname, Smith"
|
||||
</pre><p>
|
||||
(But the usernames are fixed width, and the valueids are fixed width).
|
||||
</p><p>
|
||||
And, your access pattern is along the lines of: "for user X, list the
|
||||
next 30 values, starting with valueid Y". Is that right? And these
|
||||
values should be returned sorted by valueid?
|
||||
</p><p>
|
||||
The tl;dr version is that you should probably go with one row per
|
||||
user+value, and not build a complicated intra-row pagination scheme on
|
||||
your own unless you're really sure it is needed.
|
||||
</p><p>
|
||||
Your two options mirror a common question people have when designing
|
||||
HBase schemas: should I go "tall" or "wide"? Your first schema is
|
||||
"tall": each row represents one value for one user, and so there are
|
||||
many rows in the table for each user; the row key is user + valueid,
|
||||
and there would be (presumably) a single column qualifier that means
|
||||
"the value". This is great if you want to scan over rows in sorted
|
||||
order by row key (thus my question above, about whether these ids are
|
||||
sorted correctly). You can start a scan at any user+valueid, read the
|
||||
next 30, and be done. What you're giving up is the ability to have
|
||||
transactional guarantees around all the rows for one user, but it
|
||||
doesn't sound like you need that. Doing it this way is generally
|
||||
recommended (see
|
||||
here <a class="link" href="http://hbase.apache.org/book.html#schema.smackdown" target="_top">http://hbase.apache.org/book.html#schema.smackdown</a>).
|
||||
</p><p>
|
||||
Your second option is "wide": you store a bunch of values in one row,
|
||||
using different qualifiers (where the qualifier is the valueid). The
|
||||
simple way to do that would be to just store ALL values for one user
|
||||
in a single row. I'm guessing you jumped to the "paginated" version
|
||||
because you're assuming that storing millions of columns in a single
|
||||
row would be bad for performance, which may or may not be true; as
|
||||
long as you're not trying to do too much in a single request, or do
|
||||
things like scanning over and returning all of the cells in the row,
|
||||
it shouldn't be fundamentally worse. The client has methods that allow
|
||||
you to get specific slices of columns.
|
||||
</p><p>
|
||||
Note that neither case fundamentally uses more disk space than the
|
||||
other; you're just "shifting" part of the identifying information for
|
||||
a value either to the left (into the row key, in option one) or to the
|
||||
right (into the column qualifiers in option 2). Under the covers,
|
||||
every key/value still stores the whole row key, and column family
|
||||
name. (If this is a bit confusing, take an hour and watch Lars
|
||||
George's excellent video about understanding HBase schema design:
|
||||
<a class="link" href="http://www.youtube.com/watch?v=_HLoH_PgrLk" target="_top">http://www.youtube.com/watch?v=_HLoH_PgrLk</a>).
|
||||
</p><p>
|
||||
A manually paginated version has lots more complexities, as you note,
|
||||
like having to keep track of how many things are in each page,
|
||||
re-shuffling if new values are inserted, etc. That seems significantly
|
||||
more complex. It might have some slight speed advantages (or
|
||||
disadvantages!) at extremely high throughput, and the only way to
|
||||
really know that would be to try it out. If you don't have time to
|
||||
build it both ways and compare, my advice would be to start with the
|
||||
simplest option (one row per user+value). Start simple and iterate! :)
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'casestudies.schema';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="casestudies.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="casestudies.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="casestudies.perftroub.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 13. Apache HBase (TM) Case Studies </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 13.3. Performance/Troubleshooting</td></tr></table></div></body></html>
|
|
@ -1,16 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.6. Cells</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="columnfamily.html" title="5.5. Column Family"><link rel="next" href="data_model_operations.html" title="5.7. Data Model Operations"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.6. Cells</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="columnfamily.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="data_model_operations.html">Next</a></td></tr></table><hr></div><div class="section" title="5.6. Cells"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="cells"></a>5.6. Cells<a class="indexterm" name="d2121e3180"></a></h2></div></div></div><p>A <span class="emphasis"><em>{row, column, version} </em></span>tuple exactly
|
||||
specifies a <code class="literal">cell</code> in HBase.
|
||||
Cell content is uninterpreted bytes.</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'cells';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="columnfamily.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="data_model_operations.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.5. Column Family </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.7. Data Model Operations</td></tr></table></div></body></html>
|
|
@ -1,29 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>6.8. Keeping Deleted Cells</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="schema.html" title="Chapter 6. HBase and Schema Design"><link rel="prev" href="ttl.html" title="6.7. Time To Live (TTL)"><link rel="next" href="secondary.indexes.html" title="6.9. Secondary Indexes and Alternate Query Paths"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">6.8.
|
||||
Keeping Deleted Cells
|
||||
</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="ttl.html">Prev</a> </td><th width="60%" align="center">Chapter 6. HBase and Schema Design</th><td width="20%" align="right"> <a accesskey="n" href="secondary.indexes.html">Next</a></td></tr></table><hr></div><div class="section" title="6.8. Keeping Deleted Cells"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="cf.keep.deleted"></a>6.8.
|
||||
Keeping Deleted Cells
|
||||
</h2></div></div></div><p>ColumnFamilies can optionally keep deleted cells. That means deleted cells can still be retrieved with
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html" target="_top">Get</a> or
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" target="_top">Scan</a> operations,
|
||||
as long as these operations have a time range specified that ends before the timestamp of any delete that would affect the cells.
|
||||
This allows for point in time queries even in the presence of deletes.
|
||||
</p><p>
|
||||
Deleted cells are still subject to TTL and there will never be more than "maximum number of versions" deleted cells.
|
||||
A new "raw" scan option returns all deleted rows and the delete markers.
|
||||
</p><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html" target="_top">HColumnDescriptor</a> for more information.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'cf.keep.deleted';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="ttl.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="schema.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="secondary.indexes.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">6.7. Time To Live (TTL) </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 6.9.
|
||||
Secondary Indexes and Alternate Query Paths
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,20 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>C.6. Changing Compression Schemes</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="compression.html" title="Appendix C. Compression In HBase"><link rel="prev" href="snappy.compression.html" title="C.5. SNAPPY"><link rel="next" href="apd.html" title="Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">C.6. Changing Compression Schemes</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="snappy.compression.html">Prev</a> </td><th width="60%" align="center">Appendix C. Compression In HBase</th><td width="20%" align="right"> <a accesskey="n" href="apd.html">Next</a></td></tr></table><hr></div><div class="section" title="C.6. Changing Compression Schemes"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="changing.compression"></a>C.6. Changing Compression Schemes</h2></div></div></div><p>A frequent question on the dist-list is how to change compression schemes for ColumnFamilies. This is actually quite simple,
|
||||
and can be done via an alter command. Because the compression scheme is encoded at the block-level in StoreFiles, the table does
|
||||
<span class="emphasis"><em>not</em></span> need to be re-created and the data does <span class="emphasis"><em>not</em></span> need to be copied somewhere else. Just make sure
|
||||
the old codec is still available until you are sure that all of the old StoreFiles have been compacted.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'changing.compression';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="snappy.compression.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="compression.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="apd.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">C.5.
|
||||
SNAPPY
|
||||
</td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase</td></tr></table></div></body></html>
|
|
@ -1,158 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.4. Client Request Filters</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="client.html" title="9.3. Client"><link rel="next" href="master.html" title="9.5. Master"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.4. Client Request Filters</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="client.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="master.html">Next</a></td></tr></table><hr></div><div class="section" title="9.4. Client Request Filters"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="client.filter"></a>9.4. Client Request Filters</h2></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html" target="_top">Get</a> and <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" target="_top">Scan</a> instances can be
|
||||
optionally configured with <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/Filter.html" target="_top">filters</a> which are applied on the RegionServer.
|
||||
</p><p>Filters can be confusing because there are many different types, and it is best to approach them by understanding the groups
|
||||
of Filter functionality.
|
||||
</p><div class="section" title="9.4.1. Structural"><div class="titlepage"><div><div><h3 class="title"><a name="client.filter.structural"></a>9.4.1. Structural</h3></div></div></div><p>Structural Filters contain other Filters.</p><div class="section" title="9.4.1.1. FilterList"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.structural.fl"></a>9.4.1.1. FilterList</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FilterList.html" target="_top">FilterList</a>
|
||||
represents a list of Filters with a relationship of <code class="code">FilterList.Operator.MUST_PASS_ALL</code> or
|
||||
<code class="code">FilterList.Operator.MUST_PASS_ONE</code> between the Filters. The following example shows an 'or' between two
|
||||
Filters (checking for either 'my value' or 'my other value' on the same attribute).
|
||||
</p><pre class="programlisting">
|
||||
FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE);
|
||||
SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
|
||||
cf,
|
||||
column,
|
||||
CompareOp.EQUAL,
|
||||
Bytes.toBytes("my value")
|
||||
);
|
||||
list.add(filter1);
|
||||
SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
|
||||
cf,
|
||||
column,
|
||||
CompareOp.EQUAL,
|
||||
Bytes.toBytes("my other value")
|
||||
);
|
||||
list.add(filter2);
|
||||
scan.setFilter(list);
|
||||
</pre><p>
|
||||
</p></div></div><div class="section" title="9.4.2. Column Value"><div class="titlepage"><div><div><h3 class="title"><a name="client.filter.cv"></a>9.4.2. Column Value</h3></div></div></div><div class="section" title="9.4.2.1. SingleColumnValueFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.cv.scvf"></a>9.4.2.1. SingleColumnValueFilter</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.html" target="_top">SingleColumnValueFilter</a>
|
||||
can be used to test column values for equivalence (<code class="code"><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/CompareFilter.CompareOp.html" target="_top">CompareOp.EQUAL</a>
|
||||
</code>), inequality (<code class="code">CompareOp.NOT_EQUAL</code>), or ranges
|
||||
(e.g., <code class="code">CompareOp.GREATER</code>). The following is an example of testing the equivalence of a column to a String value "my value"...
|
||||
</p><pre class="programlisting">
|
||||
SingleColumnValueFilter filter = new SingleColumnValueFilter(
|
||||
cf,
|
||||
column,
|
||||
CompareOp.EQUAL,
|
||||
Bytes.toBytes("my value")
|
||||
);
|
||||
scan.setFilter(filter);
|
||||
</pre><p>
|
||||
</p></div></div><div class="section" title="9.4.3. Column Value Comparators"><div class="titlepage"><div><div><h3 class="title"><a name="client.filter.cvp"></a>9.4.3. Column Value Comparators</h3></div></div></div><p>There are several Comparator classes in the Filter package that deserve special mention.
|
||||
These Comparators are used in concert with other Filters, such as <a class="xref" href="client.filter.html#client.filter.cv.scvf" title="9.4.2.1. SingleColumnValueFilter">Section 9.4.2.1, “SingleColumnValueFilter”</a>.
|
||||
</p><div class="section" title="9.4.3.1. RegexStringComparator"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.cvp.rcs"></a>9.4.3.1. RegexStringComparator</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RegexStringComparator.html" target="_top">RegexStringComparator</a>
|
||||
supports regular expressions for value comparisons.
|
||||
</p><pre class="programlisting">
|
||||
RegexStringComparator comp = new RegexStringComparator("my."); // any value that starts with 'my'
|
||||
SingleColumnValueFilter filter = new SingleColumnValueFilter(
|
||||
cf,
|
||||
column,
|
||||
CompareOp.EQUAL,
|
||||
comp
|
||||
);
|
||||
scan.setFilter(filter);
|
||||
</pre><p>
|
||||
See the Oracle JavaDoc for <a class="link" href="http://download.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html" target="_top">supported RegEx patterns in Java</a>.
|
||||
</p></div><div class="section" title="9.4.3.2. SubstringComparator"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.cvp.sc"></a>9.4.3.2. SubstringComparator</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/SubstringComparator.html" target="_top">SubstringComparator</a>
|
||||
can be used to determine if a given substring exists in a value. The comparison is case-insensitive.
|
||||
</p><pre class="programlisting">
|
||||
SubstringComparator comp = new SubstringComparator("y val"); // looking for 'my value'
|
||||
SingleColumnValueFilter filter = new SingleColumnValueFilter(
|
||||
cf,
|
||||
column,
|
||||
CompareOp.EQUAL,
|
||||
comp
|
||||
);
|
||||
scan.setFilter(filter);
|
||||
</pre></div><div class="section" title="9.4.3.3. BinaryPrefixComparator"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.cvp.bfp"></a>9.4.3.3. BinaryPrefixComparator</h4></div></div></div><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryPrefixComparator.html" target="_top">BinaryPrefixComparator</a>.</p></div><div class="section" title="9.4.3.4. BinaryComparator"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.cvp.bc"></a>9.4.3.4. BinaryComparator</h4></div></div></div><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComparator.html" target="_top">BinaryComparator</a>.</p></div></div><div class="section" title="9.4.4. KeyValue Metadata"><div class="titlepage"><div><div><h3 class="title"><a name="client.filter.kvm"></a>9.4.4. KeyValue Metadata</h3></div></div></div><p>As HBase stores data internally as KeyValue pairs, KeyValue Metadata Filters evaluate the existence of keys (i.e., ColumnFamily:Column qualifiers)
|
||||
for a row, as opposed to the values covered in the previous section.
|
||||
</p><div class="section" title="9.4.4.1. FamilyFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.kvm.ff"></a>9.4.4.1. FamilyFilter</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FamilyFilter.html" target="_top">FamilyFilter</a> can be used
|
||||
to filter on the ColumnFamily. It is generally a better idea to select ColumnFamilies in the Scan than to do it with a Filter.</p></div><div class="section" title="9.4.4.2. QualifierFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.kvm.qf"></a>9.4.4.2. QualifierFilter</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/QualifierFilter.html" target="_top">QualifierFilter</a> can be used
|
||||
to filter based on Column (aka Qualifier) name.
|
||||
</p></div><div class="section" title="9.4.4.3. ColumnPrefixFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.kvm.cpf"></a>9.4.4.3. ColumnPrefixFilter</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.html" target="_top">ColumnPrefixFilter</a> can be used
|
||||
to filter based on the lead portion of Column (aka Qualifier) names.
|
||||
</p><p>A ColumnPrefixFilter seeks ahead to the first column matching the prefix in each row and for each involved column family. It can be used to efficiently
|
||||
get a subset of the columns in very wide rows.
|
||||
</p><p>Note: The same column qualifier can be used in different column families. This filter returns all matching columns.
|
||||
</p><p>Example: Find all columns in a row and family that start with "abc"
|
||||
</p><pre class="programlisting">
|
||||
HTableInterface t = ...;
|
||||
byte[] row = ...;
|
||||
byte[] family = ...;
|
||||
byte[] prefix = Bytes.toBytes("abc");
|
||||
Scan scan = new Scan(row, row); // (optional) limit to one row
|
||||
scan.addFamily(family); // (optional) limit to one family
|
||||
Filter f = new ColumnPrefixFilter(prefix);
|
||||
scan.setFilter(f);
|
||||
scan.setBatch(10); // set this if there could be many columns returned
|
||||
ResultScanner rs = t.getScanner(scan);
|
||||
for (Result r = rs.next(); r != null; r = rs.next()) {
|
||||
for (KeyValue kv : r.raw()) {
|
||||
// each kv represents a column
|
||||
}
|
||||
}
|
||||
rs.close();
|
||||
</pre><p>
|
||||
</p></div><div class="section" title="9.4.4.4. MultipleColumnPrefixFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.kvm.mcpf"></a>9.4.4.4. MultipleColumnPrefixFilter</h4></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.html" target="_top">MultipleColumnPrefixFilter</a> behaves like ColumnPrefixFilter
|
||||
but allows specifying multiple prefixes.
|
||||
</p><p>Like ColumnPrefixFilter, MultipleColumnPrefixFilter efficiently seeks ahead to the first column matching the lowest prefix and also seeks past ranges of columns between prefixes.
|
||||
It can be used to efficiently get discontinuous sets of columns from very wide rows.
|
||||
</p><p>Example: Find all columns in a row and family that start with "abc" or "xyz"
|
||||
</p><pre class="programlisting">
|
||||
HTableInterface t = ...;
|
||||
byte[] row = ...;
|
||||
byte[] family = ...;
|
||||
byte[][] prefixes = new byte[][] {Bytes.toBytes("abc"), Bytes.toBytes("xyz")};
|
||||
Scan scan = new Scan(row, row); // (optional) limit to one row
|
||||
scan.addFamily(family); // (optional) limit to one family
|
||||
Filter f = new MultipleColumnPrefixFilter(prefixes);
|
||||
scan.setFilter(f);
|
||||
scan.setBatch(10); // set this if there could be many columns returned
|
||||
ResultScanner rs = t.getScanner(scan);
|
||||
for (Result r = rs.next(); r != null; r = rs.next()) {
|
||||
for (KeyValue kv : r.raw()) {
|
||||
// each kv represents a column
|
||||
}
|
||||
}
|
||||
rs.close();
|
||||
</pre><p>
|
||||
</p></div><div class="section" title="9.4.4.5. ColumnRangeFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.kvm.crf"></a>9.4.4.5. ColumnRangeFilter</h4></div></div></div><p>A <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/ColumnRangeFilter.html" target="_top">ColumnRangeFilter</a> allows efficient intra row scanning.
|
||||
</p><p>A ColumnRangeFilter can seek ahead to the first matching column for each involved column family. It can be used to efficiently
|
||||
get a 'slice' of the columns of a very wide row.
|
||||
For example, you may have a million columns in a row but only want to look at columns bbbb-bbdd.
|
||||
</p><p>Note: The same column qualifier can be used in different column families. This filter returns all matching columns.
|
||||
</p><p>Example: Find all columns in a row and family between "bbbb" (inclusive) and "bbdd" (inclusive)
|
||||
</p><pre class="programlisting">
|
||||
HTableInterface t = ...;
|
||||
byte[] row = ...;
|
||||
byte[] family = ...;
|
||||
byte[] startColumn = Bytes.toBytes("bbbb");
|
||||
byte[] endColumn = Bytes.toBytes("bbdd");
|
||||
Scan scan = new Scan(row, row); // (optional) limit to one row
|
||||
scan.addFamily(family); // (optional) limit to one family
|
||||
Filter f = new ColumnRangeFilter(startColumn, true, endColumn, true);
|
||||
scan.setFilter(f);
|
||||
scan.setBatch(10); // set this if there could be many columns returned
|
||||
ResultScanner rs = t.getScanner(scan);
|
||||
for (Result r = rs.next(); r != null; r = rs.next()) {
|
||||
for (KeyValue kv : r.raw()) {
|
||||
// each kv represents a column
|
||||
}
|
||||
}
|
||||
rs.close();
|
||||
</pre><p>
|
||||
</p><p>Note: Introduced in HBase 0.92</p></div></div><div class="section" title="9.4.5. RowKey"><div class="titlepage"><div><div><h3 class="title"><a name="client.filter.row"></a>9.4.5. RowKey</h3></div></div></div><div class="section" title="9.4.5.1. RowFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.row.rf"></a>9.4.5.1. RowFilter</h4></div></div></div><p>It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RowFilter.html" target="_top">RowFilter</a> can also be used.</p></div></div><div class="section" title="9.4.6. Utility"><div class="titlepage"><div><div><h3 class="title"><a name="client.filter.utility"></a>9.4.6. Utility</h3></div></div></div><div class="section" title="9.4.6.1. FirstKeyOnlyFilter"><div class="titlepage"><div><div><h4 class="title"><a name="client.filter.utility.fkof"></a>9.4.6.1. FirstKeyOnlyFilter</h4></div></div></div><p>This is primarily used for rowcount jobs.
|
||||
See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FirstKeyOnlyFilter.html" target="_top">FirstKeyOnlyFilter</a>.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'client.filter';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="client.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="master.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.3. Client </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.5. Master</td></tr></table></div></body></html>
|
|
@ -1,66 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.3. Client</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="arch.catalog.html" title="9.2. Catalog Tables"><link rel="next" href="client.filter.html" title="9.4. Client Request Filters"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.3. Client</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="arch.catalog.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="client.filter.html">Next</a></td></tr></table><hr></div><div class="section" title="9.3. Client"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="client"></a>9.3. Client</h2></div></div></div><p>The HBase client
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html" target="_top">HTable</a>
|
||||
is responsible for finding RegionServers that are serving the
|
||||
particular row range of interest. It does this by querying
|
||||
the <code class="code">.META.</code> and <code class="code">-ROOT-</code> catalog tables
|
||||
(TODO: Explain). After locating the required
|
||||
region(s), the client <span class="emphasis"><em>directly</em></span> contacts
|
||||
the RegionServer serving that region (i.e., it does not go
|
||||
through the master) and issues the read or write request.
|
||||
This information is cached in the client so that subsequent requests
|
||||
need not go through the lookup process. Should a region be reassigned
|
||||
either by the master load balancer or because a RegionServer has died,
|
||||
the client will requery the catalog tables to determine the new
|
||||
location of the user region.
|
||||
</p><p>See <a class="xref" href="master.html#master.runtime" title="9.5.2. Runtime Impact">Section 9.5.2, “Runtime Impact”</a> for more information about the impact of the Master on HBase Client
|
||||
communication.
|
||||
</p><p>Administrative functions are handled through <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html" target="_top">HBaseAdmin</a>.
|
||||
</p><div class="section" title="9.3.1. Connections"><div class="titlepage"><div><div><h3 class="title"><a name="client.connections"></a>9.3.1. Connections</h3></div></div></div><p>For connection configuration information, see <a class="xref" href="config.files.html#client_dependencies" title="2.3.4. Client configuration and dependencies connecting to an HBase cluster">Section 2.3.4, “Client configuration and dependencies connecting to an HBase cluster”</a>.
|
||||
</p><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html" target="_top">HTable</a>
|
||||
instances are not thread-safe. When creating HTable instances, it is advisable to use the same <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HBaseConfiguration.html" target="_top">HBaseConfiguration</a>
|
||||
instance. This will ensure sharing of ZooKeeper and socket instances to the RegionServers
|
||||
which is usually what you want. For example, this is preferred:
|
||||
</p><pre class="programlisting">HBaseConfiguration conf = HBaseConfiguration.create();
|
||||
HTable table1 = new HTable(conf, "myTable");
|
||||
HTable table2 = new HTable(conf, "myTable");</pre><p>
|
||||
as opposed to this:
|
||||
</p><pre class="programlisting">HBaseConfiguration conf1 = HBaseConfiguration.create();
|
||||
HTable table1 = new HTable(conf1, "myTable");
|
||||
HBaseConfiguration conf2 = HBaseConfiguration.create();
|
||||
HTable table2 = new HTable(conf2, "myTable");</pre><p>
|
||||
For more information about how connections are handled in the HBase client,
|
||||
see <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HConnectionManager.html" target="_top">HConnectionManager</a>.
|
||||
</p><div class="section" title="9.3.1.1. Connection Pooling"><div class="titlepage"><div><div><h4 class="title"><a name="client.connection.pooling"></a>9.3.1.1. Connection Pooling</h4></div></div></div><p>For applications which require high-end multithreaded access (e.g., web-servers or application servers that may serve many application threads
|
||||
in a single JVM), see <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTablePool.html" target="_top">HTablePool</a>.
|
||||
</p></div></div><div class="section" title="9.3.2. WriteBuffer and Batch Methods"><div class="titlepage"><div><div><h3 class="title"><a name="client.writebuffer"></a>9.3.2. WriteBuffer and Batch Methods</h3></div></div></div><p>If <a class="xref" href="perf.writing.html#perf.hbase.client.autoflush" title="11.7.4. HBase Client: AutoFlush">Section 11.7.4, “HBase Client: AutoFlush”</a> is turned off on
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html" target="_top">HTable</a>,
|
||||
<code class="classname">Put</code>s are sent to RegionServers when the writebuffer
|
||||
is filled. The writebuffer is 2MB by default. Before an HTable instance is
|
||||
discarded, either <code class="methodname">close()</code> or
|
||||
<code class="methodname">flushCommits()</code> should be invoked so Puts
|
||||
will not be lost.
|
||||
</p><p>Note: <code class="code">htable.delete(Delete);</code> does not go in the writebuffer! This only applies to Puts.
|
||||
</p><p>For additional information on write durability, review the <a class="link" href="acid-semantics.html" target="_top">ACID semantics</a> page.
|
||||
</p><p>For fine-grained control of batching of
|
||||
<code class="classname">Put</code>s or <code class="classname">Delete</code>s,
|
||||
see the <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#batch%28java.util.List%29" target="_top">batch</a> methods on HTable.
|
||||
</p></div><div class="section" title="9.3.3. External Clients"><div class="titlepage"><div><div><h3 class="title"><a name="client.external"></a>9.3.3. External Clients</h3></div></div></div><p>Information on non-Java clients and custom protocols is covered in <a class="xref" href="external_apis.html" title="Chapter 10. Apache HBase (TM) External APIs">Chapter 10, <i>Apache HBase (TM) External APIs</i></a>
|
||||
</p></div><div class="section" title="9.3.4. RowLocks"><div class="titlepage"><div><div><h3 class="title"><a name="client.rowlocks"></a>9.3.4. RowLocks</h3></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#lockRow%28byte[]%29" target="_top">RowLocks</a> are still
|
||||
in the client API <span class="emphasis"><em>however</em></span> they are discouraged because if not managed properly these can
|
||||
lock up the RegionServers.
|
||||
</p><p>There is an outstanding ticket <a class="link" href="https://issues.apache.org/jira/browse/HBASE-2332" target="_top">HBASE-2332</a> to
|
||||
remove this feature from the client.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'client';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="arch.catalog.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="client.filter.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.2. Catalog Tables </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.4. Client Request Filters</td></tr></table></div></body></html>
|
|
@ -1,15 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>14.6. Cluster Replication</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="prev" href="ops.monitoring.html" title="14.5. HBase Monitoring"><link rel="next" href="ops.backup.html" title="14.7. HBase Backup"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">14.6. Cluster Replication</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="ops.monitoring.html">Prev</a> </td><th width="60%" align="center">Chapter 14. Apache HBase (TM) Operational Management</th><td width="20%" align="right"> <a accesskey="n" href="ops.backup.html">Next</a></td></tr></table><hr></div><div class="section" title="14.6. Cluster Replication"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="cluster_replication"></a>14.6. Cluster Replication</h2></div></div></div><p>See <a class="link" href="http://hbase.apache.org/replication.html" target="_top">Cluster Replication</a>.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'cluster_replication';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="ops.monitoring.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="ops_mgt.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="ops.backup.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">14.5. HBase Monitoring </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 14.7. HBase Backup</td></tr></table></div></body></html>
|
|
@ -1,32 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.5. Column Family</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="row.html" title="5.4. Row"><link rel="next" href="cells.html" title="5.6. Cells"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.5. Column Family</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="row.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="cells.html">Next</a></td></tr></table><hr></div><div class="section" title="5.5. Column Family"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="columnfamily"></a>5.5. Column Family<a class="indexterm" name="d2121e3141"></a></h2></div></div></div><p>
|
||||
Columns in Apache HBase are grouped into <span class="emphasis"><em>column families</em></span>.
|
||||
All column members of a column family have the same prefix. For example, the
|
||||
columns <span class="emphasis"><em>courses:history</em></span> and
|
||||
<span class="emphasis"><em>courses:math</em></span> are both members of the
|
||||
<span class="emphasis"><em>courses</em></span> column family.
|
||||
The colon character (<code class="literal">:</code>) delimits the column family from the
|
||||
column family qualifier<a class="indexterm" name="d2121e3161"></a>.
|
||||
The column family prefix must be composed of
|
||||
<span class="emphasis"><em>printable</em></span> characters. The qualifying tail, the
|
||||
column family <span class="emphasis"><em>qualifier</em></span>, can be made of any
|
||||
arbitrary bytes. Column families must be declared up front
|
||||
at schema definition time whereas columns do not need to be
|
||||
defined at schema time but can be conjured on the fly while
|
||||
the table is up and running.</p><p>Physically, all column family members are stored together on the
|
||||
filesystem. Because tunings and
|
||||
storage specifications are done at the column family level, it is
|
||||
advised that all column family members have the same general access
|
||||
pattern and size characteristics.</p><p></p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'columnfamily';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="row.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="cells.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.4. Row </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.6. Cells</td></tr></table></div></body></html>
|
|
@ -1,41 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 17. Community</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="zk.sasl.auth.html" title="16.2. SASL Authentication with ZooKeeper"><link rel="next" href="community.roles.html" title="17.2. Community Roles"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 17. Community</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="zk.sasl.auth.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="community.roles.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 17. Community"><div class="titlepage"><div><div><h2 class="title"><a name="community"></a>Chapter 17. Community</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="community.html#decisions">17.1. Decisions</a></span></dt><dd><dl><dt><span class="section"><a href="community.html#feature_branches">17.1.1. Feature Branches</a></span></dt><dt><span class="section"><a href="community.html#patchplusonepolicy">17.1.2. Patch +1 Policy</a></span></dt></dl></dd><dt><span class="section"><a href="community.roles.html">17.2. Community Roles</a></span></dt><dd><dl><dt><span class="section"><a href="community.roles.html#OWNER">17.2.1. Component Owner</a></span></dt></dl></dd></dl></div><div class="section" title="17.1. Decisions"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="decisions"></a>17.1. Decisions</h2></div></div></div><div class="section" title="17.1.1. 
Feature Branches"><div class="titlepage"><div><div><h3 class="title"><a name="feature_branches"></a>17.1.1. Feature Branches</h3></div></div></div><p>Feature Branches are easy to make. You do not have to be a committer to make one. Just request the name of your branch be added to JIRA up on the
|
||||
developer's mailing list and a committer will add it for you. Thereafter you can file issues against your feature branch in Apache HBase (TM) JIRA. Your code you
|
||||
keep elsewhere -- it should be public so it can be observed -- and you can update dev mailing list on progress. When the feature is ready for commit,
|
||||
3 +1s from committers will get your feature merged<sup>[<a name="d2121e11103" href="#ftn.d2121e11103" class="footnote">34</a>]</sup>
|
||||
</p></div><div class="section" title="17.1.2. Patch +1 Policy"><div class="titlepage"><div><div><h3 class="title"><a name="patchplusonepolicy"></a>17.1.2. Patch +1 Policy</h3></div></div></div><p>
|
||||
The below policy is something we put in place 09/2012. It is a
|
||||
suggested policy rather than a hard requirement. We want to try it
|
||||
first to see if it works before we cast it in stone.
|
||||
</p><p>
|
||||
Apache HBase is made of
|
||||
<a class="link" href="https://issues.apache.org/jira/browse/HBASE#selectedTab=com.atlassian.jira.plugin.system.project%3Acomponents-panel" target="_top">components</a>.
|
||||
Components have one or more <a class="xref" href="community.roles.html#OWNER" title="17.2.1. Component Owner">Section 17.2.1, “Component Owner”</a>s. See the 'Description' field on the
|
||||
<a class="link" href="https://issues.apache.org/jira/browse/HBASE#selectedTab=com.atlassian.jira.plugin.system.project%3Acomponents-panel" target="_top">components</a>
|
||||
JIRA page for who the current owners are by component.
|
||||
</p><p>
|
||||
Patches that fit within the scope of a single Apache HBase component require,
|
||||
at least, a +1 by one of the component's owners before commit. If
|
||||
owners are absent -- busy or otherwise -- two +1s by non-owners will
|
||||
suffice.
|
||||
</p><p>
|
||||
Patches that span components need at least two +1s before they can be
|
||||
committed, preferably +1s by owners of components touched by the
|
||||
x-component patch (TODO: This needs tightening up but I think fine for
|
||||
first pass).
|
||||
</p><p>
|
||||
Any -1 on a patch by anyone vetoes a patch; it cannot be committed
|
||||
until the justification for the -1 is addressed.
|
||||
</p></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e11103" href="#d2121e11103" class="para">34</a>] </sup>See <a class="link" href="http://search-hadoop.com/m/asM982C5FkS1" target="_top">HBase, mail # dev - Thoughts about large feature dev branches</a></p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'community';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="zk.sasl.auth.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="community.roles.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">16.2. SASL Authentication with ZooKeeper </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 17.2. Community Roles</td></tr></table></div></body></html>
|
|
@ -1,34 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>17.2. Community Roles</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="community.html" title="Chapter 17. Community"><link rel="prev" href="community.html" title="Chapter 17. Community"><link rel="next" href="faq.html" title="Appendix A. FAQ"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">17.2. Community Roles</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="community.html">Prev</a> </td><th width="60%" align="center">Chapter 17. Community</th><td width="20%" align="right"> <a accesskey="n" href="faq.html">Next</a></td></tr></table><hr></div><div class="section" title="17.2. Community Roles"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="community.roles"></a>17.2. Community Roles</h2></div></div></div><div class="section" title="17.2.1. Component Owner"><div class="titlepage"><div><div><h3 class="title"><a name="OWNER"></a>17.2.1. Component Owner</h3></div></div></div><p>
|
||||
Component owners are listed in the description field on this Apache HBase JIRA <a class="link" href="https://issues.apache.org/jira/browse/HBASE#selectedTab=com.atlassian.jira.plugin.system.project%3Acomponents-panel" target="_top">components</a>
|
||||
page. The owners are listed in the 'Description' field rather than in the 'Component
|
||||
Lead' field because the latter only allows us to list one individual
|
||||
whereas it is encouraged that components have multiple owners.
|
||||
</p><p>
|
||||
Owners are volunteers who are (usually, but not necessarily) expert in
|
||||
their component domain and may have an agenda on how they think their
|
||||
Apache HBase component should evolve.
|
||||
</p><p>
|
||||
Duties include:
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>
|
||||
Owners will try and review patches that land within their component's scope.
|
||||
</p></li><li class="listitem"><p>
|
||||
If applicable, if an owner has an agenda, they will publish their
|
||||
goals or the design toward which they are driving their component
|
||||
</p></li></ol></div><p>
|
||||
</p><p>
|
||||
If you would like to volunteer as a component owner, just write the
|
||||
dev list and we'll sign you up. Owners do not need to be committers.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'community.roles';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="community.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="community.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="faq.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 17. Community </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix A. FAQ</td></tr></table></div></body></html>
|
|
@ -1,34 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix C. Compression In HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="apbs04.html" title="B.4. Region Overlap Repairs"><link rel="next" href="hbase.regionserver.codecs.html" title="C.2. hbase.regionserver.codecs"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix C. Compression In HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="apbs04.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="hbase.regionserver.codecs.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix C. Compression In HBase"><div class="titlepage"><div><div><h2 class="title"><a name="compression"></a>Appendix C. Compression In HBase<a class="indexterm" name="d2121e11534"></a></h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="compression.html#compression.test">C.1. CompressionTest Tool</a></span></dt><dt><span class="section"><a href="hbase.regionserver.codecs.html">C.2.
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
</a></span></dt><dt><span class="section"><a href="lzo.compression.html">C.3.
|
||||
LZO
|
||||
</a></span></dt><dt><span class="section"><a href="gzip.compression.html">C.4.
|
||||
GZIP
|
||||
</a></span></dt><dt><span class="section"><a href="snappy.compression.html">C.5.
|
||||
SNAPPY
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="snappy.compression.html#snappy.compression.installation">C.5.1.
|
||||
Installation
|
||||
</a></span></dt></dl></dd><dt><span class="section"><a href="changing.compression.html">C.6. Changing Compression Schemes</a></span></dt></dl></div><div class="section" title="C.1. CompressionTest Tool"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="compression.test"></a>C.1. CompressionTest Tool</h2></div></div></div><p>
|
||||
HBase includes a tool to test that compression is set up properly.
|
||||
To run it, type <code class="code">/bin/hbase org.apache.hadoop.hbase.util.CompressionTest</code>.
|
||||
This will emit usage on how to run the tool.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'compression';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="apbs04.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="hbase.regionserver.codecs.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">B.4. Region Overlap Repairs </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> C.2.
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,452 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>2.3. Configuration Files</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="configuration.html" title="Chapter 2. Apache HBase (TM) Configuration"><link rel="prev" href="standalone_dist.html" title="2.2. HBase run modes: Standalone and Distributed"><link rel="next" href="example_config.html" title="2.4. Example Configurations"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">2.3. Configuration Files</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="standalone_dist.html">Prev</a> </td><th width="60%" align="center">Chapter 2. Apache HBase (TM) Configuration</th><td width="20%" align="right"> <a accesskey="n" href="example_config.html">Next</a></td></tr></table><hr></div><div class="section" title="2.3. Configuration Files"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="config.files"></a>2.3. Configuration Files</h2></div></div></div><div class="section" title="2.3.1. hbase-site.xml and hbase-default.xml"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.site"></a>2.3.1. <code class="filename">hbase-site.xml</code> and <code class="filename">hbase-default.xml</code></h3></div></div></div><p>Just as in Hadoop where you add site-specific HDFS configuration
|
||||
to the <code class="filename">hdfs-site.xml</code> file,
|
||||
for HBase, site specific customizations go into
|
||||
the file <code class="filename">conf/hbase-site.xml</code>.
|
||||
For the list of configurable properties, see
|
||||
<a class="xref" href="config.files.html#hbase_default_configurations" title="2.3.1.1. HBase Default Configuration">Section 2.3.1.1, “HBase Default Configuration”</a>
|
||||
below or view the raw <code class="filename">hbase-default.xml</code>
|
||||
source file in the HBase source code at
|
||||
<code class="filename">src/main/resources</code>.
|
||||
</p><p>
|
||||
Not all configuration options make it out to
|
||||
<code class="filename">hbase-default.xml</code>. Configuration
|
||||
options that users are rarely expected to change may exist only
|
||||
in code; the only way to turn up such configurations is
|
||||
via a reading of the source code itself.
|
||||
</p><p>
|
||||
Currently, changes here will require a cluster restart for HBase to notice the change.
|
||||
</p><div class="section" title="2.3.1.1. HBase Default Configuration"><div class="titlepage"><div><div><h4 class="title"><a name="hbase_default_configurations"></a>2.3.1.1. HBase Default Configuration</h4></div></div></div><p></p><div class="glossary" title="HBase Default Configuration"><div class="titlepage"><div><div><h5 class="title"><a name="hbase.default.configuration"></a>HBase Default Configuration</h5></div></div></div><p>
|
||||
The documentation below is generated using the default hbase configuration file,
|
||||
<code class="filename">hbase-default.xml</code>, as source.
|
||||
</p><dl><dt><a name="hbase.rootdir"></a><code class="varname">hbase.rootdir</code></dt><dd><p>The directory shared by region servers and into
|
||||
which HBase persists. The URL should be 'fully-qualified'
|
||||
to include the filesystem scheme. For example, to specify the
|
||||
HDFS directory '/hbase' where the HDFS instance's namenode is
|
||||
running at namenode.example.org on port 9000, set this value to:
|
||||
hdfs://namenode.example.org:9000/hbase. By default HBase writes
|
||||
into /tmp. Change this configuration else all data will be lost
|
||||
on machine restart.
|
||||
</p><p>Default: <code class="varname">file:///tmp/hbase-${user.name}/hbase</code></p></dd><dt><a name="hbase.master.port"></a><code class="varname">hbase.master.port</code></dt><dd><p>The port the HBase Master should bind to.</p><p>Default: <code class="varname">60000</code></p></dd><dt><a name="hbase.cluster.distributed"></a><code class="varname">hbase.cluster.distributed</code></dt><dd><p>The mode the cluster will be in. Possible values are
|
||||
false for standalone mode and true for distributed mode. If
|
||||
false, startup will run all HBase and ZooKeeper daemons together
|
||||
in the one JVM.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.tmp.dir"></a><code class="varname">hbase.tmp.dir</code></dt><dd><p>Temporary directory on the local filesystem.
|
||||
Change this setting to point to a location more permanent
|
||||
than '/tmp' (The '/tmp' directory is often cleared on
|
||||
machine restart).
|
||||
</p><p>Default: <code class="varname">/tmp/hbase-${user.name}</code></p></dd><dt><a name="hbase.master.info.port"></a><code class="varname">hbase.master.info.port</code></dt><dd><p>The port for the HBase Master web UI.
|
||||
Set to -1 if you do not want a UI instance run.
|
||||
</p><p>Default: <code class="varname">60010</code></p></dd><dt><a name="hbase.master.info.bindAddress"></a><code class="varname">hbase.master.info.bindAddress</code></dt><dd><p>The bind address for the HBase Master web UI
|
||||
</p><p>Default: <code class="varname">0.0.0.0</code></p></dd><dt><a name="hbase.client.write.buffer"></a><code class="varname">hbase.client.write.buffer</code></dt><dd><p>Default size of the HTable client write buffer in bytes.
|
||||
A bigger buffer takes more memory -- on both the client and server
|
||||
side since server instantiates the passed write buffer to process
|
||||
it -- but a larger buffer size reduces the number of RPCs made.
|
||||
For an estimate of server-side memory-used, evaluate
|
||||
hbase.client.write.buffer * hbase.regionserver.handler.count
|
||||
</p><p>Default: <code class="varname">2097152</code></p></dd><dt><a name="hbase.regionserver.port"></a><code class="varname">hbase.regionserver.port</code></dt><dd><p>The port the HBase RegionServer binds to.
|
||||
</p><p>Default: <code class="varname">60020</code></p></dd><dt><a name="hbase.regionserver.info.port"></a><code class="varname">hbase.regionserver.info.port</code></dt><dd><p>The port for the HBase RegionServer web UI.
|
||||
Set to -1 if you do not want the RegionServer UI to run.
|
||||
</p><p>Default: <code class="varname">60030</code></p></dd><dt><a name="hbase.regionserver.info.port.auto"></a><code class="varname">hbase.regionserver.info.port.auto</code></dt><dd><p>Whether or not the Master or RegionServer
|
||||
UI should search for a port to bind to. Enables automatic port
|
||||
search if hbase.regionserver.info.port is already in use.
|
||||
Useful for testing, turned off by default.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.regionserver.info.bindAddress"></a><code class="varname">hbase.regionserver.info.bindAddress</code></dt><dd><p>The address for the HBase RegionServer web UI
|
||||
</p><p>Default: <code class="varname">0.0.0.0</code></p></dd><dt><a name="hbase.client.pause"></a><code class="varname">hbase.client.pause</code></dt><dd><p>General client pause value. Used mostly as value to wait
|
||||
before running a retry of a failed get, region lookup, etc.</p><p>Default: <code class="varname">1000</code></p></dd><dt><a name="hbase.client.retries.number"></a><code class="varname">hbase.client.retries.number</code></dt><dd><p>Maximum retries. Used as maximum for all retryable
|
||||
operations such as fetching of the root region from root region
|
||||
server, getting a cell's value, starting a row update, etc.
|
||||
Default: 10.
|
||||
</p><p>Default: <code class="varname">10</code></p></dd><dt><a name="hbase.bulkload.retries.number"></a><code class="varname">hbase.bulkload.retries.number</code></dt><dd><p>Maximum retries. This is maximum number of iterations
|
||||
that atomic bulk loads are attempted in the face of splitting operations.
|
||||
0 means never give up. Default: 0.
|
||||
</p><p>Default: <code class="varname">0</code></p></dd><dt><a name="hbase.client.scanner.caching"></a><code class="varname">hbase.client.scanner.caching</code></dt><dd><p>Number of rows that will be fetched when calling next
|
||||
on a scanner if it is not served from (local, client) memory. Higher
|
||||
caching values will enable faster scanners but will eat up more memory
|
||||
and some calls of next may take longer and longer times when the cache is empty.
|
||||
Do not set this value such that the time between invocations is greater
|
||||
than the scanner timeout; i.e. hbase.client.scanner.timeout.period
|
||||
</p><p>Default: <code class="varname">100</code></p></dd><dt><a name="hbase.client.keyvalue.maxsize"></a><code class="varname">hbase.client.keyvalue.maxsize</code></dt><dd><p>Specifies the combined maximum allowed size of a KeyValue
|
||||
instance. This is to set an upper boundary for a single entry saved in a
|
||||
storage file. Since entries cannot be split, this helps avoid the situation where a region
|
||||
cannot be split any further because the data is too large. It seems wise
|
||||
to set this to a fraction of the maximum region size. Setting it to zero
|
||||
or less disables the check.
|
||||
</p><p>Default: <code class="varname">10485760</code></p></dd><dt><a name="hbase.client.scanner.timeout.period"></a><code class="varname">hbase.client.scanner.timeout.period</code></dt><dd><p>Client scanner lease period in milliseconds. Default is
|
||||
60 seconds. </p><p>Default: <code class="varname">60000</code></p></dd><dt><a name="hbase.regionserver.rowlock.timeout.period"></a><code class="varname">hbase.regionserver.rowlock.timeout.period</code></dt><dd><p>Row lock time out period in milliseconds. Default is
|
||||
60 seconds. </p><p>Default: <code class="varname">60000</code></p></dd><dt><a name="hbase.regionserver.handler.count"></a><code class="varname">hbase.regionserver.handler.count</code></dt><dd><p>Count of RPC Listener instances spun up on RegionServers.
|
||||
Same property is used by the Master for count of master handlers.
|
||||
Default is 10.
|
||||
</p><p>Default: <code class="varname">10</code></p></dd><dt><a name="hbase.regionserver.msginterval"></a><code class="varname">hbase.regionserver.msginterval</code></dt><dd><p>Interval between messages from the RegionServer to Master
|
||||
in milliseconds.
|
||||
</p><p>Default: <code class="varname">3000</code></p></dd><dt><a name="hbase.regionserver.optionallogflushinterval"></a><code class="varname">hbase.regionserver.optionallogflushinterval</code></dt><dd><p>Sync the HLog to the HDFS after this interval if it has not
|
||||
accumulated enough entries to trigger a sync. Default 1 second. Units:
|
||||
milliseconds.
|
||||
</p><p>Default: <code class="varname">1000</code></p></dd><dt><a name="hbase.regionserver.regionSplitLimit"></a><code class="varname">hbase.regionserver.regionSplitLimit</code></dt><dd><p>Limit for the number of regions after which no more region
|
||||
splitting should take place. This is not a hard limit for the number of
|
||||
regions but acts as a guideline for the regionserver to stop splitting after
|
||||
a certain limit. Default is set to MAX_INT; i.e. do not block splitting.
|
||||
</p><p>Default: <code class="varname">2147483647</code></p></dd><dt><a name="hbase.regionserver.logroll.period"></a><code class="varname">hbase.regionserver.logroll.period</code></dt><dd><p>Period at which we will roll the commit log regardless
|
||||
of how many edits it has.</p><p>Default: <code class="varname">3600000</code></p></dd><dt><a name="hbase.regionserver.logroll.errors.tolerated"></a><code class="varname">hbase.regionserver.logroll.errors.tolerated</code></dt><dd><p>The number of consecutive WAL close errors we will allow
|
||||
before triggering a server abort. A setting of 0 will cause the
|
||||
region server to abort if closing the current WAL writer fails during
|
||||
log rolling. Even a small value (2 or 3) will allow a region server
|
||||
to ride over transient HDFS errors.</p><p>Default: <code class="varname">2</code></p></dd><dt><a name="hbase.regionserver.hlog.reader.impl"></a><code class="varname">hbase.regionserver.hlog.reader.impl</code></dt><dd><p>The HLog file reader implementation.</p><p>Default: <code class="varname">org.apache.hadoop.hbase.regionserver.wal.SequenceFileLogReader</code></p></dd><dt><a name="hbase.regionserver.hlog.writer.impl"></a><code class="varname">hbase.regionserver.hlog.writer.impl</code></dt><dd><p>The HLog file writer implementation.</p><p>Default: <code class="varname">org.apache.hadoop.hbase.regionserver.wal.SequenceFileLogWriter</code></p></dd><dt><a name="hbase.regionserver.nbreservationblocks"></a><code class="varname">hbase.regionserver.nbreservationblocks</code></dt><dd><p>The number of reservoir blocks of memory released on
|
||||
OOME so we can cleanup properly before server shutdown.
|
||||
</p><p>Default: <code class="varname">4</code></p></dd><dt><a name="hbase.zookeeper.dns.interface"></a><code class="varname">hbase.zookeeper.dns.interface</code></dt><dd><p>The name of the Network Interface from which a ZooKeeper server
|
||||
should report its IP address.
|
||||
</p><p>Default: <code class="varname">default</code></p></dd><dt><a name="hbase.zookeeper.dns.nameserver"></a><code class="varname">hbase.zookeeper.dns.nameserver</code></dt><dd><p>The host name or IP address of the name server (DNS)
|
||||
which a ZooKeeper server should use to determine the host name used by the
|
||||
master for communication and display purposes.
|
||||
</p><p>Default: <code class="varname">default</code></p></dd><dt><a name="hbase.regionserver.dns.interface"></a><code class="varname">hbase.regionserver.dns.interface</code></dt><dd><p>The name of the Network Interface from which a region server
|
||||
should report its IP address.
|
||||
</p><p>Default: <code class="varname">default</code></p></dd><dt><a name="hbase.regionserver.dns.nameserver"></a><code class="varname">hbase.regionserver.dns.nameserver</code></dt><dd><p>The host name or IP address of the name server (DNS)
|
||||
which a region server should use to determine the host name used by the
|
||||
master for communication and display purposes.
|
||||
</p><p>Default: <code class="varname">default</code></p></dd><dt><a name="hbase.master.dns.interface"></a><code class="varname">hbase.master.dns.interface</code></dt><dd><p>The name of the Network Interface from which a master
|
||||
should report its IP address.
|
||||
</p><p>Default: <code class="varname">default</code></p></dd><dt><a name="hbase.master.dns.nameserver"></a><code class="varname">hbase.master.dns.nameserver</code></dt><dd><p>The host name or IP address of the name server (DNS)
|
||||
which a master should use to determine the host name used
|
||||
for communication and display purposes.
|
||||
</p><p>Default: <code class="varname">default</code></p></dd><dt><a name="hbase.balancer.period"></a><code class="varname">hbase.balancer.period
|
||||
</code></dt><dd><p>Period at which the region balancer runs in the Master.
|
||||
</p><p>Default: <code class="varname">300000</code></p></dd><dt><a name="hbase.regions.slop"></a><code class="varname">hbase.regions.slop</code></dt><dd><p>Rebalance if any regionserver has average + (average * slop) regions.
|
||||
Default is 20% slop.
|
||||
</p><p>Default: <code class="varname">0.2</code></p></dd><dt><a name="hbase.master.logcleaner.ttl"></a><code class="varname">hbase.master.logcleaner.ttl</code></dt><dd><p>Maximum time a HLog can stay in the .oldlogdir directory,
|
||||
after which it will be cleaned by a Master thread.
|
||||
</p><p>Default: <code class="varname">600000</code></p></dd><dt><a name="hbase.master.logcleaner.plugins"></a><code class="varname">hbase.master.logcleaner.plugins</code></dt><dd><p>A comma-separated list of LogCleanerDelegate invoked by
|
||||
the LogsCleaner service. These WAL/HLog cleaners are called in order,
|
||||
so put the HLog cleaner that prunes the most HLog files in front. To
|
||||
implement your own LogCleanerDelegate, just put it in HBase's classpath
|
||||
and add the fully qualified class name here. Always add the above
|
||||
default log cleaners in the list.
|
||||
</p><p>Default: <code class="varname">org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner</code></p></dd><dt><a name="hbase.regionserver.global.memstore.upperLimit"></a><code class="varname">hbase.regionserver.global.memstore.upperLimit</code></dt><dd><p>Maximum size of all memstores in a region server before new
|
||||
updates are blocked and flushes are forced. Defaults to 40% of heap.
|
||||
Updates are blocked and flushes are forced until size of all memstores
|
||||
in a region server hits hbase.regionserver.global.memstore.lowerLimit.
|
||||
</p><p>Default: <code class="varname">0.4</code></p></dd><dt><a name="hbase.regionserver.global.memstore.lowerLimit"></a><code class="varname">hbase.regionserver.global.memstore.lowerLimit</code></dt><dd><p>Maximum size of all memstores in a region server before
|
||||
flushes are forced. Defaults to 35% of heap.
|
||||
Setting this value equal to hbase.regionserver.global.memstore.upperLimit causes
|
||||
the minimum possible flushing to occur when updates are blocked due to
|
||||
memstore limiting.
|
||||
</p><p>Default: <code class="varname">0.35</code></p></dd><dt><a name="hbase.server.thread.wakefrequency"></a><code class="varname">hbase.server.thread.wakefrequency</code></dt><dd><p>Time to sleep in between searches for work (in milliseconds).
|
||||
Used as sleep interval by service threads such as log roller.
|
||||
</p><p>Default: <code class="varname">10000</code></p></dd><dt><a name="hbase.server.versionfile.writeattempts"></a><code class="varname">hbase.server.versionfile.writeattempts</code></dt><dd><p>
|
||||
How many times to retry attempting to write a version file
|
||||
before just aborting. Each attempt is separated by the
|
||||
hbase.server.thread.wakefrequency milliseconds.
|
||||
</p><p>Default: <code class="varname">3</code></p></dd><dt><a name="hbase.hregion.memstore.flush.size"></a><code class="varname">hbase.hregion.memstore.flush.size</code></dt><dd><p>
|
||||
Memstore will be flushed to disk if size of the memstore
|
||||
exceeds this number of bytes. Value is checked by a thread that runs
|
||||
every hbase.server.thread.wakefrequency.
|
||||
</p><p>Default: <code class="varname">134217728</code></p></dd><dt><a name="hbase.hregion.preclose.flush.size"></a><code class="varname">hbase.hregion.preclose.flush.size</code></dt><dd><p>
|
||||
If the memstores in a region are this size or larger when we go
|
||||
to close, run a "pre-flush" to clear out memstores before we put up
|
||||
the region closed flag and take the region offline. On close,
|
||||
a flush is run under the close flag to empty memory. During
|
||||
this time the region is offline and we are not taking on any writes.
|
||||
If the memstore content is large, this flush could take a long time to
|
||||
complete. The preflush is meant to clean out the bulk of the memstore
|
||||
before putting up the close flag and taking the region offline so the
|
||||
flush that runs under the close flag has little to do.
|
||||
</p><p>Default: <code class="varname">5242880</code></p></dd><dt><a name="hbase.hregion.memstore.block.multiplier"></a><code class="varname">hbase.hregion.memstore.block.multiplier</code></dt><dd><p>
|
||||
Block updates if memstore has hbase.hregion.memstore.block.multiplier
|
||||
times hbase.hregion.memstore.flush.size bytes. Useful for preventing
|
||||
runaway memstore during spikes in update traffic. Without an
|
||||
upper-bound, memstore fills such that when it flushes the
|
||||
resultant flush files take a long time to compact or split, or
|
||||
worse, we OOME.
|
||||
</p><p>Default: <code class="varname">2</code></p></dd><dt><a name="hbase.hregion.memstore.mslab.enabled"></a><code class="varname">hbase.hregion.memstore.mslab.enabled</code></dt><dd><p>
|
||||
Enables the MemStore-Local Allocation Buffer,
|
||||
a feature which works to prevent heap fragmentation under
|
||||
heavy write loads. This can reduce the frequency of stop-the-world
|
||||
GC pauses on large heaps.
|
||||
</p><p>Default: <code class="varname">true</code></p></dd><dt><a name="hbase.hregion.max.filesize"></a><code class="varname">hbase.hregion.max.filesize</code></dt><dd><p>
|
||||
Maximum HStoreFile size. If any one of a column family's HStoreFiles has
|
||||
grown to exceed this value, the hosting HRegion is split in two.
|
||||
Default: 10G.
|
||||
</p><p>Default: <code class="varname">10737418240</code></p></dd><dt><a name="hbase.hstore.compactionThreshold"></a><code class="varname">hbase.hstore.compactionThreshold</code></dt><dd><p>
|
||||
If more than this number of HStoreFiles in any one HStore
|
||||
(one HStoreFile is written per flush of memstore) then a compaction
|
||||
is run to rewrite all HStoreFiles as one. Larger numbers
|
||||
put off compaction but when it runs, it takes longer to complete.
|
||||
</p><p>Default: <code class="varname">3</code></p></dd><dt><a name="hbase.hstore.blockingStoreFiles"></a><code class="varname">hbase.hstore.blockingStoreFiles</code></dt><dd><p>
|
||||
If more than this number of StoreFiles in any one Store
|
||||
(one StoreFile is written per flush of MemStore) then updates are
|
||||
blocked for this HRegion until a compaction is completed, or
|
||||
until hbase.hstore.blockingWaitTime has been exceeded.
|
||||
</p><p>Default: <code class="varname">7</code></p></dd><dt><a name="hbase.hstore.blockingWaitTime"></a><code class="varname">hbase.hstore.blockingWaitTime</code></dt><dd><p>
|
||||
The time an HRegion will block updates for after hitting the StoreFile
|
||||
limit defined by hbase.hstore.blockingStoreFiles.
|
||||
After this time has elapsed, the HRegion will stop blocking updates even
|
||||
if a compaction has not been completed. Default: 90 seconds.
|
||||
</p><p>Default: <code class="varname">90000</code></p></dd><dt><a name="hbase.hstore.compaction.max"></a><code class="varname">hbase.hstore.compaction.max</code></dt><dd><p>Max number of HStoreFiles to compact per 'minor' compaction.
|
||||
</p><p>Default: <code class="varname">10</code></p></dd><dt><a name="hbase.hregion.majorcompaction"></a><code class="varname">hbase.hregion.majorcompaction</code></dt><dd><p>The time (in milliseconds) between 'major' compactions of all
|
||||
HStoreFiles in a region. Default: 1 day.
|
||||
Set to 0 to disable automated major compactions.
|
||||
</p><p>Default: <code class="varname">86400000</code></p></dd><dt><a name="hbase.mapreduce.hfileoutputformat.blocksize"></a><code class="varname">hbase.mapreduce.hfileoutputformat.blocksize</code></dt><dd><p>The mapreduce HFileOutputFormat writes storefiles/hfiles.
|
||||
This is the minimum hfile blocksize to emit. Usually in hbase, writing
|
||||
hfiles, the blocksize is gotten from the table schema (HColumnDescriptor)
|
||||
but in the mapreduce outputformat context, we don't have access to the
|
||||
schema so get blocksize from Configuration. The smaller you make
|
||||
the blocksize, the bigger your index and the less you fetch on a
|
||||
random-access. Set the blocksize down if you have small cells and want
|
||||
faster random-access of individual cells.
|
||||
</p><p>Default: <code class="varname">65536</code></p></dd><dt><a name="hfile.block.cache.size"></a><code class="varname">hfile.block.cache.size</code></dt><dd><p>
|
||||
Percentage of maximum heap (-Xmx setting) to allocate to block cache
|
||||
used by HFile/StoreFile. Default of 0.25 means allocate 25%.
|
||||
Set to 0 to disable but it's not recommended.
|
||||
</p><p>Default: <code class="varname">0.25</code></p></dd><dt><a name="hbase.hash.type"></a><code class="varname">hbase.hash.type</code></dt><dd><p>The hashing algorithm for use in HashFunction. Two values are
|
||||
supported now: murmur (MurmurHash) and jenkins (JenkinsHash).
|
||||
Used by bloom filters.
|
||||
</p><p>Default: <code class="varname">murmur</code></p></dd><dt><a name="hfile.block.index.cacheonwrite"></a><code class="varname">hfile.block.index.cacheonwrite</code></dt><dd><p>
|
||||
This allows non-root multi-level index blocks to be put into the block
|
||||
cache at the time the index is being written.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hfile.index.block.max.size"></a><code class="varname">hfile.index.block.max.size</code></dt><dd><p>
|
||||
When the size of a leaf-level, intermediate-level, or root-level
|
||||
index block in a multi-level block index grows to this size, the
|
||||
block is written out and a new block is started.
|
||||
</p><p>Default: <code class="varname">131072</code></p></dd><dt><a name="hfile.format.version"></a><code class="varname">hfile.format.version</code></dt><dd><p>
|
||||
The HFile format version to use for new files. Set this to 1 to test
|
||||
backwards-compatibility. The default value of this option should be
|
||||
consistent with FixedFileTrailer.MAX_VERSION.
|
||||
</p><p>Default: <code class="varname">2</code></p></dd><dt><a name="io.storefile.bloom.block.size"></a><code class="varname">io.storefile.bloom.block.size</code></dt><dd><p>
|
||||
The size in bytes of a single block ("chunk") of a compound Bloom
|
||||
filter. This size is approximate, because Bloom blocks can only be
|
||||
inserted at data block boundaries, and the number of keys per data
|
||||
block varies.
|
||||
</p><p>Default: <code class="varname">131072</code></p></dd><dt><a name="hfile.block.bloom.cacheonwrite"></a><code class="varname">hfile.block.bloom.cacheonwrite</code></dt><dd><p>
|
||||
Enables cache-on-write for inline blocks of a compound Bloom filter.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.rs.cacheblocksonwrite"></a><code class="varname">hbase.rs.cacheblocksonwrite</code></dt><dd><p>
|
||||
Whether an HFile block should be added to the block cache when the
|
||||
block is finished.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.rpc.engine"></a><code class="varname">hbase.rpc.engine</code></dt><dd><p>Implementation of org.apache.hadoop.hbase.ipc.RpcEngine to be
|
||||
used for client / server RPC call marshalling.
|
||||
</p><p>Default: <code class="varname">org.apache.hadoop.hbase.ipc.ProtobufRpcEngine</code></p></dd><dt><a name="hbase.ipc.client.tcpnodelay"></a><code class="varname">hbase.ipc.client.tcpnodelay</code></dt><dd><p>Set no delay on rpc socket connections. See
|
||||
http://docs.oracle.com/javase/1.5.0/docs/api/java/net/Socket.html#getTcpNoDelay()
|
||||
</p><p>Default: <code class="varname">true</code></p></dd><dt><a name="hbase.master.keytab.file"></a><code class="varname">hbase.master.keytab.file</code></dt><dd><p>Full path to the kerberos keytab file to use for logging in
|
||||
the configured HMaster server principal.
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hbase.master.kerberos.principal"></a><code class="varname">hbase.master.kerberos.principal</code></dt><dd><p>Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos principal name
|
||||
that should be used to run the HMaster process. The principal name should
|
||||
be in the form: user/hostname@DOMAIN. If "_HOST" is used as the hostname
|
||||
portion, it will be replaced with the actual hostname of the running
|
||||
instance.
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hbase.regionserver.keytab.file"></a><code class="varname">hbase.regionserver.keytab.file</code></dt><dd><p>Full path to the kerberos keytab file to use for logging in
|
||||
the configured HRegionServer server principal.
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hbase.regionserver.kerberos.principal"></a><code class="varname">hbase.regionserver.kerberos.principal</code></dt><dd><p>Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos principal name
|
||||
that should be used to run the HRegionServer process. The principal name
|
||||
should be in the form: user/hostname@DOMAIN. If "_HOST" is used as the
|
||||
hostname portion, it will be replaced with the actual hostname of the
|
||||
running instance. An entry for this principal must exist in the file
|
||||
specified in hbase.regionserver.keytab.file
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hadoop.policy.file"></a><code class="varname">hadoop.policy.file</code></dt><dd><p>The policy configuration file used by RPC servers to make
|
||||
authorization decisions on client requests. Only used when HBase
|
||||
security is enabled.
|
||||
</p><p>Default: <code class="varname">hbase-policy.xml</code></p></dd><dt><a name="hbase.superuser"></a><code class="varname">hbase.superuser</code></dt><dd><p>List of users or groups (comma-separated), who are allowed
|
||||
full privileges, regardless of stored ACLs, across the cluster.
|
||||
Only used when HBase security is enabled.
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hbase.auth.key.update.interval"></a><code class="varname">hbase.auth.key.update.interval</code></dt><dd><p>The update interval for master key for authentication tokens
|
||||
in servers in milliseconds. Only used when HBase security is enabled.
|
||||
</p><p>Default: <code class="varname">86400000</code></p></dd><dt><a name="hbase.auth.token.max.lifetime"></a><code class="varname">hbase.auth.token.max.lifetime</code></dt><dd><p>The maximum lifetime in milliseconds after which an
|
||||
authentication token expires. Only used when HBase security is enabled.
|
||||
</p><p>Default: <code class="varname">604800000</code></p></dd><dt><a name="zookeeper.session.timeout"></a><code class="varname">zookeeper.session.timeout</code></dt><dd><p>ZooKeeper session timeout.
|
||||
HBase passes this to the zk quorum as suggested maximum time for a
|
||||
session (This setting becomes zookeeper's 'maxSessionTimeout'). See
|
||||
http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions
|
||||
"The client sends a requested timeout, the server responds with the
|
||||
timeout that it can give the client. " In milliseconds.
|
||||
</p><p>Default: <code class="varname">180000</code></p></dd><dt><a name="zookeeper.znode.parent"></a><code class="varname">zookeeper.znode.parent</code></dt><dd><p>Root ZNode for HBase in ZooKeeper. All of HBase's ZooKeeper
|
||||
files that are configured with a relative path will go under this node.
|
||||
By default, all of HBase's ZooKeeper file paths are configured with a
|
||||
relative path, so they will all go under this directory unless changed.
|
||||
</p><p>Default: <code class="varname">/hbase</code></p></dd><dt><a name="zookeeper.znode.rootserver"></a><code class="varname">zookeeper.znode.rootserver</code></dt><dd><p>Path to ZNode holding root region location. This is written by
|
||||
the master and read by clients and region servers. If a relative path is
|
||||
given, the parent folder will be ${zookeeper.znode.parent}. By default,
|
||||
this means the root location is stored at /hbase/root-region-server.
|
||||
</p><p>Default: <code class="varname">root-region-server</code></p></dd><dt><a name="zookeeper.znode.acl.parent"></a><code class="varname">zookeeper.znode.acl.parent</code></dt><dd><p>Root ZNode for access control lists.</p><p>Default: <code class="varname">acl</code></p></dd><dt><a name="hbase.coprocessor.region.classes"></a><code class="varname">hbase.coprocessor.region.classes</code></dt><dd><p>A comma-separated list of Coprocessors that are loaded by
|
||||
default on all tables. For any override coprocessor method, these classes
|
||||
will be called in order. After implementing your own Coprocessor, just put
|
||||
it in HBase's classpath and add the fully qualified class name here.
|
||||
A coprocessor can also be loaded on demand by setting HTableDescriptor.
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hbase.coprocessor.master.classes"></a><code class="varname">hbase.coprocessor.master.classes</code></dt><dd><p>A comma-separated list of
|
||||
org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are
|
||||
loaded by default on the active HMaster process. For any implemented
|
||||
coprocessor methods, the listed classes will be called in order. After
|
||||
implementing your own MasterObserver, just put it in HBase's classpath
|
||||
and add the fully qualified class name here.
|
||||
</p><p>Default: <code class="varname"></code></p></dd><dt><a name="hbase.zookeeper.quorum"></a><code class="varname">hbase.zookeeper.quorum</code></dt><dd><p>Comma separated list of servers in the ZooKeeper Quorum.
|
||||
For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
|
||||
By default this is set to localhost for local and pseudo-distributed modes
|
||||
of operation. For a fully-distributed setup, this should be set to a full
|
||||
list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in hbase-env.sh
|
||||
this is the list of servers which we will start/stop ZooKeeper on.
|
||||
</p><p>Default: <code class="varname">localhost</code></p></dd><dt><a name="hbase.zookeeper.peerport"></a><code class="varname">hbase.zookeeper.peerport</code></dt><dd><p>Port used by ZooKeeper peers to talk to each other.
|
||||
See http://hadoop.apache.org/zookeeper/docs/r3.1.1/zookeeperStarted.html#sc_RunningReplicatedZooKeeper
|
||||
for more information.
|
||||
</p><p>Default: <code class="varname">2888</code></p></dd><dt><a name="hbase.zookeeper.leaderport"></a><code class="varname">hbase.zookeeper.leaderport</code></dt><dd><p>Port used by ZooKeeper for leader election.
|
||||
See http://hadoop.apache.org/zookeeper/docs/r3.1.1/zookeeperStarted.html#sc_RunningReplicatedZooKeeper
|
||||
for more information.
|
||||
</p><p>Default: <code class="varname">3888</code></p></dd><dt><a name="hbase.zookeeper.property.initLimit"></a><code class="varname">hbase.zookeeper.property.initLimit</code></dt><dd><p>Property from ZooKeeper's config zoo.cfg.
|
||||
The number of ticks that the initial synchronization phase can take.
|
||||
</p><p>Default: <code class="varname">10</code></p></dd><dt><a name="hbase.zookeeper.property.syncLimit"></a><code class="varname">hbase.zookeeper.property.syncLimit</code></dt><dd><p>Property from ZooKeeper's config zoo.cfg.
|
||||
The number of ticks that can pass between sending a request and getting an
|
||||
acknowledgment.
|
||||
</p><p>Default: <code class="varname">5</code></p></dd><dt><a name="hbase.zookeeper.property.dataDir"></a><code class="varname">hbase.zookeeper.property.dataDir</code></dt><dd><p>Property from ZooKeeper's config zoo.cfg.
|
||||
The directory where the snapshot is stored.
|
||||
</p><p>Default: <code class="varname">${hbase.tmp.dir}/zookeeper</code></p></dd><dt><a name="hbase.zookeeper.property.clientPort"></a><code class="varname">hbase.zookeeper.property.clientPort</code></dt><dd><p>Property from ZooKeeper's config zoo.cfg.
|
||||
The port at which the clients will connect.
|
||||
</p><p>Default: <code class="varname">2181</code></p></dd><dt><a name="hbase.zookeeper.property.maxClientCnxns"></a><code class="varname">hbase.zookeeper.property.maxClientCnxns</code></dt><dd><p>Property from ZooKeeper's config zoo.cfg.
|
||||
Limit on number of concurrent connections (at the socket level) that a
|
||||
single client, identified by IP address, may make to a single member of
|
||||
the ZooKeeper ensemble. Set high to avoid zk connection issues running
|
||||
standalone and pseudo-distributed.
|
||||
</p><p>Default: <code class="varname">300</code></p></dd><dt><a name="hbase.rest.port"></a><code class="varname">hbase.rest.port</code></dt><dd><p>The port for the HBase REST server.</p><p>Default: <code class="varname">8080</code></p></dd><dt><a name="hbase.rest.readonly"></a><code class="varname">hbase.rest.readonly</code></dt><dd><p>
|
||||
Defines the mode the REST server will be started in. Possible values are:
|
||||
false: All HTTP methods are permitted - GET/PUT/POST/DELETE.
|
||||
true: Only the GET method is permitted.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.defaults.for.version.skip"></a><code class="varname">hbase.defaults.for.version.skip</code></dt><dd><p>
|
||||
Set to true to skip the 'hbase.defaults.for.version' check.
|
||||
Setting this to true can be useful in contexts other than
|
||||
the other side of a maven generation; i.e. running in an
|
||||
ide. You'll want to set this boolean to true to avoid
|
||||
seeing the RuntimeException complaint: "hbase-default.xml file
|
||||
seems to be for and old version of HBase (\${hbase.version}), this
|
||||
version is X.X.X-SNAPSHOT"
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.coprocessor.abortonerror"></a><code class="varname">hbase.coprocessor.abortonerror</code></dt><dd><p>
|
||||
Set to true to cause the hosting server (master or regionserver) to
|
||||
abort if a coprocessor throws a Throwable object that is not IOException or
|
||||
a subclass of IOException. Setting it to true might be useful in development
|
||||
environments where one wants to terminate the server as soon as possible to
|
||||
simplify coprocessor failure analysis.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.online.schema.update.enable"></a><code class="varname">hbase.online.schema.update.enable</code></dt><dd><p>
|
||||
Set true to enable online schema changes. This is an experimental feature.
|
||||
There are known issues modifying table schemas at the same time a region
|
||||
split is happening so your table needs to be quiescent or else you have to
|
||||
be running with splits disabled.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="dfs.support.append"></a><code class="varname">dfs.support.append</code></dt><dd><p>Does HDFS allow appends to files?
|
||||
This is an hdfs config. set in here so the hdfs client will do append support.
|
||||
You must ensure that this config. is true serverside too when running hbase
|
||||
(You will have to restart your cluster after setting it).
|
||||
</p><p>Default: <code class="varname">true</code></p></dd><dt><a name="hbase.thrift.minWorkerThreads"></a><code class="varname">hbase.thrift.minWorkerThreads</code></dt><dd><p>
|
||||
The "core size" of the thread pool. New threads are created on every
|
||||
connection until this many threads are created.
|
||||
</p><p>Default: <code class="varname">16</code></p></dd><dt><a name="hbase.thrift.maxWorkerThreads"></a><code class="varname">hbase.thrift.maxWorkerThreads</code></dt><dd><p>
|
||||
The maximum size of the thread pool. When the pending request queue
|
||||
overflows, new threads are created until their number reaches this number.
|
||||
After that, the server starts dropping connections.
|
||||
</p><p>Default: <code class="varname">1000</code></p></dd><dt><a name="hbase.thrift.maxQueuedRequests"></a><code class="varname">hbase.thrift.maxQueuedRequests</code></dt><dd><p>
|
||||
The maximum number of pending Thrift connections waiting in the queue. If
|
||||
there are no idle threads in the pool, the server queues requests. Only
|
||||
when the queue overflows, new threads are added, up to
|
||||
hbase.thrift.maxWorkerThreads threads.
|
||||
</p><p>Default: <code class="varname">1000</code></p></dd><dt><a name="hbase.offheapcache.percentage"></a><code class="varname">hbase.offheapcache.percentage</code></dt><dd><p>
|
||||
The amount of off heap space to be allocated towards the experimental
|
||||
off heap cache. If you desire the cache to be disabled, simply set this
|
||||
value to 0.
|
||||
</p><p>Default: <code class="varname">0</code></p></dd><dt><a name="hbase.data.umask.enable"></a><code class="varname">hbase.data.umask.enable</code></dt><dd><p>Enable, if true, that file permissions should be assigned
|
||||
to the files written by the regionserver
|
||||
</p><p>Default: <code class="varname">false</code></p></dd><dt><a name="hbase.data.umask"></a><code class="varname">hbase.data.umask</code></dt><dd><p>File permissions that should be used to write data
|
||||
files when hbase.data.umask.enable is true
|
||||
</p><p>Default: <code class="varname">000</code></p></dd><dt><a name="hbase.metrics.showTableName"></a><code class="varname">hbase.metrics.showTableName</code></dt><dd><p>Whether to include the prefix "tbl.tablename" in per-column family metrics.
|
||||
If true, for each metric M, per-cf metrics will be reported for tbl.T.cf.CF.M, if false,
|
||||
per-cf metrics will be aggregated by column-family across tables, and reported for cf.CF.M.
|
||||
In both cases, the aggregated metric M across tables and cfs will be reported.
|
||||
</p><p>Default: <code class="varname">true</code></p></dd><dt><a name="hbase.metrics.exposeOperationTimes"></a><code class="varname">hbase.metrics.exposeOperationTimes</code></dt><dd><p>Whether to report metrics about time taken performing an
|
||||
operation on the region server. Get, Put, Delete, Increment, and Append can all
|
||||
have their times exposed through Hadoop metrics per CF and per region.
|
||||
</p><p>Default: <code class="varname">true</code></p></dd><dt><a name="hbase.table.archive.directory"></a><code class="varname">hbase.table.archive.directory</code></dt><dd><p>Per-table directory name under which to backup files for a
|
||||
table. Files are moved to the same directories as they would be under the
|
||||
table directory, but instead are just one level lower (under
|
||||
table/.archive/... rather than table/...). Currently only applies to HFiles.</p><p>Default: <code class="varname">.archive</code></p></dd><dt><a name="hbase.master.hfilecleaner.plugins"></a><code class="varname">hbase.master.hfilecleaner.plugins</code></dt><dd><p>A comma-separated list of HFileCleanerDelegate invoked by
|
||||
the HFileCleaner service. These HFiles cleaners are called in order,
|
||||
so put the cleaner that prunes the most files in front. To
|
||||
implement your own HFileCleanerDelegate, just put it in HBase's classpath
|
||||
and add the fully qualified class name here. Always add the above
|
||||
default log cleaners in the list as they will be overwritten in hbase-site.xml.
|
||||
</p><p>Default: <code class="varname">org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner</code></p></dd><dt><a name="hbase.regionserver.catalog.timeout"></a><code class="varname">hbase.regionserver.catalog.timeout</code></dt><dd><p>Timeout value for the Catalog Janitor from the regionserver to META.</p><p>Default: <code class="varname">600000</code></p></dd><dt><a name="hbase.master.catalog.timeout"></a><code class="varname">hbase.master.catalog.timeout</code></dt><dd><p>Timeout value for the Catalog Janitor from the master to META.</p><p>Default: <code class="varname">600000</code></p></dd><dt><a name="hbase.config.read.zookeeper.config"></a><code class="varname">hbase.config.read.zookeeper.config</code></dt><dd><p>
|
||||
Set to true to allow HBaseConfiguration to read the
|
||||
zoo.cfg file for ZooKeeper properties. Switching this to true
|
||||
is not recommended, since the functionality of reading ZK
|
||||
properties from a zoo.cfg file has been deprecated.
|
||||
</p><p>Default: <code class="varname">false</code></p></dd></dl></div></div></div><div class="section" title="2.3.2. hbase-env.sh"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.env.sh"></a>2.3.2. <code class="filename">hbase-env.sh</code></h3></div></div></div><p>Set HBase environment variables in this file.
|
||||
Examples include options to pass the JVM on start of
|
||||
an HBase daemon such as heap size and garbage collector configs.
|
||||
You can also set configurations for HBase configuration, log directories,
|
||||
niceness, ssh options, where to locate process pid files,
|
||||
etc. Open the file at
|
||||
<code class="filename">conf/hbase-env.sh</code> and peruse its content.
|
||||
Each option is fairly well documented. Add your own environment
|
||||
variables here if you want them read by HBase daemons on startup.</p><p>
|
||||
Changes here will require a cluster restart for HBase to notice the change.
|
||||
</p></div><div class="section" title="2.3.3. log4j.properties"><div class="titlepage"><div><div><h3 class="title"><a name="log4j"></a>2.3.3. <code class="filename">log4j.properties</code></h3></div></div></div><p>Edit this file to change rate at which HBase files
|
||||
are rolled and to change the level at which HBase logs messages.
|
||||
</p><p>
|
||||
Changes here will require a cluster restart for HBase to notice the change
|
||||
though log levels can be changed for particular daemons via the HBase UI.
|
||||
</p></div><div class="section" title="2.3.4. Client configuration and dependencies connecting to an HBase cluster"><div class="titlepage"><div><div><h3 class="title"><a name="client_dependencies"></a>2.3.4. Client configuration and dependencies connecting to an HBase cluster</h3></div></div></div><p>
|
||||
Since the HBase Master may move around, clients bootstrap by looking to ZooKeeper for
|
||||
current critical locations. ZooKeeper is where all these values are kept. Thus clients
|
||||
require the location of the ZooKeeper ensemble information before they can do anything else.
|
||||
Usually the ensemble location is kept in the <code class="filename">hbase-site.xml</code> and
|
||||
is picked up by the client from the <code class="varname">CLASSPATH</code>.</p><p>If you are configuring an IDE to run a HBase client, you should
|
||||
include the <code class="filename">conf/</code> directory on your classpath so
|
||||
<code class="filename">hbase-site.xml</code> settings can be found (or
|
||||
add <code class="filename">src/test/resources</code> to pick up the hbase-site.xml
|
||||
used by tests).
|
||||
</p><p>
|
||||
Minimally, a client of HBase needs several libraries in its <code class="varname">CLASSPATH</code> when connecting to a cluster, including:
|
||||
</p><pre class="programlisting">
|
||||
commons-configuration (commons-configuration-1.6.jar)
|
||||
commons-lang (commons-lang-2.5.jar)
|
||||
commons-logging (commons-logging-1.1.1.jar)
|
||||
hadoop-core (hadoop-core-1.0.0.jar)
|
||||
hbase (hbase-0.92.0.jar)
|
||||
log4j (log4j-1.2.16.jar)
|
||||
slf4j-api (slf4j-api-1.5.8.jar)
|
||||
slf4j-log4j (slf4j-log4j12-1.5.8.jar)
|
||||
zookeeper (zookeeper-3.4.2.jar)</pre><p>
|
||||
</p><p>
|
||||
An example basic <code class="filename">hbase-site.xml</code> for client only
|
||||
might look as follows:
|
||||
</p><pre class="programlisting">
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>hbase.zookeeper.quorum</name>
|
||||
<value>example1,example2,example3</value>
|
||||
<description>Comma separated list of servers in the ZooKeeper quorum.
|
||||
</description>
|
||||
</property>
|
||||
</configuration>
|
||||
</pre><p>
|
||||
</p><div class="section" title="2.3.4.1. Java client configuration"><div class="titlepage"><div><div><h4 class="title"><a name="java.client.config"></a>2.3.4.1. Java client configuration</h4></div></div></div><p>The configuration used by a Java client is kept
|
||||
in an <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HBaseConfiguration.html" target="_top">HBaseConfiguration</a> instance.
|
||||
The factory method on HBaseConfiguration, <code class="code">HBaseConfiguration.create();</code>,
|
||||
on invocation, will read in the content of the first <code class="filename">hbase-site.xml</code> found on
|
||||
the client's <code class="varname">CLASSPATH</code>, if one is present
|
||||
(Invocation will also factor in any <code class="filename">hbase-default.xml</code> found;
|
||||
an hbase-default.xml ships inside the <code class="filename">hbase.X.X.X.jar</code>).
|
||||
It is also possible to specify configuration directly without having to read from a
|
||||
<code class="filename">hbase-site.xml</code>. For example, to set the ZooKeeper
|
||||
ensemble for the cluster programmatically do as follows:
|
||||
</p><pre class="programlisting">Configuration config = HBaseConfiguration.create();
|
||||
config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zookeeper locally</pre><p>
|
||||
If multiple ZooKeeper instances make up your ZooKeeper ensemble,
|
||||
they may be specified in a comma-separated list (just as in the <code class="filename">hbase-site.xml</code> file).
|
||||
This populated <code class="classname">Configuration</code> instance can then be passed to an
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html" target="_top">HTable</a>,
|
||||
and so on.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'config.files';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="standalone_dist.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="configuration.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="example_config.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">2.2. HBase run modes: Standalone and Distributed </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 2.4. Example Configurations</td></tr></table></div></body></html>
|
|
@ -1,213 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 2. Apache HBase (TM) Configuration</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="quickstart.html" title="1.2. Quick Start"><link rel="next" href="standalone_dist.html" title="2.2. HBase run modes: Standalone and Distributed"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 2. Apache HBase (TM) Configuration</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="quickstart.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="standalone_dist.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 2. Apache HBase (TM) Configuration"><div class="titlepage"><div><div><h2 class="title"><a name="configuration"></a>Chapter 2. Apache HBase (TM) Configuration</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="configuration.html#basic.prerequisites">2.1. Basic Prerequisites</a></span></dt><dd><dl><dt><span class="section"><a href="configuration.html#java">2.1.1. Java</a></span></dt><dt><span class="section"><a href="configuration.html#os">2.1.2. Operating System</a></span></dt><dt><span class="section"><a href="configuration.html#hadoop">2.1.3. Hadoop</a></span></dt></dl></dd><dt><span class="section"><a href="standalone_dist.html">2.2. HBase run modes: Standalone and Distributed</a></span></dt><dd><dl><dt><span class="section"><a href="standalone_dist.html#standalone">2.2.1. Standalone HBase</a></span></dt><dt><span class="section"><a href="standalone_dist.html#distributed">2.2.2. 
Distributed</a></span></dt><dt><span class="section"><a href="standalone_dist.html#confirm">2.2.3. Running and Confirming Your Installation</a></span></dt></dl></dd><dt><span class="section"><a href="config.files.html">2.3. Configuration Files</a></span></dt><dd><dl><dt><span class="section"><a href="config.files.html#hbase.site">2.3.1. <code class="filename">hbase-site.xml</code> and <code class="filename">hbase-default.xml</code></a></span></dt><dt><span class="section"><a href="config.files.html#hbase.env.sh">2.3.2. <code class="filename">hbase-env.sh</code></a></span></dt><dt><span class="section"><a href="config.files.html#log4j">2.3.3. <code class="filename">log4j.properties</code></a></span></dt><dt><span class="section"><a href="config.files.html#client_dependencies">2.3.4. Client configuration and dependencies connecting to an HBase cluster</a></span></dt></dl></dd><dt><span class="section"><a href="example_config.html">2.4. Example Configurations</a></span></dt><dd><dl><dt><span class="section"><a href="example_config.html#d2121e2235">2.4.1. Basic Distributed HBase Install</a></span></dt></dl></dd><dt><span class="section"><a href="important_configurations.html">2.5. The Important Configurations</a></span></dt><dd><dl><dt><span class="section"><a href="important_configurations.html#required_configuration">2.5.1. Required Configurations</a></span></dt><dt><span class="section"><a href="important_configurations.html#recommended_configurations">2.5.2. Recommended Configurations</a></span></dt><dt><span class="section"><a href="important_configurations.html#other_configuration">2.5.3. Other Configurations</a></span></dt></dl></dd></dl></div><p>This chapter is the Not-So-Quick start guide to Apache HBase (TM) configuration. It goes
|
||||
over system requirements, Hadoop setup, the different Apache HBase run modes, and the
|
||||
various configurations in HBase. Please read this chapter carefully. At a minimum
|
||||
ensure that all <a class="xref" href="configuration.html#basic.prerequisites" title="2.1. Basic Prerequisites">Section 2.1, “Basic Prerequisites”</a> have
|
||||
been satisfied. Failure to do so will cause you (and us) grief debugging strange errors
|
||||
and/or data loss.</p><p>
|
||||
Apache HBase uses the same configuration system as Apache Hadoop.
|
||||
To configure a deploy, edit a file of environment variables
|
||||
in <code class="filename">conf/hbase-env.sh</code> -- this configuration
|
||||
is used mostly by the launcher shell scripts getting the cluster
|
||||
off the ground -- and then add configuration to an XML file to
|
||||
do things like override HBase defaults, tell HBase what Filesystem to
|
||||
use, and the location of the ZooKeeper ensemble
|
||||
<sup>[<a name="d2121e285" href="#ftn.d2121e285" class="footnote">1</a>]</sup>
|
||||
.
|
||||
</p><p>When running in distributed mode, after you make
|
||||
an edit to an HBase configuration, make sure you copy the
|
||||
content of the <code class="filename">conf</code> directory to
|
||||
all nodes of the cluster. HBase will not do this for you.
|
||||
Use <span class="command"><strong>rsync</strong></span>.</p><div class="section" title="2.1. Basic Prerequisites"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="basic.prerequisites"></a>2.1. Basic Prerequisites</h2></div></div></div><p>This section lists required services and some required system configuration.
|
||||
</p><div class="section" title="2.1.1. Java"><div class="titlepage"><div><div><h3 class="title"><a name="java"></a>2.1.1. Java</h3></div></div></div><p>Just like Hadoop, HBase requires at least java 6 from
|
||||
<a class="link" href="http://www.java.com/download/" target="_top">Oracle</a>.</p></div><div class="section" title="2.1.2. Operating System"><div class="titlepage"><div><div><h3 class="title"><a name="os"></a>2.1.2. Operating System</h3></div></div></div><div class="section" title="2.1.2.1. ssh"><div class="titlepage"><div><div><h4 class="title"><a name="ssh"></a>2.1.2.1. ssh</h4></div></div></div><p><span class="command"><strong>ssh</strong></span> must be installed and
|
||||
<span class="command"><strong>sshd</strong></span> must be running to use Hadoop's scripts to
|
||||
manage remote Hadoop and HBase daemons. You must be able to ssh to all
|
||||
nodes, including your local node, using passwordless login (Google
|
||||
"ssh passwordless login"). If on mac osx, see the section,
|
||||
<a class="link" href="http://wiki.apache.org/hadoop/Running_Hadoop_On_OS_X_10.5_64-bit_%28Single-Node_Cluster%29" target="_top">SSH: Setting up Remote Desktop and Enabling Self-Login</a>
|
||||
on the hadoop wiki.</p></div><div class="section" title="2.1.2.2. DNS"><div class="titlepage"><div><div><h4 class="title"><a name="dns"></a>2.1.2.2. DNS</h4></div></div></div><p>HBase uses the local hostname to self-report its IP address.
|
||||
Both forward and reverse DNS resolving must work in versions of
|
||||
HBase previous to 0.92.0
|
||||
<sup>[<a name="d2121e334" href="#ftn.d2121e334" class="footnote">2</a>]</sup>.</p><p>If your machine has multiple interfaces, HBase will use the
|
||||
interface that the primary hostname resolves to.</p><p>If this is insufficient, you can set
|
||||
<code class="varname">hbase.regionserver.dns.interface</code> to indicate the
|
||||
primary interface. This only works if your cluster configuration is
|
||||
consistent and every host has the same network interface
|
||||
configuration.</p><p>Another alternative is setting
|
||||
<code class="varname">hbase.regionserver.dns.nameserver</code> to choose a
|
||||
different nameserver than the system wide default.</p></div><div class="section" title="2.1.2.3. Loopback IP"><div class="titlepage"><div><div><h4 class="title"><a name="loopback.ip"></a>2.1.2.3. Loopback IP</h4></div></div></div><p>HBase expects the loopback IP address to be 127.0.0.1. See <a class="xref" href="configuration.html#loopback.ip" title="2.1.2.3. Loopback IP">Section 2.1.2.3, “Loopback IP”</a></p></div><div class="section" title="2.1.2.4. NTP"><div class="titlepage"><div><div><h4 class="title"><a name="ntp"></a>2.1.2.4. NTP</h4></div></div></div><p>The clocks on cluster members should be in basic alignments.
|
||||
Some skew is tolerable but wild skew could generate odd behaviors. Run
|
||||
<a class="link" href="http://en.wikipedia.org/wiki/Network_Time_Protocol" target="_top">NTP</a>
|
||||
on your cluster, or an equivalent.</p><p>If you are having problems querying data, or "weird" cluster
|
||||
operations, check system time!</p></div><div class="section" title="2.1.2.5. ulimit and nproc"><div class="titlepage"><div><div><h4 class="title"><a name="ulimit"></a>2.1.2.5.
|
||||
<code class="varname">ulimit</code><a class="indexterm" name="d2121e374"></a>
|
||||
and
|
||||
<code class="varname">nproc</code><a class="indexterm" name="d2121e380"></a>
|
||||
</h4></div></div></div><p>Apache HBase is a database. It uses a lot of files all at the same time.
|
||||
The default ulimit -n -- i.e. user file limit -- of 1024 on most *nix systems
|
||||
is insufficient (on Mac OS X it's 256). Any significant amount of loading will
|
||||
lead you to <a class="xref" href="trouble.rs.html#trouble.rs.runtime.filehandles" title="12.9.2.2. java.io.IOException...(Too many open files)">Section 12.9.2.2, “java.io.IOException...(Too many open files)”</a>.
|
||||
You may also notice errors such as... </p><pre class="programlisting">
|
||||
2010-04-06 03:04:37,542 INFO org.apache.hadoop.hdfs.DFSClient: Exception in createBlockOutputStream java.io.EOFException
|
||||
2010-04-06 03:04:37,542 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-6935524980745310745_1391901
|
||||
</pre><p> Do yourself a favor and change the upper bound on the
|
||||
number of file descriptors. Set it to north of 10k. The math runs roughly as follows: per ColumnFamily
|
||||
there is at least one StoreFile and possibly up to 5 or 6 if the region is under load. Multiply the
|
||||
average number of StoreFiles per ColumnFamily times the number of regions per RegionServer. For example, assuming
|
||||
that a schema had 3 ColumnFamilies per region with an average of 3 StoreFiles per ColumnFamily,
|
||||
and there are 100 regions per RegionServer, the JVM will open 3 * 3 * 100 = 900 file descriptors
|
||||
(not counting open jar files, config files, etc.)
|
||||
</p><p>You should also up the hbase users'
|
||||
<code class="varname">nproc</code> setting; under load, a low-nproc
|
||||
setting could manifest as <code class="classname">OutOfMemoryError</code>
|
||||
<sup>[<a name="d2121e399" href="#ftn.d2121e399" class="footnote">3</a>]</sup>
|
||||
<sup>[<a name="d2121e406" href="#ftn.d2121e406" class="footnote">4</a>]</sup>.
|
||||
</p><p>To be clear, upping the file descriptors and nproc for the user who is
|
||||
running the HBase process is an operating system configuration, not an
|
||||
HBase configuration. Also, a common mistake is that administrators
|
||||
will up the file descriptors for a particular user but for whatever
|
||||
reason, HBase will be running as someone else. HBase prints in its
|
||||
logs as the first line the ulimit it's seeing. Ensure it's correct.
|
||||
<sup>[<a name="d2121e418" href="#ftn.d2121e418" class="footnote">5</a>]</sup></p><div class="section" title="2.1.2.5.1. ulimit on Ubuntu"><div class="titlepage"><div><div><h5 class="title"><a name="ulimit_ubuntu"></a>2.1.2.5.1. <code class="varname">ulimit</code> on Ubuntu</h5></div></div></div><p>If you are on Ubuntu you will need to make the following
|
||||
changes:</p><p>In the file <code class="filename">/etc/security/limits.conf</code> add
|
||||
a line like: </p><pre class="programlisting">hadoop - nofile 32768</pre><p>
|
||||
Replace <code class="varname">hadoop</code> with whatever user is running
|
||||
Hadoop and HBase. If you have separate users, you will need 2
|
||||
entries, one for each user. In the same file set nproc hard and soft
|
||||
limits. For example: </p><pre class="programlisting">hadoop soft/hard nproc 32000</pre><p>In the file <code class="filename">/etc/pam.d/common-session</code> add
|
||||
as the last line in the file: </p><pre class="programlisting">session required pam_limits.so</pre><p>
|
||||
Otherwise the changes in <code class="filename">/etc/security/limits.conf</code> won't be
|
||||
applied.</p><p>Don't forget to log out and back in again for the changes to
|
||||
take effect!</p></div></div><div class="section" title="2.1.2.6. Windows"><div class="titlepage"><div><div><h4 class="title"><a name="windows"></a>2.1.2.6. Windows</h4></div></div></div><p>Apache HBase has been little tested running on Windows. Running a
|
||||
production install of HBase on top of Windows is not
|
||||
recommended.</p><p>If you are running HBase on Windows, you must install <a class="link" href="http://cygwin.com/" target="_top">Cygwin</a> to have a *nix-like
|
||||
environment for the shell scripts. The full details are explained in
|
||||
the <a class="link" href="http://hbase.apache.org/cygwin.html" target="_top">Windows
|
||||
Installation</a> guide. Also
|
||||
<a class="link" href="http://search-hadoop.com/?q=hbase+windows&fc_project=HBase&fc_type=mail+_hash_+dev" target="_top">search our user mailing list</a> to pick
|
||||
up latest fixes figured by Windows users.</p></div></div><div class="section" title="2.1.3. Hadoop"><div class="titlepage"><div><div><h3 class="title"><a name="hadoop"></a>2.1.3. <a class="link" href="http://hadoop.apache.org" target="_top">Hadoop</a><a class="indexterm" name="d2121e478"></a></h3></div></div></div><div class="note" title="Please read all of this section" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Please read all of this section</h3><p>Please read this section to the end. Up front we
|
||||
wade through the weeds of Hadoop versions. Later we talk of what you must do in HBase
|
||||
to make it work w/ a particular Hadoop version.</p></div><p>
|
||||
HBase will lose data unless it is running on an HDFS that has a durable
|
||||
<code class="code">sync</code> implementation. Hadoop 0.20.2, Hadoop 0.20.203.0, and Hadoop 0.20.204.0
|
||||
DO NOT have this attribute.
|
||||
Currently only Hadoop versions 0.20.205.x or any release in excess of this
|
||||
version -- this includes hadoop 1.0.0 -- have a working, durable sync
|
||||
<sup>[<a name="d2121e491" href="#ftn.d2121e491" class="footnote">6</a>]</sup>. Sync has to be explicitly enabled by setting
|
||||
<code class="varname">dfs.support.append</code> equal
|
||||
to true on both the client side -- in <code class="filename">hbase-site.xml</code>
|
||||
-- and on the serverside in <code class="filename">hdfs-site.xml</code> (The sync
|
||||
facility HBase needs is a subset of the append code path).
|
||||
</p><pre class="programlisting">
|
||||
&lt;property&gt;
|
||||
&lt;name&gt;dfs.support.append&lt;/name&gt;
|
||||
&lt;value&gt;true&lt;/value&gt;
|
||||
&lt;/property&gt;
|
||||
</pre><p>
|
||||
You will have to restart your cluster after making this edit. Ignore the chicken-little
|
||||
comment you'll find in the <code class="filename">hdfs-default.xml</code> in the
|
||||
description for the <code class="varname">dfs.support.append</code> configuration; it says it is not enabled because there
|
||||
are <span class="quote">“<span class="quote">... bugs in the 'append code' and is not supported in any production
|
||||
cluster.</span>”</span>. This comment is stale, from another era, and while I'm sure there
|
||||
are bugs, the sync/append code has been running
|
||||
in production at large scale deploys and is on
|
||||
by default in the offerings of hadoop by commercial vendors
|
||||
<sup>[<a name="d2121e519" href="#ftn.d2121e519" class="footnote">7</a>]</sup>
|
||||
<sup>[<a name="d2121e529" href="#ftn.d2121e529" class="footnote">8</a>]</sup><sup>[<a name="d2121e535" href="#ftn.d2121e535" class="footnote">9</a>]</sup>.
|
||||
Please use the most up-to-date Hadoop possible.</p><div class="note" title="Apache HBase 0.96.0 requires Apache Hadoop 1.0.0 at a minimum" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Apache HBase 0.96.0 requires Apache Hadoop 1.0.0 at a minimum</h3><p>As of Apache HBase 0.96.x, Apache Hadoop 1.0.x at least is required. We will no
|
||||
longer run properly on older Hadoops such as <code class="filename">0.20.205</code> or <code class="filename">branch-0.20-append</code>.
|
||||
Do not move to Apache HBase 0.96.x if you cannot upgrade your Hadoop<sup>[<a name="d2121e552" href="#ftn.d2121e552" class="footnote">10</a>]</sup>.</p><p>Apache HBase 0.96.0 runs on Apache Hadoop 2.0.
|
||||
</p></div><p>Or use the
|
||||
<a class="link" href="http://www.cloudera.com/" target="_top">Cloudera</a> or
|
||||
<a class="link" href="http://www.mapr.com/" target="_top">MapR</a> distributions.
|
||||
Cloudera's <a class="link" href="http://archive.cloudera.com/docs/" target="_top">CDH3</a>
|
||||
is Apache Hadoop 0.20.x plus patches including all of the
|
||||
<a class="link" href="http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-append/" target="_top">branch-0.20-append</a>
|
||||
additions needed to add a durable sync. Use the released, most recent version of CDH3. In CDH, append
|
||||
support is enabled by default so you do not need to make the above mentioned edits to
|
||||
<code class="filename">hdfs-site.xml</code> or to <code class="filename">hbase-site.xml</code>.</p><p>
|
||||
<a class="link" href="http://www.mapr.com/" target="_top">MapR</a>
|
||||
includes a commercial reimplementation of HDFS.
|
||||
It has a durable sync as well as some other interesting features that are not
|
||||
yet in Apache Hadoop. Their <a class="link" href="http://www.mapr.com/products/mapr-editions/m3-edition" target="_top">M3</a>
|
||||
product is free to use and unlimited.
|
||||
</p><p>Because HBase depends on Hadoop, it bundles an instance of the
|
||||
Hadoop jar under its <code class="filename">lib</code> directory. The bundled jar is ONLY for use in standalone mode.
|
||||
In distributed mode, it is <span class="emphasis"><em>critical</em></span> that the version of Hadoop that is out
|
||||
on your cluster match what is under HBase. Replace the hadoop jar found in the HBase
|
||||
<code class="filename">lib</code> directory with the hadoop jar you are running on
|
||||
your cluster to avoid version mismatch issues. Make sure you
|
||||
replace the jar in HBase everywhere on your cluster. Hadoop version
|
||||
mismatch issues have various manifestations but often all looks like
|
||||
it's hung up.</p><div class="note" title="Packaging and Apache BigTop" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title"><a name="bigtop"></a>Packaging and Apache BigTop</h3><p><a class="link" href="http://bigtop.apache.org" target="_top">Apache Bigtop</a>
|
||||
is an umbrella for packaging and tests of the Apache Hadoop
|
||||
ecosystem, including Apache HBase. Bigtop performs testing at various
|
||||
levels (packaging, platform, runtime, upgrade, etc.), developed by a
|
||||
community, with a focus on the system as a whole, rather than individual
|
||||
projects. We recommend installing Apache HBase packages as provided by a
|
||||
Bigtop release rather than rolling your own piecemeal integration of
|
||||
various component releases.</p></div><div class="section" title="2.1.3.1. Apache HBase on Secure Hadoop"><div class="titlepage"><div><div><h4 class="title"><a name="hadoop.security"></a>2.1.3.1. Apache HBase on Secure Hadoop</h4></div></div></div><p>Apache HBase will run on any Hadoop 0.20.x that incorporates Hadoop
|
||||
security features -- e.g. Y! 0.20S or CDH3B3 -- as long as you do as
|
||||
suggested above and replace the Hadoop jar that ships with HBase
|
||||
with the secure version. If you want to read more about how to set up
|
||||
Secure HBase, see <a class="xref" href="security.html#hbase.secure.configuration" title="8.1. Secure Client Access to Apache HBase">Section 8.1, “Secure Client Access to Apache HBase”</a>.</p></div><div class="section" title="2.1.3.2. dfs.datanode.max.xcievers"><div class="titlepage"><div><div><h4 class="title"><a name="dfs.datanode.max.xcievers"></a>2.1.3.2. <code class="varname">dfs.datanode.max.xcievers</code><a class="indexterm" name="d2121e617"></a></h4></div></div></div><p>An Hadoop HDFS datanode has an upper bound on the number of
|
||||
files that it will serve at any one time. The upper bound parameter is
|
||||
called <code class="varname">xcievers</code> (yes, this is misspelled). Again,
|
||||
before doing any loading, make sure you have configured Hadoop's
|
||||
<code class="filename">conf/hdfs-site.xml</code> setting the
|
||||
<code class="varname">xcievers</code> value to at least the following:
|
||||
</p><pre class="programlisting">
|
||||
&lt;property&gt;
|
||||
&lt;name&gt;dfs.datanode.max.xcievers&lt;/name&gt;
|
||||
&lt;value&gt;4096&lt;/value&gt;
|
||||
&lt;/property&gt;
|
||||
</pre><p>Be sure to restart your HDFS after making the above
|
||||
configuration.</p><p>Not having this configuration in place makes for strange looking
|
||||
failures. Eventually you'll see a complaint in the datanode logs
|
||||
complaining about the xcievers exceeded, but in the run-up to this, one
|
||||
manifestation is a complaint about missing blocks. For example:
|
||||
<code class="code">10/12/08 20:10:31 INFO hdfs.DFSClient: Could not obtain block
|
||||
blk_XXXXXXXXXXXXXXXXXXXXXX_YYYYYYYY from any node:
|
||||
java.io.IOException: No live nodes contain current block. Will get new
|
||||
block locations from namenode and retry...</code>
|
||||
<sup>[<a name="d2121e640" href="#ftn.d2121e640" class="footnote">11</a>]</sup></p><p>See also <a class="xref" href="casestudies.perftroub.html#casestudies.xceivers" title="13.3.4. Case Study #4 (xcievers Config)">Section 13.3.4, “Case Study #4 (xcievers Config)”</a>
|
||||
</p></div></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e285" href="#d2121e285" class="para">1</a>] </sup>
|
||||
Be careful editing XML. Make sure you close all elements.
|
||||
Run your file through <span class="command"><strong>xmllint</strong></span> or similar
|
||||
to ensure well-formedness of your document after an edit session.
|
||||
</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e334" href="#d2121e334" class="para">2</a>] </sup>The <a class="link" href="https://github.com/sujee/hadoop-dns-checker" target="_top">hadoop-dns-checker</a> tool can be used to verify
|
||||
DNS is working correctly on the cluster. The project README file provides detailed instructions on usage.
|
||||
</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e399" href="#d2121e399" class="para">3</a>] </sup>See Jack Levin's <a class="link" href="" target="_top">major hdfs issues</a>
|
||||
note up on the user list.</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e406" href="#d2121e406" class="para">4</a>] </sup>The requirement that a database requires upping of system limits
|
||||
is not peculiar to Apache HBase. See for example the section
|
||||
<span class="emphasis"><em>Setting Shell Limits for the Oracle User</em></span> in
|
||||
<a class="link" href="http://www.akadia.com/services/ora_linux_install_10g.html" target="_top">
|
||||
Short Guide to install Oracle 10 on Linux</a>.</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e418" href="#d2121e418" class="para">5</a>] </sup>A useful read setting config on you hadoop cluster is Aaron
|
||||
Kimball's Configuration
|
||||
Parameters: What can you just ignore?</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e491" href="#d2121e491" class="para">6</a>] </sup>The Cloudera blog post <a class="link" href="http://www.cloudera.com/blog/2012/01/an-update-on-apache-hadoop-1-0/" target="_top">An update on Apache Hadoop 1.0</a>
|
||||
by Charles Zedlewski has a nice exposition on how all the Hadoop versions relate.
|
||||
It's worth checking out if you are having trouble making sense of the
|
||||
Hadoop version morass.
|
||||
</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e519" href="#d2121e519" class="para">7</a>] </sup>Until recently only the
|
||||
<a class="link" href="http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-append/" target="_top">branch-0.20-append</a>
|
||||
branch had a working sync but no official release was ever made from this branch.
|
||||
You had to build it yourself. Michael Noll wrote a detailed blog,
|
||||
<a class="link" href="http://www.michael-noll.com/blog/2011/04/14/building-an-hadoop-0-20-x-version-for-hbase-0-90-2/" target="_top">Building
|
||||
an Hadoop 0.20.x version for Apache HBase 0.90.2</a>, on how to build an
|
||||
Hadoop from branch-0.20-append. Recommended.</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e529" href="#d2121e529" class="para">8</a>] </sup>Praveen Kumar has written
|
||||
a complementary article,
|
||||
<a class="link" href="http://praveen.kumar.in/2011/06/20/building-hadoop-and-hbase-for-hbase-maven-application-development/" target="_top">Building Hadoop and HBase for HBase Maven application development</a>.
|
||||
</p></div><div class="footnote"><code class="varname"><sup>[<a name="ftn.d2121e535" href="#d2121e535" class="varname">9</a>] </sup>dfs.support.append</code></div><div class="footnote"><p><sup>[<a id="ftn.d2121e552" href="#d2121e552" class="para">10</a>] </sup>See <a class="link" href="http://search-hadoop.com/m/7vFVx4EsUb2" target="_top">HBase, mail # dev - DISCUSS: Have hbase require at least hadoop 1.0.0 in hbase 0.96.0?</a></p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e640" href="#d2121e640" class="para">11</a>] </sup>See <a class="link" href="http://ccgtech.blogspot.com/2010/02/hadoop-hdfs-deceived-by-xciever.html" target="_top">Hadoop HDFS: Deceived by Xciever</a> for an informative rant on xceivering.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'configuration';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="quickstart.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="standalone_dist.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">1.2. Quick Start </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 2.2. HBase run modes: Standalone and Distributed</td></tr></table></div></body></html>
|
|
@ -1,17 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>6.12. Constraints</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="schema.html" title="Chapter 6. HBase and Schema Design"><link rel="prev" href="schema.ops.html" title="6.11. Operational and Performance Configuration Options"><link rel="next" href="mapreduce.html" title="Chapter 7. HBase and MapReduce"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">6.12. Constraints</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="schema.ops.html">Prev</a> </td><th width="60%" align="center">Chapter 6. HBase and Schema Design</th><td width="20%" align="right"> <a accesskey="n" href="mapreduce.html">Next</a></td></tr></table><hr></div><div class="section" title="6.12. Constraints"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="constraints"></a>6.12. Constraints</h2></div></div></div><p>HBase currently supports 'constraints' in traditional (SQL) database parlance. The advised usage for Constraints is in enforcing business rules for attributes in the table (eg. make sure values are in the range 1-10).
|
||||
Constraints could also be used to enforce referential integrity, but this is strongly discouraged as it will dramatically decrease the write throughput of the tables where integrity checking is enabled.
|
||||
Extensive documentation on using Constraints can be found at: <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/constraint" target="_top">Constraint</a> since version 0.94.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'constraints';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="schema.ops.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="schema.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="mapreduce.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">6.11. Operational and Performance Configuration Options </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 7. HBase and MapReduce</td></tr></table></div></body></html>
|
|
@ -1,54 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.7. Data Model Operations</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="cells.html" title="5.6. Cells"><link rel="next" href="versions.html" title="5.8. Versions"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.7. Data Model Operations</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="cells.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="versions.html">Next</a></td></tr></table><hr></div><div class="section" title="5.7. Data Model Operations"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="data_model_operations"></a>5.7. Data Model Operations</h2></div></div></div><p>The four primary data model operations are Get, Put, Scan, and Delete. Operations are applied via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html" target="_top">HTable</a> instances.
|
||||
</p><div class="section" title="5.7.1. Get"><div class="titlepage"><div><div><h3 class="title"><a name="get"></a>5.7.1. Get</h3></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html" target="_top">Get</a> returns
|
||||
attributes for a specified row. Gets are executed via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#get%28org.apache.hadoop.hbase.client.Get%29" target="_top">
|
||||
HTable.get</a>.
|
||||
</p></div><div class="section" title="5.7.2. Put"><div class="titlepage"><div><div><h3 class="title"><a name="put"></a>5.7.2. Put</h3></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Put.html" target="_top">Put</a> either
|
||||
adds new rows to a table (if the key is new) or can update existing rows (if the key already exists). Puts are executed via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#put%28org.apache.hadoop.hbase.client.Put%29" target="_top">
|
||||
HTable.put</a> (writeBuffer) or <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#batch%28java.util.List%29" target="_top">
|
||||
HTable.batch</a> (non-writeBuffer).
|
||||
</p></div><div class="section" title="5.7.3. Scans"><div class="titlepage"><div><div><h3 class="title"><a name="scan"></a>5.7.3. Scans</h3></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" target="_top">Scan</a> allow
|
||||
iteration over multiple rows for specified attributes.
|
||||
</p><p>The following is an example of a Scan
|
||||
on an HTable table instance. Assume that a table is populated with rows with keys "row1", "row2", "row3",
|
||||
and then another set of rows with the keys "abc1", "abc2", and "abc3". The following example shows how startRow and stopRow
|
||||
can be applied to a Scan instance to return the rows beginning with "row".
|
||||
</p><pre class="programlisting">
|
||||
HTable htable = ... // instantiate HTable
|
||||
|
||||
Scan scan = new Scan();
|
||||
scan.addColumn(Bytes.toBytes("cf"),Bytes.toBytes("attr"));
|
||||
scan.setStartRow( Bytes.toBytes("row")); // start key is inclusive
|
||||
scan.setStopRow( Bytes.toBytes("row" + (char)0)); // stop key is exclusive
|
||||
ResultScanner rs = htable.getScanner(scan);
|
||||
try {
|
||||
for (Result r = rs.next(); r != null; r = rs.next()) {
|
||||
// process result...
}
|
||||
} finally {
|
||||
rs.close(); // always close the ResultScanner!
|
||||
}
|
||||
</pre><p>
|
||||
</p></div><div class="section" title="5.7.4. Delete"><div class="titlepage"><div><div><h3 class="title"><a name="delete"></a>5.7.4. Delete</h3></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Delete.html" target="_top">Delete</a> removes
|
||||
a row from a table. Deletes are executed via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#delete%28org.apache.hadoop.hbase.client.Delete%29" target="_top">
|
||||
HTable.delete</a>.
|
||||
</p><p>HBase does not modify data in place, and so deletes are handled by creating new markers called <span class="emphasis"><em>tombstones</em></span>.
|
||||
These tombstones, along with the dead values, are cleaned up on major compactions.
|
||||
</p><p>See <a class="xref" href="versions.html#version.delete" title="5.8.1.5. Delete">Section 5.8.1.5, “Delete”</a> for more information on deleting versions of columns, and see
|
||||
<a class="xref" href="regions.arch.html#compaction" title="9.7.5.5. Compaction">Section 9.7.5.5, “Compaction”</a> for more information on compactions.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'data_model_operations';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="cells.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="versions.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.6. Cells </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.8. Versions</td></tr></table></div></body></html>
|
|
@ -1,42 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 5. Data Model</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="shell_tricks.html" title="4.2. Shell Tricks"><link rel="next" href="physical.view.html" title="5.2. Physical View"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 5. Data Model</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="shell_tricks.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="physical.view.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 5. Data Model"><div class="titlepage"><div><div><h2 class="title"><a name="datamodel"></a>Chapter 5. Data Model</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="datamodel.html#conceptual.view">5.1. Conceptual View</a></span></dt><dt><span class="section"><a href="physical.view.html">5.2. Physical View</a></span></dt><dt><span class="section"><a href="table.html">5.3. Table</a></span></dt><dt><span class="section"><a href="row.html">5.4. Row</a></span></dt><dt><span class="section"><a href="columnfamily.html">5.5. Column Family</a></span></dt><dt><span class="section"><a href="cells.html">5.6. Cells</a></span></dt><dt><span class="section"><a href="data_model_operations.html">5.7. Data Model Operations</a></span></dt><dd><dl><dt><span class="section"><a href="data_model_operations.html#get">5.7.1. Get</a></span></dt><dt><span class="section"><a href="data_model_operations.html#put">5.7.2. 
Put</a></span></dt><dt><span class="section"><a href="data_model_operations.html#scan">5.7.3. Scans</a></span></dt><dt><span class="section"><a href="data_model_operations.html#delete">5.7.4. Delete</a></span></dt></dl></dd><dt><span class="section"><a href="versions.html">5.8. Versions</a></span></dt><dd><dl><dt><span class="section"><a href="versions.html#versions.ops">5.8.1. Versions and HBase Operations</a></span></dt><dt><span class="section"><a href="versions.html#d2121e3443">5.8.2. Current Limitations</a></span></dt></dl></dd><dt><span class="section"><a href="dm.sort.html">5.9. Sort Order</a></span></dt><dt><span class="section"><a href="dm.column.metadata.html">5.10. Column Metadata</a></span></dt><dt><span class="section"><a href="joins.html">5.11. Joins</a></span></dt></dl></div><p>In short, applications store data into an HBase table.
|
||||
Tables are made of rows and columns.
|
||||
All columns in HBase belong to a particular column family.
|
||||
Table cells -- the intersection of row and column
|
||||
coordinates -- are versioned.
|
||||
A cell’s content is an uninterpreted array of bytes.
|
||||
</p><p>Table row keys are also byte arrays so almost anything can
|
||||
serve as a row key from strings to binary representations of longs or
|
||||
even serialized data structures. Rows in HBase tables
|
||||
are sorted by row key. The sort is byte-ordered. All table accesses are
|
||||
via the table row key -- its primary key.
|
||||
</p><div class="section" title="5.1. Conceptual View"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="conceptual.view"></a>5.1. Conceptual View</h2></div></div></div><p>
|
||||
The following example is a slightly modified form of the one on page
|
||||
2 of the <a class="link" href="http://research.google.com/archive/bigtable.html" target="_top">BigTable</a> paper.
|
||||
There is a table called <code class="varname">webtable</code> that contains two column families named
|
||||
<code class="varname">contents</code> and <code class="varname">anchor</code>.
|
||||
In this example, <code class="varname">anchor</code> contains two
|
||||
columns (<code class="varname">anchor:cnnsi.com</code>, <code class="varname">anchor:my.look.ca</code>)
|
||||
and <code class="varname">contents</code> contains one column (<code class="varname">contents:html</code>).
|
||||
</p><div class="note" title="Column Names" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Column Names</h3><p>
|
||||
By convention, a column name is made of its column family prefix and a
|
||||
<span class="emphasis"><em>qualifier</em></span>. For example, the
|
||||
column
|
||||
<span class="emphasis"><em>contents:html</em></span> is of the column family <code class="varname">contents</code>.
|
||||
The colon character (<code class="literal">:</code>) delimits the column family from the
|
||||
column family <span class="emphasis"><em>qualifier</em></span>.
|
||||
</p></div><p>
|
||||
</p><div class="table"><a name="d2121e2922"></a><p class="title"><b>Table 5.1. Table <code class="varname">webtable</code></b></p><div class="table-contents"><table summary="Table webtable" border="1"><colgroup><col align="left" class="c1"><col align="left" class="c2"><col align="left" class="c3"><col align="left" class="c4"></colgroup><thead><tr><th align="left">Row Key</th><th align="left">Time Stamp</th><th align="left">ColumnFamily <code class="varname">contents</code></th><th align="left">ColumnFamily <code class="varname">anchor</code></th></tr></thead><tbody><tr><td align="left">"com.cnn.www"</td><td align="left">t9</td><td align="left"> </td><td align="left"><code class="varname">anchor:cnnsi.com</code> = "CNN"</td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t8</td><td align="left"> </td><td align="left"><code class="varname">anchor:my.look.ca</code> = "CNN.com"</td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t6</td><td align="left"><code class="varname">contents:html</code> = "<html>..."</td><td align="left"> </td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t5</td><td align="left"><code class="varname">contents:html</code> = "<html>..."</td><td align="left"> </td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t3</td><td align="left"><code class="varname">contents:html</code> = "<html>..."</td><td align="left"> </td></tr></tbody></table></div></div><p><br class="table-break">
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'datamodel';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="shell_tricks.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="physical.view.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">4.2. Shell Tricks </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.2. Physical View</td></tr></table></div></body></html>
|
File diff suppressed because one or more lines are too long
|
@ -1,53 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.9. Developing</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="getting.involved.html" title="15.8. Getting Involved"><link rel="next" href="submitting.patches.html" title="15.10. Submitting Patches"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.9. Developing</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="getting.involved.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="submitting.patches.html">Next</a></td></tr></table><hr></div><div class="section" title="15.9. Developing"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="developing"></a>15.9. Developing</h2></div></div></div><div class="section" title="15.9.1. Codelines"><div class="titlepage"><div><div><h3 class="title"><a name="codelines"></a>15.9.1. Codelines</h3></div></div></div><p>Most development is done on TRUNK. However, there are branches for minor releases (e.g., 0.90.1, 0.90.2, and 0.90.3 are on the 0.90 branch).</p><p>If you have any questions on this just send an email to the dev dist-list.</p></div><div class="section" title="15.9.2. Unit Tests"><div class="titlepage"><div><div><h3 class="title"><a name="unit.tests"></a>15.9.2. Unit Tests</h3></div></div></div><p>In HBase we use <a class="link" href="http://junit.org" target="_top">JUnit</a> 4.
|
||||
If you need to run miniclusters of HDFS, ZooKeeper, HBase, or MapReduce testing,
|
||||
be sure to check out the <code class="classname">HBaseTestingUtility</code>.
|
||||
Alex Baranau of Sematext describes how it can be used in
|
||||
<a class="link" href="http://blog.sematext.com/2010/08/30/hbase-case-study-using-hbasetestingutility-for-local-testing-development/" target="_top">HBase Case-Study: Using HBaseTestingUtility for Local Testing and Development</a> (2010).
|
||||
</p><div class="section" title="15.9.2.1. Mockito"><div class="titlepage"><div><div><h4 class="title"><a name="mockito"></a>15.9.2.1. Mockito</h4></div></div></div><p>Sometimes you don't need a full running server
|
||||
when unit testing. For example, some methods can make do with
|
||||
a <code class="classname">org.apache.hadoop.hbase.Server</code> instance
|
||||
or a <code class="classname">org.apache.hadoop.hbase.master.MasterServices</code>
|
||||
Interface reference rather than a full-blown
|
||||
<code class="classname">org.apache.hadoop.hbase.master.HMaster</code>.
|
||||
In these cases, you may be able to get away with a mocked
|
||||
<code class="classname">Server</code> instance. For example:
|
||||
</p><pre class="programlisting">
|
||||
TODO...
|
||||
</pre><p>
|
||||
</p></div></div><div class="section" title="15.9.3. Code Standards"><div class="titlepage"><div><div><h3 class="title"><a name="code.standards"></a>15.9.3. Code Standards</h3></div></div></div><p>See <a class="xref" href="ides.html#eclipse.code.formatting" title="15.2.1.1. Code Formatting">Section 15.2.1.1, “Code Formatting”</a> and <a class="xref" href="submitting.patches.html#common.patch.feedback" title="15.10.5. Common Patch Feedback">Section 15.10.5, “Common Patch Feedback”</a>.
|
||||
</p><p>Also, please pay attention to the interface stability/audience classifications that you
|
||||
will see all over our code base. They look like this at the head of the class:
|
||||
</p><pre class="programlisting">@InterfaceAudience.Public
|
||||
@InterfaceStability.Stable</pre><p>
|
||||
</p><p>If the <code class="classname">InterfaceAudience</code> is <code class="varname">Private</code>,
|
||||
we can change the class (and we do not need to include a <code class="classname">InterfaceStability</code> mark).
|
||||
If a class is marked <code class="varname">Public</code> but its <code class="classname">InterfaceStability</code>
|
||||
is marked <code class="varname">Unstable</code>, we can change it. If it's
|
||||
marked <code class="varname">Public</code>/<code class="varname">Evolving</code>, we're allowed to change it
|
||||
but should try not to. If it's <code class="varname">Public</code> and <code class="varname">Stable</code>
|
||||
we can't change it without a deprecation path or a really GREAT reason.</p><p>When you add new classes, mark them with the annotations above if publicly accessible.
|
||||
If you are not clear on how to mark your additions, ask on the dev list.
|
||||
</p><p>This convention comes from our parent project Hadoop.</p></div><div class="section" title="15.9.4. Invariants"><div class="titlepage"><div><div><h3 class="title"><a name="design.invariants"></a>15.9.4. Invariants</h3></div></div></div><p>We don't have many but what we have we list below. All are subject to challenge of
|
||||
course but until then, please hold to the rules of the road.
|
||||
</p><div class="section" title="15.9.4.1. No permanent state in ZooKeeper"><div class="titlepage"><div><div><h4 class="title"><a name="design.invariants.zk.data"></a>15.9.4.1. No permanent state in ZooKeeper</h4></div></div></div><p>ZooKeeper state should be transient (treat it like memory). If deleted, hbase
|
||||
should be able to recover and essentially be in the same state<sup>[<a name="d2121e10441" href="#ftn.d2121e10441" class="footnote">32</a>]</sup>.
|
||||
</p></div></div><div class="section" title="15.9.5. Running In-Situ"><div class="titlepage"><div><div><h3 class="title"><a name="run.insitu"></a>15.9.5. Running In-Situ</h3></div></div></div><p>If you are developing Apache HBase, frequently it is useful to test your changes against a more-real cluster than what you find in unit tests. In this case, HBase can be run directly from the source in local-mode.
|
||||
All you need to do is run:
|
||||
</p><pre class="programlisting">${HBASE_HOME}/bin/start-hbase.sh</pre><p>
|
||||
This will spin up a full local-cluster, just as if you had packaged up HBase and installed it on your machine.
|
||||
</p><p>Keep in mind that you will need to have installed HBase into your local maven repository for the in-situ cluster to work properly. That is, you will need to run:</p><pre class="programlisting">mvn clean install -DskipTests</pre><p>to ensure that maven can find the correct classpath and dependencies. Generally, the above command
|
||||
is just a good thing to try running first, if maven is acting oddly.</p></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e10441" href="#d2121e10441" class="para">32</a>] </sup>There are currently
|
||||
a few exceptions that we need to fix around whether a table is enabled or disabled</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'developing';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="getting.involved.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="submitting.patches.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.8. Getting Involved </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.10. Submitting Patches</td></tr></table></div></body></html>
|
|
@ -1,19 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.10. Column Metadata</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="dm.sort.html" title="5.9. Sort Order"><link rel="next" href="joins.html" title="5.11. Joins"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.10. Column Metadata</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="dm.sort.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="joins.html">Next</a></td></tr></table><hr></div><div class="section" title="5.10. Column Metadata"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="dm.column.metadata"></a>5.10. Column Metadata</h2></div></div></div><p>There is no store of column metadata outside of the internal KeyValue instances for a ColumnFamily.
|
||||
Thus, while HBase can support not only a wide number of columns per row, but a heterogeneous set of columns
|
||||
between rows as well, it is your responsibility to keep track of the column names.
|
||||
</p><p>The only way to get a complete set of columns that exist for a ColumnFamily is to process all the rows.
|
||||
For more information about how HBase stores data internally, see <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a>.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'dm.column.metadata';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="dm.sort.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="joins.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.9. Sort Order </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.11. Joins</td></tr></table></div></body></html>
|
|
@ -1,17 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.9. Sort Order</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="versions.html" title="5.8. Versions"><link rel="next" href="dm.column.metadata.html" title="5.10. Column Metadata"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.9. Sort Order</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="versions.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="dm.column.metadata.html">Next</a></td></tr></table><hr></div><div class="section" title="5.9. Sort Order"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="dm.sort"></a>5.9. Sort Order</h2></div></div></div><p>All data model operations in HBase return data in sorted order. First by row,
|
||||
then by ColumnFamily, followed by column qualifier, and finally timestamp (sorted
|
||||
in reverse, so newest records are returned first).
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'dm.sort';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="versions.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="dm.column.metadata.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.8. Versions </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.10. Column Metadata</td></tr></table></div></body></html>
|
|
@ -1,96 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>2.4. Example Configurations</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="configuration.html" title="Chapter 2. Apache HBase (TM) Configuration"><link rel="prev" href="config.files.html" title="2.3. Configuration Files"><link rel="next" href="important_configurations.html" title="2.5. The Important Configurations"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">2.4. Example Configurations</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="config.files.html">Prev</a> </td><th width="60%" align="center">Chapter 2. Apache HBase (TM) Configuration</th><td width="20%" align="right"> <a accesskey="n" href="important_configurations.html">Next</a></td></tr></table><hr></div><div class="section" title="2.4. Example Configurations"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="example_config"></a>2.4. Example Configurations</h2></div></div></div><div class="section" title="2.4.1. Basic Distributed HBase Install"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e2235"></a>2.4.1. Basic Distributed HBase Install</h3></div></div></div><p>Here is an example basic configuration for a distributed ten
|
||||
node cluster. The nodes are named <code class="varname">example0</code>,
|
||||
<code class="varname">example1</code>, etc., through node
|
||||
<code class="varname">example9</code> in this example. The HBase Master and the
|
||||
HDFS namenode are running on the node <code class="varname">example0</code>.
|
||||
RegionServers run on nodes
|
||||
<code class="varname">example1</code>-<code class="varname">example9</code>. A 3-node
|
||||
ZooKeeper ensemble runs on <code class="varname">example1</code>,
|
||||
<code class="varname">example2</code>, and <code class="varname">example3</code> on the
|
||||
default ports. ZooKeeper data is persisted to the directory
|
||||
<code class="filename">/export/zookeeper</code>. Below we show what the main
|
||||
configuration files -- <code class="filename">hbase-site.xml</code>,
|
||||
<code class="filename">regionservers</code>, and
|
||||
<code class="filename">hbase-env.sh</code> -- found in the HBase
|
||||
<code class="filename">conf</code> directory might look like.</p><div class="section" title="2.4.1.1. hbase-site.xml"><div class="titlepage"><div><div><h4 class="title"><a name="hbase_site"></a>2.4.1.1. <code class="filename">hbase-site.xml</code></h4></div></div></div><pre class="programlisting">
|
||||
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>hbase.zookeeper.quorum</name>
|
||||
<value>example1,example2,example3</value>
|
||||
<description>The directory shared by RegionServers.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.zookeeper.property.dataDir</name>
|
||||
<value>/export/zookeeper</value>
|
||||
<description>Property from ZooKeeper's config zoo.cfg.
|
||||
The directory where the snapshot is stored.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.rootdir</name>
|
||||
<value>hdfs://example0:8020/hbase</value>
|
||||
<description>The directory shared by RegionServers.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.cluster.distributed</name>
|
||||
<value>true</value>
|
||||
<description>The mode the cluster will be in. Possible values are
|
||||
false: standalone and pseudo-distributed setups with managed Zookeeper
|
||||
true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)
|
||||
</description>
|
||||
</property>
|
||||
</configuration>
|
||||
|
||||
</pre></div><div class="section" title="2.4.1.2. regionservers"><div class="titlepage"><div><div><h4 class="title"><a name="regionservers"></a>2.4.1.2. <code class="filename">regionservers</code></h4></div></div></div><p>In this file you list the nodes that will run RegionServers.
|
||||
In our case we run RegionServers on all but the head node
|
||||
<code class="varname">example0</code> which is carrying the HBase Master and
|
||||
the HDFS namenode</p><pre class="programlisting">
|
||||
example1
|
||||
example3
|
||||
example4
|
||||
example5
|
||||
example6
|
||||
example7
|
||||
example8
|
||||
example9
|
||||
</pre></div><div class="section" title="2.4.1.3. hbase-env.sh"><div class="titlepage"><div><div><h4 class="title"><a name="hbase_env"></a>2.4.1.3. <code class="filename">hbase-env.sh</code></h4></div></div></div><p>Below we use a <span class="command"><strong>diff</strong></span> to show the differences
|
||||
from default in the <code class="filename">hbase-env.sh</code> file. Here we
|
||||
are setting the HBase heap to be 4G instead of the default
|
||||
1G.</p><pre class="programlisting">
|
||||
|
||||
$ git diff hbase-env.sh
|
||||
diff --git a/conf/hbase-env.sh b/conf/hbase-env.sh
|
||||
index e70ebc6..96f8c27 100644
|
||||
--- a/conf/hbase-env.sh
|
||||
+++ b/conf/hbase-env.sh
|
||||
@@ -31,7 +31,7 @@ export JAVA_HOME=/usr/lib//jvm/java-6-sun/
|
||||
# export HBASE_CLASSPATH=
|
||||
|
||||
# The maximum amount of heap to use, in MB. Default is 1000.
|
||||
-# export HBASE_HEAPSIZE=1000
|
||||
+export HBASE_HEAPSIZE=4096
|
||||
|
||||
# Extra Java runtime options.
|
||||
# Below are what we set by default. May only work with SUN JVM.
|
||||
|
||||
</pre><p>Use <span class="command"><strong>rsync</strong></span> to copy the content of the
|
||||
<code class="filename">conf</code> directory to all nodes of the
|
||||
cluster.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'example_config';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="config.files.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="configuration.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="important_configurations.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">2.3. Configuration Files </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 2.5. The Important Configurations</td></tr></table></div></body></html>
|
|
@ -1,20 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 10. Apache HBase (TM) External APIs</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="arch.hdfs.html" title="9.9. HDFS"><link rel="next" href="rest.html" title="10.2. REST"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 10. Apache HBase (TM) External APIs</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="arch.hdfs.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="rest.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 10. Apache HBase (TM) External APIs"><div class="titlepage"><div><div><h2 class="title"><a name="external_apis"></a>Chapter 10. Apache HBase (TM) External APIs</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="external_apis.html#nonjava.jvm">10.1. Non-Java Languages Talking to the JVM</a></span></dt><dt><span class="section"><a href="rest.html">10.2. REST</a></span></dt><dt><span class="section"><a href="thrift.html">10.3. Thrift</a></span></dt><dd><dl><dt><span class="section"><a href="thrift.html#thrift.filter-language">10.3.1. Filter Language</a></span></dt></dl></dd><dt><span class="section"><a href="c.html">10.4. C/C++ Apache HBase Client</a></span></dt></dl></div>
|
||||
This chapter will cover access to Apache HBase (TM) either through non-Java languages, or through custom protocols.
|
||||
|
||||
<div class="section" title="10.1. Non-Java Languages Talking to the JVM"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="nonjava.jvm"></a>10.1. Non-Java Languages Talking to the JVM</h2></div></div></div><p>Currently the documentation on this topic is in the
|
||||
<a class="link" href="http://wiki.apache.org/hadoop/Hbase" target="_top">Apache HBase Wiki</a>.
|
||||
See also the <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/thrift/package-summary.html#package_description" target="_top">Thrift API Javadoc</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'external_apis';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="arch.hdfs.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="rest.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.9. HDFS </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 10.2. REST</td></tr></table></div></body></html>
|
|
@ -1,99 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix A. FAQ</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="community.roles.html" title="17.2. Community Roles"><link rel="next" href="hbck.in.depth.html" title="Appendix B. hbck In Depth"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix A. FAQ</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="community.roles.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="hbck.in.depth.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix A. FAQ"><div class="titlepage"><div><div><h2 class="title"><a name="faq"></a>Appendix A. FAQ</h2></div></div></div><div class="qandaset" title="Frequently Asked Questions"><a name="d2121e11158"></a><dl><dt>A.1. <a href="faq.html#d2121e11159">General</a></dt><dd><dl><dt> <a href="faq.html#d2121e11162">When should I use HBase?</a></dt><dt> <a href="faq.html#d2121e11171">Are there other HBase FAQs?</a></dt><dt> <a href="faq.html#faq.sql">Does HBase support SQL?</a></dt><dt> <a href="faq.html#d2121e11193">How can I find examples of NoSQL/HBase?</a></dt><dt> <a href="faq.html#d2121e11202">What is the history of HBase?</a></dt></dl></dd><dt>A.2. <a href="faq.html#faq.arch">Architecture</a></dt><dd><dl><dt> <a href="faq.html#faq.arch.regions">How does HBase handle Region-RegionServer assignment and locality?</a></dt></dl></dd><dt>A.3. 
<a href="faq.html#faq.config">Configuration</a></dt><dd><dl><dt> <a href="faq.html#faq.config.started">How can I get started with my first cluster?</a></dt><dt> <a href="faq.html#d2121e11236">Where can I learn about the rest of the configuration options?</a></dt></dl></dd><dt>A.4. <a href="faq.html#faq.design">Schema Design / Data Access</a></dt><dd><dl><dt> <a href="faq.html#faq.design.schema">How should I design my schema in HBase?</a></dt><dt> <a href="faq.html#d2121e11258">
|
||||
How can I store (fill in the blank) in HBase?
|
||||
</a></dt><dt> <a href="faq.html#secondary.indices">
|
||||
How can I handle secondary indexes in HBase?
|
||||
</a></dt><dt> <a href="faq.html#faq.changing.rowkeys">Can I change a table's rowkeys?</a></dt><dt> <a href="faq.html#faq.apis">What APIs does HBase support?</a></dt></dl></dd><dt>A.5. <a href="faq.html#faq.mapreduce">MapReduce</a></dt><dd><dl><dt> <a href="faq.html#faq.mapreduce.use">How can I use MapReduce with HBase?</a></dt></dl></dd><dt>A.6. <a href="faq.html#d2121e11310">Performance and Troubleshooting</a></dt><dd><dl><dt> <a href="faq.html#d2121e11313">
|
||||
How can I improve HBase cluster performance?
|
||||
</a></dt><dt> <a href="faq.html#d2121e11322">
|
||||
How can I troubleshoot my HBase cluster?
|
||||
</a></dt></dl></dd><dt>A.7. <a href="faq.html#ec2">Amazon EC2</a></dt><dd><dl><dt> <a href="faq.html#d2121e11334">
|
||||
I am running HBase on Amazon EC2 and...
|
||||
</a></dt></dl></dd><dt>A.8. <a href="faq.html#d2121e11345">Operations</a></dt><dd><dl><dt> <a href="faq.html#d2121e11348">
|
||||
How do I manage my HBase cluster?
|
||||
</a></dt><dt> <a href="faq.html#d2121e11357">
|
||||
How do I back up my HBase cluster?
|
||||
</a></dt></dl></dd><dt>A.9. <a href="faq.html#d2121e11366">HBase in Action</a></dt><dd><dl><dt> <a href="faq.html#d2121e11369">Where can I find interesting videos and presentations on HBase?</a></dt></dl></dd></dl><table border="0" width="100%" summary="Q and A Set"><col align="left" width="1%"><col><tbody><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="d2121e11159"></a>A.1. General</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#d2121e11162">When should I use HBase?</a></dt><dt> <a href="faq.html#d2121e11171">Are there other HBase FAQs?</a></dt><dt> <a href="faq.html#faq.sql">Does HBase support SQL?</a></dt><dt> <a href="faq.html#d2121e11193">How can I find examples of NoSQL/HBase?</a></dt><dt> <a href="faq.html#d2121e11202">What is the history of HBase?</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11162"></a><a name="d2121e11163"></a></td><td align="left" valign="top"><p>When should I use HBase?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>See the <a class="xref" href="architecture.html#arch.overview" title="9.1. Overview">Section 9.1, “Overview”</a> in the Architecture chapter.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11171"></a><a name="d2121e11172"></a></td><td align="left" valign="top"><p>Are there other HBase FAQs?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See the FAQ that is up on the wiki, <a class="link" href="http://wiki.apache.org/hadoop/Hbase/FAQ" target="_top">HBase Wiki FAQ</a>.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.sql"></a><a name="d2121e11182"></a></td><td align="left" valign="top"><p>Does HBase support SQL?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
Not really. SQL-ish support for HBase via <a class="link" href="http://hive.apache.org/" target="_top">Hive</a> is in development; however, Hive is based on MapReduce, which is not generally suitable for low-latency requests.
|
||||
See the <a class="xref" href="datamodel.html" title="Chapter 5. Data Model">Chapter 5, <i>Data Model</i></a> section for examples on the HBase client.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11193"></a><a name="d2121e11194"></a></td><td align="left" valign="top"><p>How can I find examples of NoSQL/HBase?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>See the link to the BigTable paper in <a class="xref" href="other.info.html" title="Appendix F. Other Information About HBase">Appendix F, <i>Other Information About HBase</i></a> in the appendix, as
|
||||
well as the other papers.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11202"></a><a name="d2121e11203"></a></td><td align="left" valign="top"><p>What is the history of HBase?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>See <a class="xref" href="hbase.history.html" title="Appendix G. HBase History">Appendix G, <i>HBase History</i></a>.
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="faq.arch"></a>A.2. Architecture</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#faq.arch.regions">How does HBase handle Region-RegionServer assignment and locality?</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.arch.regions"></a><a name="d2121e11215"></a></td><td align="left" valign="top"><p>How does HBase handle Region-RegionServer assignment and locality?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="regions.arch.html" title="9.7. Regions">Section 9.7, “Regions”</a>.
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="faq.config"></a>A.3. Configuration</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#faq.config.started">How can I get started with my first cluster?</a></dt><dt> <a href="faq.html#d2121e11236">Where can I learn about the rest of the configuration options?</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.config.started"></a><a name="d2121e11227"></a></td><td align="left" valign="top"><p>How can I get started with my first cluster?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="quickstart.html" title="1.2. Quick Start">Section 1.2, “Quick Start”</a>.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.config.started"></a><a name="d2121e11236"></a></td><td align="left" valign="top"><p>Where can I learn about the rest of the configuration options?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="configuration.html" title="Chapter 2. Apache HBase (TM) Configuration">Chapter 2, <i>Apache HBase (TM) Configuration</i></a>.
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="faq.design"></a>A.4. Schema Design / Data Access</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#faq.design.schema">How should I design my schema in HBase?</a></dt><dt> <a href="faq.html#d2121e11258">
|
||||
How can I store (fill in the blank) in HBase?
|
||||
</a></dt><dt> <a href="faq.html#secondary.indices">
|
||||
How can I handle secondary indexes in HBase?
|
||||
</a></dt><dt> <a href="faq.html#faq.changing.rowkeys">Can I change a table's rowkeys?</a></dt><dt> <a href="faq.html#faq.apis">What APIs does HBase support?</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.design.schema"></a><a name="d2121e11248"></a></td><td align="left" valign="top"><p>How should I design my schema in HBase?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="datamodel.html" title="Chapter 5. Data Model">Chapter 5, <i>Data Model</i></a> and <a class="xref" href="schema.html" title="Chapter 6. HBase and Schema Design">Chapter 6, <i>HBase and Schema Design</i></a>
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11258"></a><a name="d2121e11259"></a></td><td align="left" valign="top"><p>
|
||||
How can I store (fill in the blank) in HBase?
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="supported.datatypes.html" title="6.5. Supported Datatypes">Section 6.5, “
|
||||
Supported Datatypes
|
||||
”</a>.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="secondary.indices"></a><a name="d2121e11268"></a></td><td align="left" valign="top"><p>
|
||||
How can I handle secondary indexes in HBase?
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="secondary.indexes.html" title="6.9. Secondary Indexes and Alternate Query Paths">Section 6.9, “
|
||||
Secondary Indexes and Alternate Query Paths
|
||||
”</a>
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.changing.rowkeys"></a><a name="d2121e11277"></a></td><td align="left" valign="top"><p>Can I change a table's rowkeys?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
This is a very common question. You can't. See <a class="xref" href="rowkey.design.html#changing.rowkeys" title="6.3.5. Immutability of Rowkeys">Section 6.3.5, “Immutability of Rowkeys”</a>.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.apis"></a><a name="d2121e11286"></a></td><td align="left" valign="top"><p>What APIs does HBase support?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="datamodel.html" title="Chapter 5. Data Model">Chapter 5, <i>Data Model</i></a>, <a class="xref" href="client.html" title="9.3. Client">Section 9.3, “Client”</a> and <a class="xref" href="external_apis.html#nonjava.jvm" title="10.1. Non-Java Languages Talking to the JVM">Section 10.1, “Non-Java Languages Talking to the JVM”</a>.
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="faq.mapreduce"></a>A.5. MapReduce</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#faq.mapreduce.use">How can I use MapReduce with HBase?</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="faq.mapreduce.use"></a><a name="d2121e11302"></a></td><td align="left" valign="top"><p>How can I use MapReduce with HBase?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="mapreduce.html" title="Chapter 7. HBase and MapReduce">Chapter 7, <i>HBase and MapReduce</i></a>
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="d2121e11310"></a>A.6. Performance and Troubleshooting</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#d2121e11313">
|
||||
How can I improve HBase cluster performance?
|
||||
</a></dt><dt> <a href="faq.html#d2121e11322">
|
||||
How can I troubleshoot my HBase cluster?
|
||||
</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11313"></a><a name="d2121e11314"></a></td><td align="left" valign="top"><p>
|
||||
How can I improve HBase cluster performance?
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning">Chapter 11, <i>Apache HBase (TM) Performance Tuning</i></a>.
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11322"></a><a name="d2121e11323"></a></td><td align="left" valign="top"><p>
|
||||
How can I troubleshoot my HBase cluster?
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="trouble.html" title="Chapter 12. Troubleshooting and Debugging Apache HBase (TM)">Chapter 12, <i>Troubleshooting and Debugging Apache HBase (TM)</i></a>.
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="ec2"></a>A.7. Amazon EC2</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#d2121e11334">
|
||||
I am running HBase on Amazon EC2 and...
|
||||
</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11334"></a><a name="d2121e11335"></a></td><td align="left" valign="top"><p>
|
||||
I am running HBase on Amazon EC2 and...
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
EC2 issues are a special case. See the Troubleshooting <a class="xref" href="trouble.ec2.html" title="12.12. Amazon EC2">Section 12.12, “Amazon EC2”</a> and Performance <a class="xref" href="perf.ec2.html" title="11.11. Amazon EC2">Section 11.11, “Amazon EC2”</a> sections.
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="d2121e11345"></a>A.8. Operations</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#d2121e11348">
|
||||
How do I manage my HBase cluster?
|
||||
</a></dt><dt> <a href="faq.html#d2121e11357">
|
||||
How do I back up my HBase cluster?
|
||||
</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11348"></a><a name="d2121e11349"></a></td><td align="left" valign="top"><p>
|
||||
How do I manage my HBase cluster?
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management">Chapter 14, <i>Apache HBase (TM) Operational Management</i></a>
|
||||
</p></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11357"></a><a name="d2121e11358"></a></td><td align="left" valign="top"><p>
|
||||
How do I back up my HBase cluster?
|
||||
</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="ops.backup.html" title="14.7. HBase Backup">Section 14.7, “HBase Backup”</a>
|
||||
</p></td></tr><tr class="qandadiv"><td align="left" valign="top" colspan="2"><h3 class="title"><a name="d2121e11366"></a>A.9. HBase in Action</h3></td></tr><tr class="toc"><td align="left" valign="top" colspan="2"><dl><dt> <a href="faq.html#d2121e11369">Where can I find interesting videos and presentations on HBase?</a></dt></dl></td></tr><tr class="question"><td align="left" valign="top"><a name="d2121e11369"></a><a name="d2121e11370"></a></td><td align="left" valign="top"><p>Where can I find interesting videos and presentations on HBase?</p></td></tr><tr class="answer"><td align="left" valign="top"></td><td align="left" valign="top"><p>
|
||||
See <a class="xref" href="other.info.html" title="Appendix F. Other Information About HBase">Appendix F, <i>Other Information About HBase</i></a>
|
||||
</p></td></tr></tbody></table></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'faq';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="community.roles.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="hbck.in.depth.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">17.2. Community Roles </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix B. hbck In Depth</td></tr></table></div></body></html>
|
|
@ -1,33 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.8. Getting Involved</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="maven.build.commands.html" title="15.7. Maven Build Commands"><link rel="next" href="developing.html" title="15.9. Developing"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.8. Getting Involved</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="maven.build.commands.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="developing.html">Next</a></td></tr></table><hr></div><div class="section" title="15.8. Getting Involved"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="getting.involved"></a>15.8. Getting Involved</h2></div></div></div><p>Apache HBase gets better only when people contribute!
|
||||
</p><p>As Apache HBase is an Apache Software Foundation project, see <a class="xref" href="asf.html" title="Appendix H. HBase and the Apache Software Foundation">Appendix H, <i>HBase and the Apache Software Foundation</i></a> for more information about how the ASF functions.
|
||||
</p><div class="section" title="15.8.1. Mailing Lists"><div class="titlepage"><div><div><h3 class="title"><a name="mailing.list"></a>15.8.1. Mailing Lists</h3></div></div></div><p>Sign up for the dev-list and the user-list. See the
|
||||
<a class="link" href="http://hbase.apache.org/mail-lists.html" target="_top">mailing lists</a> page.
|
||||
Posing questions - and helping to answer other people's questions - is encouraged!
|
||||
There are varying levels of experience on both lists so patience and politeness are encouraged (and please
|
||||
stay on topic.)
|
||||
</p></div><div class="section" title="15.8.2. Jira"><div class="titlepage"><div><div><h3 class="title"><a name="jira"></a>15.8.2. Jira</h3></div></div></div><p>Check for existing issues in <a class="link" href="https://issues.apache.org/jira/browse/HBASE" target="_top">Jira</a>.
|
||||
If it's a new feature request, an enhancement, or a bug, file a ticket.
|
||||
</p><div class="section" title="15.8.2.1. Jira Priorities"><div class="titlepage"><div><div><h4 class="title"><a name="jira.priorities"></a>15.8.2.1. Jira Priorities</h4></div></div></div><p>The following is a guideline on setting Jira issue priorities:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Blocker: Should only be used if the issue WILL cause data loss or cluster instability reliably.</li><li class="listitem">Critical: The issue described can cause data loss or cluster instability in some cases.</li><li class="listitem">Major: Important but not tragic issues, like updates to the client API that will add a lot of much-needed functionality or significant
|
||||
bugs that need to be fixed but that don't cause data loss.</li><li class="listitem">Minor: Useful enhancements and annoying but not damaging bugs.</li><li class="listitem">Trivial: Useful enhancements but generally cosmetic.</li></ul></div><p>
|
||||
</p></div><div class="section" title="15.8.2.2. Code Blocks in Jira Comments"><div class="titlepage"><div><div><h4 class="title"><a name="submitting.patches.jira.code"></a>15.8.2.2. Code Blocks in Jira Comments</h4></div></div></div><p>A commonly used macro in Jira is {code}. If you do this in a Jira comment...
|
||||
</p><pre class="programlisting">
|
||||
{code}
|
||||
code snippet
|
||||
{code}
|
||||
</pre><p>
|
||||
... Jira will format the code snippet like code, instead of a regular comment. It improves readability.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'getting.involved';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="maven.build.commands.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="developing.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.7. Maven Build Commands </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.9. Developing</td></tr></table></div></body></html>
|
|
@ -1,16 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 1. Getting Started</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="preface.html" title="Preface"><link rel="next" href="quickstart.html" title="1.2. Quick Start"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 1. Getting Started</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="preface.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="quickstart.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 1. Getting Started"><div class="titlepage"><div><div><h2 class="title"><a name="getting_started"></a>Chapter 1. Getting Started</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="getting_started.html#d2121e77">1.1. Introduction</a></span></dt><dt><span class="section"><a href="quickstart.html">1.2. Quick Start</a></span></dt><dd><dl><dt><span class="section"><a href="quickstart.html#d2121e105">1.2.1. Download and unpack the latest stable release.</a></span></dt><dt><span class="section"><a href="quickstart.html#start_hbase">1.2.2. Start HBase</a></span></dt><dt><span class="section"><a href="quickstart.html#shell_exercises">1.2.3. Shell Exercises</a></span></dt><dt><span class="section"><a href="quickstart.html#stopping">1.2.4. Stopping HBase</a></span></dt><dt><span class="section"><a href="quickstart.html#d2121e265">1.2.5. Where to go next</a></span></dt></dl></dd></dl></div><div class="section" title="1.1. 
Introduction"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e77"></a>1.1. Introduction</h2></div></div></div><p><a class="xref" href="quickstart.html" title="1.2. Quick Start">Section 1.2, “Quick Start”</a> will get you up and
|
||||
running on a single-node instance of HBase using the local filesystem.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'getting_started';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="preface.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="quickstart.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Preface </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 1.2. Quick Start</td></tr></table></div></body></html>
|
|
@ -1,30 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>C.4. GZIP</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="compression.html" title="Appendix C. Compression In HBase"><link rel="prev" href="lzo.compression.html" title="C.3. LZO"><link rel="next" href="snappy.compression.html" title="C.5. SNAPPY"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">C.4.
|
||||
GZIP
|
||||
</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="lzo.compression.html">Prev</a> </td><th width="60%" align="center">Appendix C. Compression In HBase</th><td width="20%" align="right"> <a accesskey="n" href="snappy.compression.html">Next</a></td></tr></table><hr></div><div class="section" title="C.4. GZIP"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="gzip.compression"></a>C.4.
|
||||
GZIP
|
||||
</h2></div></div></div><p>
|
||||
GZIP will generally compress better than LZO though slower.
|
||||
For some setups, better compression may be preferred.
|
||||
Java will use Java's GZIP unless the native Hadoop libs are
|
||||
available on the CLASSPATH; in this case it will use native
|
||||
compressors instead (If the native libs are NOT present,
|
||||
you will see lots of <span class="emphasis"><em>Got brand-new compressor</em></span>
|
||||
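reports in your logs; see <a class="xref" href="trouble.html" title="Chapter 12. Troubleshooting and Debugging Apache HBase (TM)">Chapter 12, <i>Troubleshooting and Debugging Apache HBase (TM)</i></a>).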
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'gzip.compression';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="lzo.compression.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="compression.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="snappy.compression.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">C.3.
|
||||
LZO
|
||||
</td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> C.5.
|
||||
SNAPPY
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,120 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>8.2. Access Control</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="security.html" title="Chapter 8. Secure Apache HBase (TM)"><link rel="prev" href="security.html" title="Chapter 8. Secure Apache HBase (TM)"><link rel="next" href="architecture.html" title="Chapter 9. Architecture"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">8.2. Access Control</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="security.html">Prev</a> </td><th width="60%" align="center">Chapter 8. Secure Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="architecture.html">Next</a></td></tr></table><hr></div><div class="section" title="8.2. Access Control"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="hbase.accesscontrol.configuration"></a>8.2. Access Control</h2></div></div></div><p>
|
||||
Newer releases of Apache HBase (>= 0.92) support optional access control
|
||||
list (ACL-) based protection of resources on a column family and/or
|
||||
table basis.
|
||||
</p><p>
|
||||
This describes how to set up Secure HBase for access control, with an
|
||||
example of granting and revoking user permission on table resources
|
||||
provided.
|
||||
</p><div class="section" title="8.2.1. Prerequisites"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e4236"></a>8.2.1. Prerequisites</h3></div></div></div><p>
|
||||
You must configure HBase for secure operation. Refer to the section
|
||||
"Secure Client Access to HBase" and complete all of the steps described
|
||||
there.
|
||||
</p><p>
|
||||
You must also configure ZooKeeper for secure operation. Changes to ACLs
|
||||
are synchronized throughout the cluster using ZooKeeper. Secure
|
||||
authentication to ZooKeeper must be enabled or otherwise it will be
|
||||
possible to subvert HBase access control via direct client access to
|
||||
ZooKeeper. Refer to the section on secure ZooKeeper configuration and
|
||||
complete all of the steps described there.
|
||||
</p></div><div class="section" title="8.2.2. Overview"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e4243"></a>8.2.2. Overview</h3></div></div></div><p>
|
||||
With Secure RPC and Access Control enabled, client access to HBase is
|
||||
authenticated and user data is private unless access has been
|
||||
explicitly granted. Access to data can be granted at a table or per
|
||||
column family basis.
|
||||
</p><p>
|
||||
However, the following items have been left out of the initial
|
||||
implementation for simplicity:
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Row-level or per value (cell): This would require broader changes for storing the ACLs inline with rows. It is a future goal.</p></li><li class="listitem"><p>Push down of file ownership to HDFS: HBase is not designed for the case where files may have different permissions than the HBase system principal. Pushing file ownership down into HDFS would necessitate changes to core code. Also, while HDFS file ownership would make applying quotas easy, and possibly make bulk imports more straightforward, it is not clear that it would offer a more secure setup.</p></li><li class="listitem"><p>HBase managed "roles" as collections of permissions: We will not model "roles" internally in HBase to begin with. We instead allow group names to be granted permissions, which allows external modeling of roles via group membership. Groups are created and manipulated externally to HBase, via the Hadoop group mapping service.</p></li></ol></div><p>
|
||||
Access control mechanisms are mature and fairly standardized in the relational database world. The HBase implementation approximates current convention, but HBase has a simpler feature set than relational databases, especially in terms of client operations. We don't distinguish between an insert (new record) and update (of existing record), for example, as both collapse down into a Put. Accordingly, the important operations condense to four permissions: READ, WRITE, CREATE, and ADMIN.
|
||||
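</p><table id="d2121e4262"><caption>Operation To Permission Mapping</caption><thead><tr><th>Permission</th><th>Operation</th></tr></thead><tbody><tr><td>Read</td><td>Get</td></tr><tr><td></td><td>Exists</td></tr><tr><td></td><td>Scan</td></tr><tr><td>Write</td><td>Put</td></tr><tr><td></td><td>Delete</td></tr><tr><td></td><td>Lock/UnlockRow</td></tr><tr><td></td><td>IncrementColumnValue</td></tr><tr><td></td><td>CheckAndDelete/Put</td></tr><tr><td></td><td>Flush</td></tr><tr><td></td><td>Compact</td></tr><tr><td>Create</td><td>Create</td></tr><tr><td></td><td>Alter</td></tr><tr><td></td><td>Drop</td></tr><tr><td>Admin</td><td>Enable/Disable</td></tr><tr><td></td><td>Split</td></tr><tr><td></td><td>Major Compact</td></tr><tr><td></td><td>Grant</td></tr><tr><td></td><td>Revoke</td></tr><tr><td></td><td>Shutdown</td></tr></tbody></table><p>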
|
||||
Permissions can be granted in any of the following scopes, though
|
||||
CREATE and ADMIN permissions are effective only at table scope.
|
||||
</p><p>
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>Table</p><p>
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>Read: User can read from any column family in table</p></li><li class="listitem"><p>Write: User can write to any column family in table</p></li><li class="listitem"><p>Create: User can alter table attributes; add, alter, or drop column families; and drop the table.</p></li><li class="listitem"><p>Admin: User can alter table attributes; add, alter, or drop column families; and enable, disable, or drop the table. User can also trigger region (re)assignments or relocation.</p></li></ul></div><p>
|
||||
</p></li><li class="listitem"><p>Column Family</p><p>
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="circle"><li class="listitem"><p>Read: User can read from the column family</p></li><li class="listitem"><p>Write: User can write to the column family</p></li></ul></div><p>
|
||||
</p></li></ul></div><p>
|
||||
</p><p>
|
||||
There is also an implicit global scope for the superuser.
|
||||
</p><p>
|
||||
The superuser is a principal, specified in the HBase site configuration
|
||||
file, that has equivalent access to HBase as the 'root' user would on a
|
||||
UNIX derived system. Normally this is the principal that the HBase
|
||||
processes themselves authenticate as. Although future versions of HBase
|
||||
Access Control may support multiple superusers, the superuser privilege
|
||||
will always include the principal used to run the HMaster process. Only
|
||||
the superuser is allowed to create tables, switch the balancer on or
|
||||
off, or take other actions with global consequence. Furthermore, the
|
||||
superuser has an implicit grant of all permissions to all resources.
|
||||
</p><p>
|
||||
Tables have a new metadata attribute: OWNER, the user principal who owns
|
||||
the table. By default this will be set to the user principal who creates
|
||||
the table, though it may be changed at table creation time or during an
|
||||
alter operation by setting or changing the OWNER table attribute. Only a
|
||||
single user principal can own a table at a given time. A table owner will
|
||||
have all permissions over a given table.
|
||||
</p></div><div class="section" title="8.2.3. Server-side Configuration for Access Control"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e4400"></a>8.2.3. Server-side Configuration for Access Control</h3></div></div></div><p>
|
||||
Enable the AccessController coprocessor in the cluster configuration
|
||||
and restart HBase. The restart can be a rolling one. Complete the
|
||||
restart of all Master and RegionServer processes before setting up
|
||||
ACLs.
|
||||
</p><p>
|
||||
To enable the AccessController, modify the <code class="code">hbase-site.xml</code> file on every server machine in the cluster to look like:
|
||||
</p><pre class="programlisting">
|
||||
&lt;property&gt;
|
||||
&lt;name&gt;hbase.coprocessor.master.classes&lt;/name&gt;
|
||||
&lt;value&gt;org.apache.hadoop.hbase.security.access.AccessController&lt;/value&gt;
|
||||
&lt;/property&gt;
|
||||
&lt;property&gt;
|
||||
&lt;name&gt;hbase.coprocessor.region.classes&lt;/name&gt;
|
||||
&lt;value&gt;org.apache.hadoop.hbase.security.token.TokenProvider,
|
||||
org.apache.hadoop.hbase.security.access.AccessController&lt;/value&gt;
|
||||
&lt;/property&gt;
|
||||
</pre></div><div class="section" title="8.2.4. Shell Enhancements for Access Control"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e4412"></a>8.2.4. Shell Enhancements for Access Control</h3></div></div></div><p>
|
||||
The HBase shell has been extended to provide simple commands for editing and updating user permissions. The following commands have been added for access control list management:
|
||||
</p><p>
|
||||
Grant
|
||||
</p><p>
|
||||
</p><pre class="programlisting">
|
||||
grant &lt;user&gt; &lt;permissions&gt; &lt;table&gt; [ &lt;column family&gt; [ &lt;column qualifier&gt; ] ]
|
||||
</pre><p>
|
||||
</p><p>
|
||||
<code class="code">&lt;permissions&gt;</code> is zero or more letters from the set "RWCA": READ('R'), WRITE('W'), CREATE('C'), ADMIN('A').
|
||||
</p><p>
|
||||
Note: Grants and revocations of individual permissions on a resource are both accomplished using the <code class="code">grant</code> command. A separate <code class="code">revoke</code> command is also provided by the shell, but this is for fast revocation of all of a user's access rights to a given resource only.
|
||||
</p><p>
|
||||
Revoke
|
||||
</p><p>
|
||||
</p><pre class="programlisting">
|
||||
revoke &lt;user&gt; &lt;table&gt; [ &lt;column family&gt; [ &lt;column qualifier&gt; ] ]
|
||||
</pre><p>
|
||||
</p><p>
|
||||
Alter
|
||||
</p><p>
|
||||
The <code class="code">alter</code> command has been extended to allow ownership assignment:
|
||||
</p><pre class="programlisting">
|
||||
alter 'tablename', {OWNER => 'username'}
|
||||
</pre><p>
|
||||
</p><p>
|
||||
User Permission
|
||||
</p><p>
|
||||
The <code class="code">user_permission</code> command shows all access permissions for the current user for a given table:
|
||||
</p><pre class="programlisting">
|
||||
user_permission &lt;table&gt;
|
||||
</pre><p>
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hbase.accesscontrol.configuration';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="security.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="security.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="architecture.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 8. Secure Apache HBase (TM) </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 9. Architecture</td></tr></table></div></body></html>
|
|
@ -1,18 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix G. HBase History</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="other.info.books.hadoop.html" title="F.6. Hadoop Books"><link rel="next" href="asf.html" title="Appendix H. HBase and the Apache Software Foundation"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix G. HBase History</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="other.info.books.hadoop.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="asf.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix G. HBase History"><div class="titlepage"><div><div><h2 class="title"><a name="hbase.history"></a>Appendix G. HBase History</h2></div></div></div><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">2006: <a class="link" href="http://research.google.com/archive/bigtable.html" target="_top">BigTable</a> paper published by Google.
|
||||
</li><li class="listitem">2006 (end of year): HBase development starts.
|
||||
</li><li class="listitem">2008: HBase becomes Hadoop sub-project.
|
||||
</li><li class="listitem">2010: HBase becomes Apache top-level project.
|
||||
</li></ul></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hbase.history';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="other.info.books.hadoop.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="asf.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">F.6. Hadoop Books </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix H. HBase and the Apache Software Foundation</td></tr></table></div></body></html>
|
|
@ -1,22 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.5. Updating hbase.apache.org</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="mvn_repo.html" title="15.4. Adding an Apache HBase release to Apache's Maven Repository"><link rel="next" href="hbase.tests.html" title="15.6. Tests"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.5. Updating hbase.apache.org</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="mvn_repo.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="hbase.tests.html">Next</a></td></tr></table><hr></div><div class="section" title="15.5. Updating hbase.apache.org"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="hbase.org"></a>15.5. Updating hbase.apache.org</h2></div></div></div><div class="section" title="15.5.1. Contributing to hbase.apache.org"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.org.site.contributing"></a>15.5.1. Contributing to hbase.apache.org</h3></div></div></div><p>The Apache HBase apache web site (including this reference guide) is maintained as part of the main Apache HBase source tree, under <code class="filename">/src/docbkx</code> and <code class="filename">/src/site</code>. 
The former is this reference guide; the latter, in most cases, are legacy pages that are in the process of being merged into the docbkx tree.</p><p>To contribute to the reference guide, edit these files and submit them as a patch (see <a class="xref" href="submitting.patches.html" title="15.10. Submitting Patches">Section 15.10, “Submitting Patches”</a>). Your Jira should contain a summary of the changes in each section (see <a class="link" href="https://issues.apache.org/jira/browse/HBASE-6081" target="_top">HBASE-6081</a> for an example).</p><p>To generate the site locally while you're working on it, run:
|
||||
</p><pre class="programlisting">mvn site</pre><p>
|
||||
Then you can load up the generated HTML files in your browser (files are under <code class="filename">/target/site</code>).</p></div><div class="section" title="15.5.2. Publishing hbase.apache.org"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.org.site.publishing"></a>15.5.2. Publishing hbase.apache.org</h3></div></div></div><p>If you're a committer with rights to publish the site artifacts: set up your apache credentials and the target site location locally in a place and
|
||||
form that maven can pick it up, in <code class="filename">~/.m2/settings.xml</code>. See ??? for an example.
|
||||
Next, run the following:
|
||||
</p><pre class="programlisting">$ mvn -DskipTests -Papache-release site site:deploy</pre><p>
|
||||
You will be asked for your password. It can take a little time.
|
||||
Remember that it can take a few hours for your site changes to show up.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hbase.org';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="mvn_repo.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="hbase.tests.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.4. Adding an Apache HBase release to Apache's Maven Repository </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.6. Tests</td></tr></table></div></body></html>
|
|
@ -1,41 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>C.2. hbase.regionserver.codecs</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="compression.html" title="Appendix C. Compression In HBase"><link rel="prev" href="compression.html" title="Appendix C. Compression In HBase"><link rel="next" href="lzo.compression.html" title="C.3. LZO"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">C.2.
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="compression.html">Prev</a> </td><th width="60%" align="center">Appendix C. Compression In HBase</th><td width="20%" align="right"> <a accesskey="n" href="lzo.compression.html">Next</a></td></tr></table><hr></div><div class="section" title="C.2. hbase.regionserver.codecs"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="hbase.regionserver.codecs"></a>C.2.
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
</h2></div></div></div><p>
|
||||
To have a RegionServer test a set of codecs and fail-to-start if any
|
||||
codec is missing or misinstalled, add the configuration
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
to your <code class="filename">hbase-site.xml</code> with a value of
|
||||
codecs to test on startup. For example if the
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code> value is <code class="code">lzo,gz</code> and if lzo is not present
|
||||
or improperly installed, the misconfigured RegionServer will fail
|
||||
to start.
|
||||
</p><p>
|
||||
Administrators might make use of this facility to guard against
|
||||
the case where a new server is added to cluster but the cluster
|
||||
requires install of a particular codec.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hbase.regionserver.codecs';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="compression.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="compression.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="lzo.compression.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix C. Compression In HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> C.3.
|
||||
LZO
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,288 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.6. Tests</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="hbase.org.html" title="15.5. Updating hbase.apache.org"><link rel="next" href="maven.build.commands.html" title="15.7. Maven Build Commands"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.6. Tests</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hbase.org.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="maven.build.commands.html">Next</a></td></tr></table><hr></div><div class="section" title="15.6. Tests"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="hbase.tests"></a>15.6. Tests</h2></div></div></div><p> Developers, at a minimum, should familiarize themselves with the unit test detail; unit tests in
|
||||
HBase have a character not usually seen in other projects.</p><div class="section" title="15.6.1. Apache HBase Modules"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.moduletests"></a>15.6.1. Apache HBase Modules</h3></div></div></div><p>As of 0.96, Apache HBase is split into multiple modules which creates "interesting" rules for
|
||||
how and where tests are written. If you are writing code for <code class="classname">hbase-server</code>, see
|
||||
<a class="xref" href="hbase.tests.html#hbase.unittests" title="15.6.2. Unit Tests">Section 15.6.2, “Unit Tests”</a> for how to write your tests; these tests can spin
|
||||
up a minicluster and will need to be categorized. For any other module, for example
|
||||
<code class="classname">hbase-common</code>, the tests must be strict unit tests and just test the class
|
||||
under test - no use of the HBaseTestingUtility or minicluster is allowed (or even possible
|
||||
given the dependency tree).</p><div class="section" title="15.6.1.1. Running Tests in other Modules"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.moduletest.run"></a>15.6.1.1. Running Tests in other Modules</h4></div></div></div>
|
||||
If the module you are developing in has no other dependencies on other HBase modules, then
|
||||
you can cd into that module and just run:
|
||||
<pre class="programlisting">mvn test</pre>
|
||||
which will just run the tests IN THAT MODULE. If there are other dependencies on other modules,
|
||||
then you will have to run the command from the ROOT HBASE DIRECTORY. This will run the tests in the other
|
||||
modules, unless you specify to skip the tests in that module. For instance, to skip the tests in the hbase-server module,
|
||||
you would run:
|
||||
<pre class="programlisting">mvn clean test -Dskip-server-tests</pre>
|
||||
from the top level directory to run all the tests in modules other than hbase-server. Note that you
|
||||
can specify to skip tests in multiple modules as well as just for a single module. For example, to skip
|
||||
the tests in <code class="classname">hbase-server</code> and <code class="classname">hbase-common</code>, you would run:
|
||||
<pre class="programlisting">mvn clean test -Dskip-server-tests -Dskip-common-tests</pre><p>Also, keep in mind that if you are running tests in the <code class="classname">hbase-server</code> module you will need to
|
||||
apply the maven profiles discussed in <a class="xref" href="hbase.tests.html#hbase.unittests.cmds" title="15.6.3. Running tests">Section 15.6.3, “Running tests”</a> to get the tests to run properly.</p></div></div><div class="section" title="15.6.2. Unit Tests"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.unittests"></a>15.6.2. Unit Tests</h3></div></div></div><p>Apache HBase unit tests are subdivided into four categories: small, medium, large, and
|
||||
integration with corresponding JUnit <a class="link" href="http://www.junit.org/node/581" target="_top">categories</a>:
|
||||
<code class="classname">SmallTests</code>, <code class="classname">MediumTests</code>,
|
||||
<code class="classname">LargeTests</code>, <code class="classname">IntegrationTests</code>.
|
||||
JUnit categories are denoted using java annotations and look like this in your unit test code.
|
||||
</p><pre class="programlisting">...
|
||||
@Category(SmallTests.class)
|
||||
public class TestHRegionInfo {
|
||||
@Test
|
||||
public void testCreateHRegionInfoName() throws Exception {
|
||||
// ...
|
||||
}
|
||||
}</pre><p>
|
||||
The above example shows how to mark a unit test as belonging to the small category.
|
||||
All unit tests in HBase have a categorization.
|
||||
</p><p>
|
||||
The first three categories, small, medium, and large are for tests run when
|
||||
you type <code class="code">$ mvn test</code>; i.e. these three categorizations are for
|
||||
HBase unit tests. The integration category is not for unit tests but for integration
|
||||
tests. These are run when you invoke <code class="code">$ mvn verify</code>. Integration tests
|
||||
are described in <a class="xref" href="hbase.tests.html#integration.tests" title="15.6.5. Integration Tests">Section 15.6.5, “Integration Tests”</a> and will not be discussed further
|
||||
in this section on HBase unit tests.</p><p>
|
||||
Apache HBase uses a patched maven surefire plugin and maven profiles to implement
|
||||
its unit test characterizations.
|
||||
</p><p>Read the below to figure out which annotation of the set small, medium, and large to
|
||||
put on your new HBase unit test.
|
||||
</p><div class="section" title="15.6.2.1. Small Tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.small"></a>15.6.2.1. Small Tests<a class="indexterm" name="d2121e9851"></a></h4></div></div></div><p>
|
||||
<span class="emphasis"><em>Small</em></span> tests are executed in a shared JVM. We put in this category all the tests that can
|
||||
be executed quickly in a shared JVM. The maximum execution time for a small test is 15 seconds,
|
||||
and small tests should not use a (mini)cluster.</p></div><div class="section" title="15.6.2.2. Medium Tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.medium"></a>15.6.2.2. Medium Tests<a class="indexterm" name="d2121e9862"></a></h4></div></div></div><p><span class="emphasis"><em>Medium</em></span> tests represent tests that must be executed
|
||||
before proposing a patch. They are designed to run in less than 30 minutes altogether,
|
||||
and are quite stable in their results. They are designed to last less than 50 seconds
|
||||
individually. They can use a cluster, and each of them is executed in a separate JVM.
|
||||
</p></div><div class="section" title="15.6.2.3. Large Tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.large"></a>15.6.2.3. Large Tests<a class="indexterm" name="d2121e9872"></a></h4></div></div></div><p><span class="emphasis"><em>Large</em></span> tests are everything else. They are typically large-scale
|
||||
tests, regression tests for specific bugs, timeout tests, performance tests.
|
||||
They are executed before a commit on the pre-integration machines. They can be run on
|
||||
the developer machine as well.
|
||||
</p></div><div class="section" title="15.6.2.4. Integration Tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.integration"></a>15.6.2.4. Integration Tests<a class="indexterm" name="d2121e9882"></a></h4></div></div></div><p><span class="emphasis"><em>Integration</em></span> tests are system level tests. See
|
||||
<a class="xref" href="hbase.tests.html#integration.tests" title="15.6.5. Integration Tests">Section 15.6.5, “Integration Tests”</a> for more info.
|
||||
</p></div></div><div class="section" title="15.6.3. Running tests"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.unittests.cmds"></a>15.6.3. Running tests</h3></div></div></div><p>Below we describe how to run the Apache HBase junit categories.</p><div class="section" title="15.6.3.1. Default: small and medium category tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.cmds.test"></a>15.6.3.1. Default: small and medium category tests
|
||||
</h4></div></div></div><p>Running </p><pre class="programlisting">mvn test</pre><p> will execute all small tests in a single JVM
|
||||
(no fork) and then medium tests in a separate JVM for each test instance.
|
||||
Medium tests are NOT executed if there is an error in a small test.
|
||||
Large tests are NOT executed. There is one report for small tests, and one report for
|
||||
medium tests if they are executed.
|
||||
</p></div><div class="section" title="15.6.3.2. Running all tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.cmds.test.runAllTests"></a>15.6.3.2. Running all tests</h4></div></div></div><p>Running </p><pre class="programlisting">mvn test -P runAllTests</pre><p>
|
||||
will execute small tests in a single JVM then medium and large tests in a separate JVM for each test.
|
||||
Medium and large tests are NOT executed if there is an error in a small test.
|
||||
Large tests are NOT executed if there is an error in a small or medium test.
|
||||
There is one report for small tests, and one report for medium and large tests if they are executed.
|
||||
</p></div><div class="section" title="15.6.3.3. Running a single test or all tests in a package"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.cmds.test.localtests.mytest"></a>15.6.3.3. Running a single test or all tests in a package</h4></div></div></div><p>To run an individual test, e.g. <code class="classname">MyTest</code>, do
|
||||
</p><pre class="programlisting">mvn test -Dtest=MyTest</pre><p> You can also
|
||||
pass multiple, individual tests as a comma-delimited list:
|
||||
</p><pre class="programlisting">mvn test -Dtest=MyTest1,MyTest2,MyTest3</pre><p>
|
||||
You can also pass a package, which will run all tests under the package:
|
||||
</p><pre class="programlisting">mvn test -Dtest=org.apache.hadoop.hbase.client.*</pre><p>
|
||||
</p><p>
|
||||
When <code class="code">-Dtest</code> is specified, the <code class="code">localTests</code> profile will be used. It will use the official release
|
||||
of maven surefire, rather than our custom surefire plugin, and the old connector (The HBase build uses a patched
|
||||
version of the maven surefire plugin). Each junit test is executed in a separate JVM (A fork per test class).
|
||||
There is no parallelization when tests are running in this mode. You will see a new message at the end of the
|
||||
report: "[INFO] Tests are skipped". It's harmless. However, you need to make sure the sum of <code class="code">Tests run:</code> in
|
||||
the <code class="code">Results :</code> section of test reports matches the number of tests you specified because no
|
||||
error will be reported when a non-existent test case is specified.
|
||||
</p></div><div class="section" title="15.6.3.4. Other test invocation permutations"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.cmds.test.profiles"></a>15.6.3.4. Other test invocation permutations</h4></div></div></div><p>Running </p><pre class="programlisting">mvn test -P runSmallTests</pre><p> will execute small tests only, in a single JVM.
|
||||
</p><p>Running </p><pre class="programlisting">mvn test -P runMediumTests</pre><p> will execute medium tests in a single JVM.
|
||||
</p><p>Running </p><pre class="programlisting">mvn test -P runLargeTests</pre><p> will execute large tests in a single JVM.
|
||||
</p></div><div class="section" title="15.6.3.5. Running tests faster"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.test.faster"></a>15.6.3.5. Running tests faster</h4></div></div></div><p>
|
||||
By default, <code class="code">$ mvn test -P runAllTests</code> runs 5 tests in parallel.
|
||||
It can be increased on a developer's machine. Allowing that you can have 2
|
||||
tests in parallel per core, and you need about 2Gb of memory per test (at the
|
||||
extreme), if you have an 8 core, 24Gb box, you can have 16 tests in parallel.
|
||||
However, the memory available limits it to 12 (24/2). To run all tests with 12 tests
|
||||
in parallel, do this:
|
||||
<span class="command"><strong>mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12</strong></span>.
|
||||
To increase the speed, you can as well use a ramdisk. You will need 2Gb of memory
|
||||
to run all tests. You will also need to delete the files between two test runs.
|
||||
The typical way to configure a ramdisk on Linux is:
|
||||
</p><pre class="programlisting">$ sudo mkdir /ram2G
|
||||
sudo mount -t tmpfs -o size=2048M tmpfs /ram2G</pre><p>
|
||||
You can then use it to run all HBase tests with the command:
|
||||
<span class="command"><strong>mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12 -Dtest.build.data.basedirectory=/ram2G</strong></span>
|
||||
</p></div><div class="section" title="15.6.3.6. hbasetests.sh"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.cmds.test.hbasetests"></a>15.6.3.6. <span class="command"><strong>hbasetests.sh</strong></span></h4></div></div></div><p>It's also possible to use the script <span class="command"><strong>hbasetests.sh</strong></span>. This script runs the medium and
|
||||
large tests in parallel with two maven instances, and provides a single report. This script does not use
|
||||
the hbase version of surefire so no parallelization is being done other than the two maven instances the
|
||||
script sets up.
|
||||
It must be executed from the directory which contains the <code class="filename">pom.xml</code>.</p><p>For example running
|
||||
</p><pre class="programlisting">./dev-support/hbasetests.sh</pre><p> will execute small and medium tests.
|
||||
Running </p><pre class="programlisting">./dev-support/hbasetests.sh runAllTests</pre><p> will execute all tests.
|
||||
Running </p><pre class="programlisting">./dev-support/hbasetests.sh replayFailed</pre><p> will rerun the failed tests a
|
||||
second time, in a separate jvm and without parallelisation.
|
||||
</p></div><div class="section" title="15.6.3.7. Test Resource Checker"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.unittests.resource.checker"></a>15.6.3.7. Test Resource Checker<a class="indexterm" name="d2121e10005"></a></h4></div></div></div><p>
|
||||
A custom Maven SureFire plugin listener checks a number of resources before
|
||||
and after each HBase unit test runs and logs its findings at the end of the test
|
||||
output files which can be found in <code class="filename">target/surefire-reports</code>
|
||||
per Maven module (Tests write test reports named for the test class into this directory.
|
||||
Check the <code class="filename">*-out.txt</code> files). The resources counted are the number
|
||||
of threads, the number of file descriptors, etc. If the number has increased, it adds
|
||||
a <span class="emphasis"><em>LEAK?</em></span> comment in the logs. As you can have an HBase instance
|
||||
running in the background, some threads can be deleted/created without any specific
|
||||
action in the test. However, if the test does not work as expected, or if the test
|
||||
should not impact these resources, it's worth checking these log lines
|
||||
<code class="computeroutput">...hbase.ResourceChecker(157): before...</code> and
|
||||
<code class="computeroutput">...hbase.ResourceChecker(157): after...</code>. For example:
|
||||
<code class="computeroutput">
|
||||
2012-09-26 09:22:15,315 INFO [pool-1-thread-1] hbase.ResourceChecker(157): after: regionserver.TestColumnSeeking#testReseeking Thread=65 (was 65), OpenFileDescriptor=107 (was 107), MaxFileDescriptor=10240 (was 10240), ConnectionCount=1 (was 1)
|
||||
</code>
|
||||
</p></div></div><div class="section" title="15.6.4. Writing Tests"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.tests.writing"></a>15.6.4. Writing Tests</h3></div></div></div><div class="section" title="15.6.4.1. General rules"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.tests.rules"></a>15.6.4.1. General rules</h4></div></div></div><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">
|
||||
As much as possible, tests should be written as category small tests.
|
||||
</li><li class="listitem">
|
||||
All tests must be written to support parallel execution on the same machine, hence they should not use shared resources such as fixed ports or fixed file names.
|
||||
</li><li class="listitem">
|
||||
Tests should not overlog. More than 100 lines/second makes the logs complex to read and uses i/o that is hence not available for the other tests.
|
||||
</li><li class="listitem">
|
||||
Tests can be written with <code class="classname">HBaseTestingUtility</code>.
|
||||
This class offers helper functions to create a temp directory and do the cleanup, or to start a cluster.
|
||||
Categories and execution time
|
||||
</li><li class="listitem">
|
||||
All tests must be categorized, if not they could be skipped.
|
||||
</li><li class="listitem">
|
||||
All tests should be written to be as fast as possible.
|
||||
</li><li class="listitem">
|
||||
Small category tests should last less than 15 seconds, and must not have any side effect.
|
||||
</li><li class="listitem">
|
||||
Medium category tests should last less than 50 seconds.
|
||||
</li><li class="listitem">
|
||||
Large category tests should last less than 3 minutes. This should ensure a good parallelization for people using it, and ease the analysis when the test fails.
|
||||
</li></ul></div></div><div class="section" title="15.6.4.2. Sleeps in tests"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.tests.sleeps"></a>15.6.4.2. Sleeps in tests</h4></div></div></div><p>Whenever possible, tests should not use <code class="methodname">Thread.sleep</code>, but rather waiting for the real event they need. This is faster and clearer for the reader.
|
||||
Tests should not do a <code class="methodname">Thread.sleep</code> without testing an ending condition. This allows understanding what the test is waiting for. Moreover, the test will work whatever the machine performance is.
|
||||
Sleep should be minimal to be as fast as possible. Waiting for a variable should be done in a 40ms sleep loop. Waiting for a socket operation should be done in a 200 ms sleep loop.
|
||||
</p></div><div class="section" title="15.6.4.3. Tests using a cluster"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.tests.cluster"></a>15.6.4.3. Tests using a cluster
|
||||
</h4></div></div></div><p>Tests using a HRegion do not have to start a cluster: A region can use the local file system.
|
||||
Starting/stopping a cluster costs around 10 seconds. Clusters should not be started per test method but per test class.
|
||||
A started cluster must be shut down using <code class="methodname">HBaseTestingUtility#shutdownMiniCluster</code>, which cleans the directories.
|
||||
As much as possible, tests should use the default settings for the cluster. When they don't, they should document it. This will allow sharing the cluster later.
|
||||
</p></div></div><div class="section" title="15.6.5. Integration Tests"><div class="titlepage"><div><div><h3 class="title"><a name="integration.tests"></a>15.6.5. Integration Tests</h3></div></div></div><p>HBase integration/system tests are tests that are beyond HBase unit tests. They
|
||||
are generally long-lasting, sizeable (the test can be asked to handle 1M rows or 1B rows),
|
||||
targetable (they can take configuration that will point them at the ready-made cluster
|
||||
they are to run against; integration tests do not include cluster start/stop code),
|
||||
and, when verifying success, integration tests rely on public APIs only; they do not
|
||||
attempt to examine server internals asserting success/fail. Integration tests
|
||||
are what you would run when you need more elaborate proofing of a release candidate
|
||||
beyond what unit tests can do. They are not generally run on the Apache Continuous Integration
|
||||
build server, however, some sites opt to run integration tests as a part of their
|
||||
continuous testing on an actual cluster.
|
||||
</p><p>
|
||||
Integration tests currently live under the <code class="filename">src/test</code> directory
|
||||
in the hbase-it submodule and will match the regex: <code class="filename">**/IntegrationTest*.java</code>.
|
||||
All integration tests are also annotated with <code class="code">@Category(IntegrationTests.class)</code>.
|
||||
</p><p>
|
||||
Integration tests can be run in two modes: using a mini cluster, or against an actual distributed cluster.
|
||||
Maven failsafe is used to run the tests using the mini cluster. IntegrationTestsDriver class is used for
|
||||
executing the tests against a distributed cluster. Integration tests SHOULD NOT assume that they are running against a
|
||||
mini cluster, and SHOULD NOT use private APIs to access cluster state. To interact with the distributed or mini
|
||||
cluster uniformly, <code class="code">IntegrationTestingUtility</code>, and <code class="code">HBaseCluster</code> classes,
|
||||
and public client APIs can be used.
|
||||
</p><div class="section" title="15.6.5.1. Running integration tests against mini cluster"><div class="titlepage"><div><div><h4 class="title"><a name="maven.build.commands.integration.tests.mini"></a>15.6.5.1. Running integration tests against mini cluster</h4></div></div></div><p>HBase 0.92 added a <code class="varname">verify</code> maven target.
|
||||
Invoking it, for example by doing <code class="code">mvn verify</code>, will
|
||||
run all the phases up to and including the verify phase via the
|
||||
maven <a class="link" href="http://maven.apache.org/plugins/maven-failsafe-plugin/" target="_top">failsafe plugin</a>,
|
||||
running all the above mentioned HBase unit tests as well as tests that are in the HBase integration test group.
|
||||
After you have completed
|
||||
</p><pre class="programlisting">mvn install -DskipTests</pre><p>
|
||||
You can run just the integration tests by invoking:
|
||||
</p><pre class="programlisting">
|
||||
cd hbase-it
|
||||
mvn verify</pre><p>
|
||||
|
||||
If you just want to run the integration tests in top-level, you need to run two commands. First:
|
||||
</p><pre class="programlisting">mvn failsafe:integration-test</pre><p>
|
||||
This actually runs ALL the integration tests.
|
||||
</p><div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3><p>This command will always output <code class="code">BUILD SUCCESS</code> even if there are test failures.
|
||||
</p></div><p>
|
||||
At this point, you could grep the output by hand looking for failed tests. However, maven will do this for us; just use:
|
||||
</p><pre class="programlisting">mvn failsafe:verify</pre><p>
|
||||
The above command basically looks at all the test results (so don't remove the 'target' directory) for test failures and reports the results.</p><div class="section" title="15.6.5.1.1. Running a subset of Integration tests"><div class="titlepage"><div><div><h5 class="title"><a name="maven.build.commanas.integration.tests2"></a>15.6.5.1.1. Running a subset of Integration tests</h5></div></div></div><p>This is very similar to how you specify running a subset of unit tests (see above), but use the property
|
||||
<code class="code">it.test</code> instead of <code class="code">test</code>.
|
||||
To just run <code class="classname">IntegrationTestClassXYZ.java</code>, use:
|
||||
</p><pre class="programlisting">mvn failsafe:integration-test -Dit.test=IntegrationTestClassXYZ</pre><p>
|
||||
The next thing you might want to do is run groups of integration tests, say all integration tests that are named IntegrationTestClassX*.java:
|
||||
</p><pre class="programlisting">mvn failsafe:integration-test -Dit.test=*ClassX*</pre><p>
|
||||
This runs everything that is an integration test that matches *ClassX*. This means anything matching: "**/IntegrationTest*ClassX*".
|
||||
You can also run multiple groups of integration tests using comma-delimited lists (similar to unit tests). Using a list of matches still supports full regex matching for each of the groups. This would look something like:
|
||||
</p><pre class="programlisting">mvn failsafe:integration-test -Dit.test=*ClassX*,*ClassY</pre><p>
|
||||
</p></div></div><div class="section" title="15.6.5.2. Running integration tests against distributed cluster"><div class="titlepage"><div><div><h4 class="title"><a name="maven.build.commands.integration.tests.distributed"></a>15.6.5.2. Running integration tests against distributed cluster</h4></div></div></div><p>
|
||||
If you have an already-setup HBase cluster, you can launch the integration tests by invoking the class <code class="code">IntegrationTestsDriver</code>. You may have to
|
||||
run test-compile first. The configuration will be picked up by the bin/hbase script.
|
||||
</p><pre class="programlisting">mvn test-compile</pre><p>
|
||||
Then launch the tests with:
|
||||
</p><pre class="programlisting">bin/hbase [--config config_dir] org.apache.hadoop.hbase.IntegrationTestsDriver [-test=class_regex]</pre><p>
|
||||
|
||||
This execution will launch the tests under <code class="code">hbase-it/src/test</code>, having <code class="code">@Category(IntegrationTests.class)</code> annotation,
|
||||
and a name starting with <code class="code">IntegrationTests</code>. If specified, class_regex will be used to filter test classes. The regex is checked against full class name; so, part of class name can be used.
|
||||
IntegrationTestsDriver uses Junit to run the tests. Currently there is no support for running integration tests against a distributed cluster using maven (see <a class="link" href="https://issues.apache.org/jira/browse/HBASE-6201" target="_top">HBASE-6201</a>).
|
||||
</p><p>
|
||||
The tests interact with the distributed cluster by using the methods in the <code class="code">DistributedHBaseCluster</code> (implementing <code class="code">HBaseCluster</code>) class, which in turn uses a pluggable <code class="code">ClusterManager</code>. Concrete implementations provide actual functionality for carrying out deployment-specific and environment-dependent tasks (SSH, etc). The default <code class="code">ClusterManager</code> is <code class="code">HBaseClusterManager</code>, which uses SSH to remotely execute start/stop/kill/signal commands, and assumes some posix commands (ps, etc). It also assumes the user running the test has enough "power" to start/stop servers on the remote machines. By default, it picks up <code class="code">HBASE_SSH_OPTS, HBASE_HOME, HBASE_CONF_DIR</code> from the env, and uses <code class="code">bin/hbase-daemon.sh</code> to carry out the actions. Currently tarball deployments, deployments which use hbase-daemons.sh, and <a class="link" href="http://incubator.apache.org/ambari/" target="_top">Apache Ambari</a> deployments are supported. /etc/init.d/ scripts are not supported for now, but support can be easily added. For other deployment options, a ClusterManager can be implemented and plugged in.
|
||||
</p></div><div class="section" title="15.6.5.3. Destructive integration / system tests"><div class="titlepage"><div><div><h4 class="title"><a name="maven.build.commands.integration.tests.destructive"></a>15.6.5.3. Destructive integration / system tests</h4></div></div></div><p>
|
||||
In 0.96, a tool named <code class="code">ChaosMonkey</code> has been introduced. It is modeled after the <a class="link" href="http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html" target="_top">same-named tool by Netflix</a>.
|
||||
Some of the tests use ChaosMonkey to simulate faults in the running cluster in the way of killing random servers,
|
||||
disconnecting servers, etc. ChaosMonkey can also be used as a stand-alone tool to run a (misbehaving) policy while you
|
||||
are running other tests.
|
||||
</p><p>
|
||||
ChaosMonkey defines Actions and Policies. Actions are sequences of events. We have at least the following actions:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Restart active master (sleep 5 sec)</li><li class="listitem">Restart random regionserver (sleep 5 sec)</li><li class="listitem">Restart random regionserver (sleep 60 sec)</li><li class="listitem">Restart META regionserver (sleep 5 sec)</li><li class="listitem">Restart ROOT regionserver (sleep 5 sec)</li><li class="listitem">Batch restart of 50% of regionservers (sleep 5 sec)</li><li class="listitem">Rolling restart of 100% of regionservers (sleep 5 sec)</li></ul></div><p>
|
||||
|
||||
Policies on the other hand are responsible for executing the actions based on a strategy.
|
||||
The default policy is to execute a random action every minute based on predefined action
|
||||
weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
|
||||
policy can be active at any time.
|
||||
</p><p>
|
||||
To run ChaosMonkey as a standalone tool deploy your HBase cluster as usual. ChaosMonkey uses the configuration
|
||||
from the bin/hbase script, thus no extra configuration needs to be done. You can invoke the ChaosMonkey by running:
|
||||
</p><pre class="programlisting">bin/hbase org.apache.hadoop.hbase.util.ChaosMonkey</pre><p>
|
||||
|
||||
This will output something like:
|
||||
</p><pre class="programlisting">
|
||||
12/11/19 23:21:57 INFO util.ChaosMonkey: Using ChaosMonkey Policy: class org.apache.hadoop.hbase.util.ChaosMonkey$PeriodicRandomActionPolicy, period:60000
|
||||
12/11/19 23:21:57 INFO util.ChaosMonkey: Sleeping for 26953 to add jitter
|
||||
12/11/19 23:22:24 INFO util.ChaosMonkey: Performing action: Restart active master
|
||||
12/11/19 23:22:24 INFO util.ChaosMonkey: Killing master:master.example.com,60000,1353367210440
|
||||
12/11/19 23:22:24 INFO hbase.HBaseCluster: Aborting Master: master.example.com,60000,1353367210440
|
||||
12/11/19 23:22:24 INFO hbase.ClusterManager: Executing remote command: ps aux | grep master | grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s SIGKILL , hostname:master.example.com
|
||||
12/11/19 23:22:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
|
||||
12/11/19 23:22:25 INFO hbase.HBaseCluster: Waiting service:master to stop: master.example.com,60000,1353367210440
|
||||
12/11/19 23:22:25 INFO hbase.ClusterManager: Executing remote command: ps aux | grep master | grep -v grep | tr -s ' ' | cut -d ' ' -f2 , hostname:master.example.com
|
||||
12/11/19 23:22:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
|
||||
12/11/19 23:22:25 INFO util.ChaosMonkey: Killed master server:master.example.com,60000,1353367210440
|
||||
12/11/19 23:22:25 INFO util.ChaosMonkey: Sleeping for:5000
|
||||
12/11/19 23:22:30 INFO util.ChaosMonkey: Starting master:master.example.com
|
||||
12/11/19 23:22:30 INFO hbase.HBaseCluster: Starting Master on: master.example.com
|
||||
12/11/19 23:22:30 INFO hbase.ClusterManager: Executing remote command: /homes/enis/code/hbase-0.94/bin/../bin/hbase-daemon.sh --config /homes/enis/code/hbase-0.94/bin/../conf start master , hostname:master.example.com
|
||||
12/11/19 23:22:31 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:starting master, logging to /homes/enis/code/hbase-0.94/bin/../logs/hbase-enis-master-master.example.com.out
|
||||
....
|
||||
12/11/19 23:22:33 INFO util.ChaosMonkey: Started master: master.example.com,60000,1353367210440
|
||||
12/11/19 23:22:33 INFO util.ChaosMonkey: Sleeping for:51321
|
||||
12/11/19 23:23:24 INFO util.ChaosMonkey: Performing action: Restart random region server
|
||||
12/11/19 23:23:24 INFO util.ChaosMonkey: Killing region server:rs3.example.com,60020,1353367027826
|
||||
12/11/19 23:23:24 INFO hbase.HBaseCluster: Aborting RS: rs3.example.com,60020,1353367027826
|
||||
12/11/19 23:23:24 INFO hbase.ClusterManager: Executing remote command: ps aux | grep regionserver | grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s SIGKILL , hostname:rs3.example.com
|
||||
12/11/19 23:23:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
|
||||
12/11/19 23:23:25 INFO hbase.HBaseCluster: Waiting service:regionserver to stop: rs3.example.com,60020,1353367027826
|
||||
12/11/19 23:23:25 INFO hbase.ClusterManager: Executing remote command: ps aux | grep regionserver | grep -v grep | tr -s ' ' | cut -d ' ' -f2 , hostname:rs3.example.com
|
||||
12/11/19 23:23:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
|
||||
12/11/19 23:23:25 INFO util.ChaosMonkey: Killed region server:rs3.example.com,60020,1353367027826. Reported num of rs:6
|
||||
12/11/19 23:23:25 INFO util.ChaosMonkey: Sleeping for:60000
|
||||
12/11/19 23:24:25 INFO util.ChaosMonkey: Starting region server:rs3.example.com
|
||||
12/11/19 23:24:25 INFO hbase.HBaseCluster: Starting RS on: rs3.example.com
|
||||
12/11/19 23:24:25 INFO hbase.ClusterManager: Executing remote command: /homes/enis/code/hbase-0.94/bin/../bin/hbase-daemon.sh --config /homes/enis/code/hbase-0.94/bin/../conf start regionserver , hostname:rs3.example.com
|
||||
12/11/19 23:24:26 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:starting regionserver, logging to /homes/enis/code/hbase-0.94/bin/../logs/hbase-enis-regionserver-rs3.example.com.out
|
||||
|
||||
12/11/19 23:24:27 INFO util.ChaosMonkey: Started region server:rs3.example.com,60020,1353367027826. Reported num of rs:6
|
||||
</pre><p>
|
||||
|
||||
As you can see from the log, ChaosMonkey started the default PeriodicRandomActionPolicy, which is configured with all the available actions, and ran RestartActiveMaster and RestartRandomRs actions. The ChaosMonkey tool, if run from the command line, will keep on running until the process is killed.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hbase.tests';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hbase.org.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="maven.build.commands.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.5. Updating hbase.apache.org </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.7. Maven Build Commands</td></tr></table></div></body></html>
|
File diff suppressed because one or more lines are too long
|
@ -1,37 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix B. hbck In Depth</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="faq.html" title="Appendix A. FAQ"><link rel="next" href="apbs02.html" title="B.2. Inconsistencies"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix B. hbck In Depth</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="faq.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="apbs02.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix B. hbck In Depth"><div class="titlepage"><div><div><h2 class="title"><a name="hbck.in.depth"></a>Appendix B. hbck In Depth</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="hbck.in.depth.html#d2121e11383">B.1. Running hbck to identify inconsistencies</a></span></dt><dt><span class="section"><a href="apbs02.html">B.2. Inconsistencies</a></span></dt><dt><span class="section"><a href="apbs03.html">B.3. Localized repairs</a></span></dt><dt><span class="section"><a href="apbs04.html">B.4. Region Overlap Repairs</a></span></dt><dd><dl><dt><span class="section"><a href="apbs04.html#d2121e11495">B.4.1. Special cases: Meta is not properly assigned</a></span></dt><dt><span class="section"><a href="apbs04.html#d2121e11504">B.4.2. Special cases: HBase version file is missing</a></span></dt><dt><span class="section"><a href="apbs04.html#d2121e11511">B.4.3. Special case: Root and META are corrupt.</a></span></dt><dt><span class="section"><a href="apbs04.html#d2121e11518">B.4.4. 
Special cases: Offline split parent</a></span></dt></dl></dd></dl></div><p>HBaseFsck (hbck) is a tool for checking for region consistency and table integrity problems
|
||||
and repairing a corrupted HBase. It works in two basic modes -- a read-only inconsistency
|
||||
identifying mode and a multi-phase read-write repair mode.
|
||||
</p><div class="section" title="B.1. Running hbck to identify inconsistencies"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11383"></a>B.1. Running hbck to identify inconsistencies</h2></div></div></div>
|
||||
To check to see if your HBase cluster has corruptions, run hbck against your HBase cluster:
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase hbck
|
||||
</pre><p>
|
||||
At the end of the commands output it prints OK or tells you the number of INCONSISTENCIES
|
||||
present. You may also want to run hbck a few times because some inconsistencies can be
|
||||
transient (e.g. cluster is starting up or a region is splitting). Operationally you may want to run
|
||||
hbck regularly and set up an alert (e.g. via nagios) if it repeatedly reports inconsistencies.
|
||||
A run of hbck will report a list of inconsistencies along with a brief description of the regions and
|
||||
tables affected. Using the <code class="code">-details</code> option will report more details including a representative
|
||||
listing of all the splits present in all the tables.
|
||||
</p><pre class="programlisting">
|
||||
$ ./bin/hbase hbck -details
|
||||
</pre>
|
||||
If you just want to know if some tables are corrupted, you can limit hbck to identify inconsistencies
|
||||
in only specific tables. For example the following command would only attempt to check table
|
||||
TableFoo and TableBar. The benefit is that hbck will run in less time.
|
||||
<pre class="programlisting">
|
||||
$ ./bin/hbase hbck TableFoo TableBar
|
||||
</pre></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hbck.in.depth';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="faq.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="apbs02.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix A. FAQ </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> B.2. Inconsistencies</td></tr></table></div></body></html>
|
|
@ -1,20 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix E. HFile format version 2</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="apd.html" title="Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase"><link rel="next" href="apes02.html" title="E.2. HFile format version 1 overview"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix E. HFile format version 2</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="apd.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="apes02.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix E. HFile format version 2"><div class="titlepage"><div><div><h2 class="title"><a name="hfilev2"></a>Appendix E. HFile format version 2</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="hfilev2.html#d2121e11674">E.1. Motivation </a></span></dt><dt><span class="section"><a href="apes02.html">E.2. HFile format version 1 overview </a></span></dt><dd><dl><dt><span class="section"><a href="apes02.html#d2121e11709">E.2.1. Block index format in version 1 </a></span></dt></dl></dd><dt><span class="section"><a href="apes03.html">E.3.
|
||||
HBase file format with inline blocks (version 2)
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="apes03.html#d2121e11736">E.3.1. Overview</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e11751">E.3.2. Unified version 2 block format</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e11820">E.3.3. Block index in version 2</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e11845">E.3.4.
|
||||
Root block index format in version 2</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e11898">E.3.5.
|
||||
Non-root block index format in version 2</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e11923">E.3.6.
|
||||
Bloom filters in version 2</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e11960">E.3.7. File Info format in versions 1 and 2</a></span></dt><dt><span class="section"><a href="apes03.html#d2121e12006">E.3.8.
|
||||
Fixed file trailer format differences between versions 1 and 2</a></span></dt></dl></dd></dl></div><div class="section" title="E.1. Motivation"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="d2121e11674"></a>E.1. Motivation </h2></div></div></div><p>Note: this feature was introduced in HBase 0.92</p><p>We found it necessary to revise the HFile format after encountering high memory usage and slow startup times caused by large Bloom filters and block indexes in the region server. Bloom filters can get as large as 100 MB per HFile, which adds up to 2 GB when aggregated over 20 regions. Block indexes can grow as large as 6 GB in aggregate size over the same set of regions. A region is not considered opened until all of its block index data is loaded. Large Bloom filters produce a different performance problem: the first get request that requires a Bloom filter lookup will incur the latency of loading the entire Bloom filter bit array.</p><p>To speed up region server startup we break Bloom filters and block indexes into multiple blocks and write those blocks out as they fill up, which also reduces the HFile writer’s memory footprint. In the Bloom filter case, “filling up a block” means accumulating enough keys to efficiently utilize a fixed-size bit array, and in the block index case we accumulate an “index block” of the desired size. Bloom filter blocks and index blocks (we call these “inline blocks”) become interspersed with data blocks, and as a side effect we can no longer rely on the difference between block offsets to determine data block length, as it was done in version 1.</p><p>HFile is a low-level file format by design, and it should not deal with application-specific details such as Bloom filters, which are handled at StoreFile level. Therefore, we call Bloom filter blocks in an HFile "inline" blocks. We also supply HFile with an interface to write those inline blocks. 
</p><p>Another format modification aimed at reducing the region server startup time is to use a contiguous “load-on-open” section that has to be loaded in memory at the time an HFile is being opened. Currently, as an HFile opens, there are separate seek operations to read the trailer, data/meta indexes, and file info. To read the Bloom filter, there are two more seek operations for its “data” and “meta” portions. In version 2, we seek once to read the trailer and seek again to read everything else we need to open the file from a contiguous block.</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'hfilev2';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="apd.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="apes02.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix D. YCSB: The Yahoo! Cloud Serving Benchmark and HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> E.2. HFile format version 1 overview </td></tr></table></div></body></html>
|
|
@ -1,49 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.2. IDEs</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="next" href="build.html" title="15.3. Building Apache HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.2. IDEs</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="developer.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="build.html">Next</a></td></tr></table><hr></div><div class="section" title="15.2. IDEs"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ides"></a>15.2. IDEs</h2></div></div></div><div class="section" title="15.2.1. Eclipse"><div class="titlepage"><div><div><h3 class="title"><a name="eclipse"></a>15.2.1. Eclipse</h3></div></div></div><div class="section" title="15.2.1.1. Code Formatting"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.code.formatting"></a>15.2.1.1. Code Formatting</h4></div></div></div><p>Under the <code class="filename">dev-support</code> folder, you will find <code class="filename">hbase_eclipse_formatter.xml</code>.
|
||||
We encourage you to have this formatter in place in eclipse when editing HBase code. To load it into eclipse:
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Go to Eclipse->Preferences...</p></li><li class="listitem"><p>In Preferences, Go to Java->Code Style->Formatter</p></li><li class="listitem"><p>Import... <code class="filename">hbase_eclipse_formatter.xml</code></p></li><li class="listitem"><p>Click Apply</p></li><li class="listitem"><p>Still in Preferences, Go to Java->Editor->Save Actions</p></li><li class="listitem"><p>Check the following:
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="a"><li class="listitem"><p>Perform the selected actions on save</p></li><li class="listitem"><p>Format source code</p></li><li class="listitem"><p>Format edited lines</p></li></ol></div><p>
|
||||
</p></li><li class="listitem"><p>Click Apply</p></li></ol></div><p>
|
||||
</p><p>In addition to the automatic formatting, make sure you follow the style guidelines explained in <a class="xref" href="submitting.patches.html#common.patch.feedback" title="15.10.5. Common Patch Feedback">Section 15.10.5, “Common Patch Feedback”</a></p><p>Also, no @author tags - that's a rule. Quality Javadoc comments are appreciated. And include the Apache license.</p></div><div class="section" title="15.2.1.2. Subversive Plugin"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.svn"></a>15.2.1.2. Subversive Plugin</h4></div></div></div><p>Download and install the Subversive plugin.</p><p>Set up an SVN Repository target from <a class="xref" href="developer.html#svn" title="15.1.1. SVN">Section 15.1.1, “SVN”</a>, then check out the code.</p></div><div class="section" title="15.2.1.3. Git Plugin"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.git.plugin"></a>15.2.1.3. Git Plugin</h4></div></div></div><p>If you cloned the project via git, download and install the Git plugin (EGit). Attach to your local git repo (via the Git Repositories window) and you'll be able to see file revision history, generate patches, etc.</p></div><div class="section" title="15.2.1.4. HBase Project Setup in Eclipse"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.maven.setup"></a>15.2.1.4. HBase Project Setup in Eclipse</h4></div></div></div><p>The easiest way is to use the m2eclipse plugin for Eclipse. Eclipse Indigo or newer has m2eclipse built-in, or it can be found here:http://www.eclipse.org/m2e/. 
M2Eclipse provides Maven integration for Eclipse - it even lets you use the direct Maven commands from within Eclipse to compile and test your project.</p><p>To import the project, you merely need to go to File->Import...Maven->Existing Maven Projects and then point Eclipse at the HBase root directory; m2eclipse will automatically find all the hbase modules for you.</p><p>If you install m2eclipse and import HBase in your workspace, you will have to fix your eclipse Build Path.
|
||||
Remove <code class="filename">target</code> folder, add <code class="filename">target/generated-jamon</code>
|
||||
and <code class="filename">target/generated-sources/java</code> folders. You may also remove from your Build Path
|
||||
the exclusions on the <code class="filename">src/main/resources</code> and <code class="filename">src/test/resources</code>
|
||||
to avoid error message in the console 'Failed to execute goal org.apache.maven.plugins:maven-antrun-plugin:1.6:run (default) on project hbase:
|
||||
'An Ant BuildException has occured: Replace: source file .../target/classes/hbase-default.xml doesn't exist'. This will also
|
||||
reduce the eclipse build cycles and make your life easier when developing.</p></div><div class="section" title="15.2.1.5. Import into eclipse with the command line"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.commandline"></a>15.2.1.5. Import into eclipse with the command line</h4></div></div></div><p>For those not inclined to use m2eclipse, you can generate the Eclipse files from the command line. First, run (you should only have to do this once):
|
||||
</p><pre class="programlisting">mvn clean install -DskipTests</pre><p>
|
||||
and then close Eclipse and execute...
|
||||
</p><pre class="programlisting">mvn eclipse:eclipse</pre><p>
|
||||
... from your local HBase project directory in your workspace to generate some new <code class="filename">.project</code>
|
||||
and <code class="filename">.classpath</code> files. Then reopen Eclipse, or refresh your eclipse project (F5), and import
|
||||
the .project file in the HBase directory to a workspace.
|
||||
</p></div><div class="section" title="15.2.1.6. Maven Classpath Variable"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.maven.class"></a>15.2.1.6. Maven Classpath Variable</h4></div></div></div><p>The <code class="varname">M2_REPO</code> classpath variable needs to be set up for the project. This needs to be set to
|
||||
your local Maven repository, which is usually <code class="filename">~/.m2/repository</code></p>
|
||||
If this classpath variable is not configured, you will see compile errors in Eclipse like this...
|
||||
<pre class="programlisting">
|
||||
Description Resource Path Location Type
|
||||
The project cannot be built until build path errors are resolved hbase Unknown Java Problem
|
||||
Unbound classpath variable: 'M2_REPO/asm/asm/3.1/asm-3.1.jar' in project 'hbase' hbase Build path Build Path Problem
|
||||
Unbound classpath variable: 'M2_REPO/com/github/stephenc/high-scale-lib/high-scale-lib/1.1.1/high-scale-lib-1.1.1.jar' in project 'hbase' hbase Build path Build Path Problem
|
||||
Unbound classpath variable: 'M2_REPO/com/google/guava/guava/r09/guava-r09.jar' in project 'hbase' hbase Build path Build Path Problem
|
||||
Unbound classpath variable: 'M2_REPO/com/google/protobuf/protobuf-java/2.3.0/protobuf-java-2.3.0.jar' in project 'hbase' hbase Build path Build Path Problem Unbound classpath variable:
|
||||
</pre></div><div class="section" title="15.2.1.7. Eclipse Known Issues"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.issues"></a>15.2.1.7. Eclipse Known Issues</h4></div></div></div><p>Eclipse will currently complain about <code class="filename">Bytes.java</code>. It is not possible to turn these errors off.</p><pre class="programlisting">
|
||||
Description Resource Path Location Type
|
||||
Access restriction: The method arrayBaseOffset(Class) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1061 Java Problem
|
||||
Access restriction: The method arrayIndexScale(Class) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1064 Java Problem
|
||||
Access restriction: The method getLong(Object, long) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1111 Java Problem
|
||||
</pre></div><div class="section" title="15.2.1.8. Eclipse - More Information"><div class="titlepage"><div><div><h4 class="title"><a name="eclipse.more"></a>15.2.1.8. Eclipse - More Information</h4></div></div></div><p>For additional information on setting up Eclipse for HBase development on Windows, see
|
||||
<a class="link" href="http://michaelmorello.blogspot.com/2011/09/hbase-subversion-eclipse-windows.html" target="_top">Michael Morello's blog</a> on the topic.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'ides';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="developer.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="build.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 15. Building and Developing Apache HBase (TM) </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.3. Building Apache HBase</td></tr></table></div></body></html>
|
|
@ -1,159 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>2.5. The Important Configurations</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="configuration.html" title="Chapter 2. Apache HBase (TM) Configuration"><link rel="prev" href="example_config.html" title="2.4. Example Configurations"><link rel="next" href="upgrading.html" title="Chapter 3. Upgrading"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">2.5. The Important Configurations</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="example_config.html">Prev</a> </td><th width="60%" align="center">Chapter 2. Apache HBase (TM) Configuration</th><td width="20%" align="right"> <a accesskey="n" href="upgrading.html">Next</a></td></tr></table><hr></div><div class="section" title="2.5. The Important Configurations"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="important_configurations"></a>2.5. The Important Configurations</h2></div></div></div><p>Below we list what the <span class="emphasis"><em>important</em></span>
|
||||
Configurations. We've divided this section into
|
||||
required configuration and worth-a-look recommended configs.
|
||||
</p><div class="section" title="2.5.1. Required Configurations"><div class="titlepage"><div><div><h3 class="title"><a name="required_configuration"></a>2.5.1. Required Configurations</h3></div></div></div><p>Review the <a class="xref" href="configuration.html#os" title="2.1.2. Operating System">Section 2.1.2, “Operating System”</a> and <a class="xref" href="configuration.html#hadoop" title="2.1.3. Hadoop">Section 2.1.3, “Hadoop”</a> sections.
|
||||
</p><div class="section" title="2.5.1.1. Big Cluster Configurations"><div class="titlepage"><div><div><h4 class="title"><a name="big.cluster.config"></a>2.5.1.1. Big Cluster Configurations</h4></div></div></div><p>If a cluster with a lot of regions, it is possible if an eager beaver
|
||||
regionserver checks in soon after master start while all the rest in the
|
||||
cluster are laggardly, this first server to check in will be assigned all
|
||||
regions. If there are lots of regions, this first server could buckle under the
|
||||
load. To prevent the above scenario from happening, raise the
|
||||
<code class="varname">hbase.master.wait.on.regionservers.mintostart</code> from its
|
||||
default value of 1. See
|
||||
<a class="link" href="https://issues.apache.org/jira/browse/HBASE-6389" target="_top">HBASE-6389 Modify the conditions to ensure that Master waits for sufficient number of Region Servers before starting region assignments</a>
|
||||
for more detail.
|
||||
</p></div></div><div class="section" title="2.5.2. Recommended Configurations"><div class="titlepage"><div><div><h3 class="title"><a name="recommended_configurations"></a>2.5.2. Recommended Configurations</h3></div></div></div><div class="section" title="2.5.2.1. ZooKeeper Configuration"><div class="titlepage"><div><div><h4 class="title"><a name="recommended_configurations.zk"></a>2.5.2.1. ZooKeeper Configuration</h4></div></div></div><div class="section" title="2.5.2.1.1. zookeeper.session.timeout"><div class="titlepage"><div><div><h5 class="title"><a name="zookeeper.session.timeout"></a>2.5.2.1.1. <code class="varname">zookeeper.session.timeout</code></h5></div></div></div><p>The default timeout is three minutes (specified in milliseconds). This means
|
||||
that if a server crashes, it will be three minutes before the Master notices
|
||||
the crash and starts recovery. You might like to tune the timeout down to
|
||||
a minute or even less so the Master notices failures sooner.
|
||||
Before changing this value, be sure you have your JVM garbage collection
|
||||
configuration under control; otherwise, a long garbage collection that lasts
|
||||
beyond the ZooKeeper session timeout will take out
|
||||
your RegionServer (You might be fine with this -- you probably want recovery to start
|
||||
on the server if a RegionServer has been in GC for a long period of time).</p><p>To change this configuration, edit <code class="filename">hbase-site.xml</code>,
|
||||
copy the changed file around the cluster and restart.</p><p>We set this value high to save our having to field noob questions up on the mailing lists asking
|
||||
why a RegionServer went down during a massive import. The usual cause is that their JVM is untuned and
|
||||
they are running into long GC pauses. Our thinking is that
|
||||
while users are getting familiar with HBase, we'd save them having to know all of its
|
||||
intricacies. Later when they've built some confidence, then they can play
|
||||
with configuration such as this.
|
||||
</p></div><div class="section" title="2.5.2.1.2. Number of ZooKeeper Instances"><div class="titlepage"><div><div><h5 class="title"><a name="zookeeper.instances"></a>2.5.2.1.2. Number of ZooKeeper Instances</h5></div></div></div><p>See <a class="xref" href="zookeeper.html" title="Chapter 16. ZooKeeper">Chapter 16, <i>ZooKeeper</i></a>.
|
||||
</p></div></div><div class="section" title="2.5.2.2. HDFS Configurations"><div class="titlepage"><div><div><h4 class="title"><a name="recommended.configurations.hdfs"></a>2.5.2.2. HDFS Configurations</h4></div></div></div><div class="section" title="2.5.2.2.1. dfs.datanode.failed.volumes.tolerated"><div class="titlepage"><div><div><h5 class="title"><a name="dfs.datanode.failed.volumes.tolerated"></a>2.5.2.2.1. dfs.datanode.failed.volumes.tolerated</h5></div></div></div><p>This is the "...number of volumes that are allowed to fail before a datanode stops offering service. By default
|
||||
any volume failure will cause a datanode to shutdown" from the <code class="filename">hdfs-default.xml</code>
|
||||
description. If you have more than three or four disks, you might want to set this to 1 or, if you have many disks,
|
||||
two or more.
|
||||
</p></div></div><div class="section" title="2.5.2.3. hbase.regionserver.handler.count"><div class="titlepage"><div><div><h4 class="title"><a name="hbase.regionserver.handler.count"></a>2.5.2.3. <code class="varname">hbase.regionserver.handler.count</code></h4></div></div></div><p>
|
||||
This setting defines the number of threads that are kept open to answer
|
||||
incoming requests to user tables. The default of 10 is rather low in order to
|
||||
prevent users from killing their region servers when using large write buffers
|
||||
with a high number of concurrent clients. The rule of thumb is to keep this
|
||||
number low when the payload per request approaches the MB (big puts, scans using
|
||||
a large cache) and high when the payload is small (gets, small puts, ICVs, deletes).
|
||||
</p><p>
|
||||
It is safe to set that number to the
|
||||
maximum number of incoming clients if their payload is small, the typical example
|
||||
being a cluster that serves a website since puts aren't typically buffered
|
||||
and most of the operations are gets.
|
||||
</p><p>
|
||||
The reason why it is dangerous to keep this setting high is that the aggregate
|
||||
size of all the puts that are currently happening in a region server may impose
|
||||
too much pressure on its memory, or even trigger an OutOfMemoryError. A region server
|
||||
running on low memory will trigger its JVM's garbage collector to run more frequently
|
||||
up to a point where GC pauses become noticeable (the reason being that all the memory
|
||||
used to keep all the requests' payloads cannot be trashed, no matter how hard the
|
||||
garbage collector tries). After some time, the overall cluster
|
||||
throughput is affected since every request that hits that region server will take longer,
|
||||
which exacerbates the problem even more.
|
||||
</p><p>You can get a sense of whether you have too few or too many handlers by
|
||||
<a class="xref" href="trouble.log.html#rpc.logging" title="12.2.2.1. Enabling RPC-level logging">Section 12.2.2.1, “Enabling RPC-level logging”</a>
|
||||
on an individual RegionServer then tailing its logs (Queued requests
|
||||
consume memory).
|
||||
</p></div><div class="section" title="2.5.2.4. Configuration for large memory machines"><div class="titlepage"><div><div><h4 class="title"><a name="big_memory"></a>2.5.2.4. Configuration for large memory machines</h4></div></div></div><p>
|
||||
HBase ships with a reasonable, conservative configuration that will
|
||||
work on nearly all
|
||||
machine types that people might want to test with. If you have larger
|
||||
machines -- HBase has 8G and larger heap -- you might find the following configuration options helpful.
|
||||
TODO.
|
||||
</p></div><div class="section" title="2.5.2.5. Compression"><div class="titlepage"><div><div><h4 class="title"><a name="config.compression"></a>2.5.2.5. Compression</h4></div></div></div><p>You should consider enabling ColumnFamily compression. There are several options that are near-frictionless and in most all cases boost
|
||||
performance by reducing the size of StoreFiles and thus reducing I/O.
|
||||
</p><p>See <a class="xref" href="compression.html" title="Appendix C. Compression In HBase">Appendix C, <i>Compression In HBase</i></a> for more information.</p></div><div class="section" title="2.5.2.6. Bigger Regions"><div class="titlepage"><div><div><h4 class="title"><a name="bigger.regions"></a>2.5.2.6. Bigger Regions</h4></div></div></div><p>
|
||||
Consider going to larger regions to cut down on the total number of regions
|
||||
on your cluster. Generally fewer Regions to manage makes for a smoother running
|
||||
cluster (You can always later manually split the big Regions should one prove
|
||||
hot and you want to spread the request load over the cluster). A lower number of regions is
|
||||
preferred, generally in the range of 20 to low-hundreds
|
||||
per RegionServer. Adjust the regionsize as appropriate to achieve this number.
|
||||
</p><p>For the 0.90.x codebase, the upper-bound of regionsize is about 4GB, with a default of 256MB.
|
||||
For the 0.92.x codebase, due to the HFile v2 change, much larger regionsizes can be supported (e.g., 20GB).
|
||||
</p><p>You may need to experiment with this setting based on your hardware configuration and application needs.
|
||||
</p><p>Adjust <code class="code">hbase.hregion.max.filesize</code> in your <code class="filename">hbase-site.xml</code>.
|
||||
RegionSize can also be set on a per-table basis via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html" target="_top">HTableDescriptor</a>.
|
||||
</p></div><div class="section" title="2.5.2.7. Managed Splitting"><div class="titlepage"><div><div><h4 class="title"><a name="disable.splitting"></a>2.5.2.7. Managed Splitting</h4></div></div></div><p>
|
||||
Rather than let HBase auto-split your Regions, manage the splitting manually
|
||||
<sup>[<a name="d2121e2440" href="#ftn.d2121e2440" class="footnote">14</a>]</sup>.
|
||||
With growing amounts of data, splits will continually be needed. Since
|
||||
you always know exactly what regions you have, long-term debugging and
|
||||
profiling is much easier with manual splits. It is hard to trace the logs to
|
||||
understand region level problems if it keeps splitting and getting renamed.
|
||||
Data offlining bugs + unknown number of split regions == oh crap! If an
|
||||
<code class="classname">HLog</code> or <code class="classname">StoreFile</code>
|
||||
was mistakenly unprocessed by HBase due to a weird bug and
|
||||
you notice it a day or so later, you can be assured that the regions
|
||||
specified in these files are the same as the current regions and you have
|
||||
fewer headaches trying to restore/replay your data.
|
||||
You can finely tune your compaction algorithm. With roughly uniform data
|
||||
growth, it's easy to cause split / compaction storms as the regions all
|
||||
roughly hit the same data size at the same time. With manual splits, you can
|
||||
let staggered, time-based major compactions spread out your network IO load.
|
||||
</p><p>
|
||||
How do I turn off automatic splitting? Automatic splitting is determined by the configuration value
|
||||
<code class="code">hbase.hregion.max.filesize</code>. It is not recommended that you set this
|
||||
to <code class="varname">Long.MAX_VALUE</code> in case you forget about manual splits. A suggested setting
|
||||
is 100GB, which would result in > 1hr major compactions if reached.
|
||||
</p><p>What's the optimal number of pre-split regions to create?
|
||||
Mileage will vary depending upon your application.
|
||||
You could start low with 10 pre-split regions / server and watch as data grows
|
||||
over time. It's better to err on the side of too few regions and do a rolling split later.
|
||||
A more complicated answer is that this depends upon the largest storefile
|
||||
in your region. With a growing data size, this will get larger over time. You
|
||||
want the largest region to be just big enough that the <code class="classname">Store</code> compact
|
||||
selection algorithm only compacts it due to a timed major. If you don't, your
|
||||
cluster can be prone to compaction storms as the algorithm decides to run
|
||||
major compactions on a large series of regions all at once. Note that
|
||||
compaction storms are due to the uniform data growth, not the manual split
|
||||
decision.
|
||||
</p><p> If you pre-split your regions too thin, you can increase the major compaction
|
||||
interval by configuring <code class="varname">HConstants.MAJOR_COMPACTION_PERIOD</code>. If your data size
|
||||
grows too large, use the (post-0.90.0 HBase) <code class="classname">org.apache.hadoop.hbase.util.RegionSplitter</code>
|
||||
script to perform a network IO safe rolling split
|
||||
of all regions.
|
||||
</p></div><div class="section" title="2.5.2.8. Managed Compactions"><div class="titlepage"><div><div><h4 class="title"><a name="managed.compactions"></a>2.5.2.8. Managed Compactions</h4></div></div></div><p>A common administrative technique is to manage major compactions manually, rather than letting
|
||||
HBase do it. By default, <code class="varname">HConstants.MAJOR_COMPACTION_PERIOD</code> is one day and major compactions
|
||||
may kick in when you least desire it - especially on a busy system. To turn off automatic major compactions set
|
||||
the value to <code class="varname">0</code>.
|
||||
</p><p>It is important to stress that major compactions are absolutely necessary for StoreFile cleanup, the only variable is when
|
||||
they occur. They can be administered through the HBase shell, or via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html#majorCompact%28java.lang.String%29" target="_top">HBaseAdmin</a>.
|
||||
</p><p>For more information about compactions and the compaction file selection process, see <a class="xref" href="regions.arch.html#compaction" title="9.7.5.5. Compaction">Section 9.7.5.5, “Compaction”</a></p></div><div class="section" title="2.5.2.9. Speculative Execution"><div class="titlepage"><div><div><h4 class="title"><a name="spec.ex"></a>2.5.2.9. Speculative Execution</h4></div></div></div><p>Speculative Execution of MapReduce tasks is on by default, and for HBase clusters it is generally advised to turn off
|
||||
Speculative Execution at a system-level unless you need it for a specific case, where it can be configured per-job.
|
||||
Set the properties <code class="varname">mapred.map.tasks.speculative.execution</code> and
|
||||
<code class="varname">mapred.reduce.tasks.speculative.execution</code> to false.
|
||||
</p></div></div><div class="section" title="2.5.3. Other Configurations"><div class="titlepage"><div><div><h3 class="title"><a name="other_configuration"></a>2.5.3. Other Configurations</h3></div></div></div><div class="section" title="2.5.3.1. Balancer"><div class="titlepage"><div><div><h4 class="title"><a name="balancer_config"></a>2.5.3.1. Balancer</h4></div></div></div><p>The balancer is a periodic operation which is run on the master to redistribute regions on the cluster. It is configured via
|
||||
<code class="varname">hbase.balancer.period</code> and defaults to 300000 (5 minutes). </p><p>See <a class="xref" href="master.html#master.processes.loadbalancer" title="9.5.4.1. LoadBalancer">Section 9.5.4.1, “LoadBalancer”</a> for more information on the LoadBalancer.
|
||||
</p></div><div class="section" title="2.5.3.2. Disabling Blockcache"><div class="titlepage"><div><div><h4 class="title"><a name="disabling.blockcache"></a>2.5.3.2. Disabling Blockcache</h4></div></div></div><p>Do not turn off block cache (You'd do it by setting <code class="varname">hbase.block.cache.size</code> to zero).
|
||||
Currently we do not do well if you do this because the regionserver will spend all its time loading hfile
|
||||
indices over and over again. If your working set is such that block cache does you no good, at least
|
||||
size the block cache such that hfile indices will stay up in the cache (you can get a rough idea
|
||||
on the size you need by surveying regionserver UIs; you'll see index block size accounted near the
|
||||
top of the webpage).</p></div><div class="section" title="2.5.3.3. Nagle's or the small package problem"><div class="titlepage"><div><div><h4 class="title"><a name="nagles"></a>2.5.3.3. <a class="link" href="http://en.wikipedia.org/wiki/Nagle's_algorithm" target="_top">Nagle's</a> or the small package problem</h4></div></div></div><p>If a big 40ms or so occasional delay is seen in operations against HBase,
|
||||
try the Nagle's setting. For example, see the user mailing list thread,
|
||||
<a class="link" href="http://search-hadoop.com/m/pduLg2fydtE/Inconsistent+scan+performance+with+caching+set+&subj=Re+Inconsistent+scan+performance+with+caching+set+to+1" target="_top">Inconsistent scan performance with caching set to 1</a>
|
||||
and the issue cited therein where setting tcpnodelay improved scan speeds. You might also
|
||||
see the graphs on the tail of <a class="link" href="https://issues.apache.org/jira/browse/HBASE-7008" target="_top">HBASE-7008 Set scanner caching to a better default</a>
|
||||
where our Lars Hofhansl tries various data sizes w/ Nagle's on and off measuring the effect.</p></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e2440" href="#d2121e2440" class="para">14</a>] </sup>What follows is taken from the javadoc at the head of
|
||||
the <code class="classname">org.apache.hadoop.hbase.util.RegionSplitter</code> tool
|
||||
added to HBase post-0.90.0 release.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'important_configurations';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="example_config.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="configuration.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="upgrading.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">2.4. Example Configurations </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 3. Upgrading</td></tr></table></div></body></html>
|
|
@ -1,23 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.11. Joins</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="dm.column.metadata.html" title="5.10. Column Metadata"><link rel="next" href="schema.html" title="Chapter 6. HBase and Schema Design"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.11. Joins</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="dm.column.metadata.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="schema.html">Next</a></td></tr></table><hr></div><div class="section" title="5.11. Joins"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="joins"></a>5.11. Joins</h2></div></div></div><p>Whether HBase supports joins is a common question on the dist-list, and there is a simple answer: it doesn't,
|
||||
at least not in the way that RDBMSs support them (e.g., with equi-joins or outer-joins in SQL). As has been illustrated
|
||||
in this chapter, the read data model operations in HBase are Get and Scan.
|
||||
</p><p>However, that doesn't mean that equivalent join functionality can't be supported in your application, but
|
||||
you have to do it yourself. The two primary strategies are either denormalizing the data upon writing to HBase,
|
||||
or to have lookup tables and do the join between HBase tables in your application or MapReduce code (and as RDBMSs
|
||||
demonstrate, there are several strategies for this depending on the size of the tables, e.g., nested loops vs.
|
||||
hash-joins). So which is the best approach? It depends on what you are trying to do, and as such there isn't a single
|
||||
answer that works for every use case.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'joins';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="dm.column.metadata.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="schema.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.10. Column Metadata </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 6. HBase and Schema Design</td></tr></table></div></body></html>
|
|
@ -1,39 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.3. Java</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.network.html" title="11.2. Network"><link rel="next" href="perf.configurations.html" title="11.4. HBase Configurations"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.3. Java</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.network.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.configurations.html">Next</a></td></tr></table><hr></div><div class="section" title="11.3. Java"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="jvm"></a>11.3. Java</h2></div></div></div><div class="section" title="11.3.1. The Garbage Collector and Apache HBase"><div class="titlepage"><div><div><h3 class="title"><a name="gc"></a>11.3.1. The Garbage Collector and Apache HBase</h3></div></div></div><div class="section" title="11.3.1.1. Long GC pauses"><div class="titlepage"><div><div><h4 class="title"><a name="gcpause"></a>11.3.1.1. Long GC pauses</h4></div></div></div><p><a name="mslab"></a>In his presentation, <a class="link" href="http://www.slideshare.net/cloudera/hbase-hug-presentation" target="_top">Avoiding
|
||||
Full GCs with MemStore-Local Allocation Buffers</a>, Todd Lipcon
|
||||
describes two cases of stop-the-world garbage collections common in
|
||||
HBase, especially during loading; CMS failure modes and old generation
|
||||
heap fragmentation. To address the first, start the CMS
|
||||
earlier than default by adding
|
||||
<code class="code">-XX:CMSInitiatingOccupancyFraction</code> and setting it down
|
||||
from defaults. Start at 60 or 70 percent (The lower you bring down the
|
||||
threshold, the more GCing is done, the more CPU used). To address the
|
||||
second fragmentation issue, Todd added an experimental facility,
|
||||
<a class="indexterm" name="d2121e6672"></a>MSLAB, that
|
||||
must be explicitly enabled in Apache HBase 0.90.x (it is on by default in
|
||||
Apache HBase 0.92.x). Set <code class="code">hbase.hregion.memstore.mslab.enabled</code>
|
||||
to true in your <code class="classname">Configuration</code>. See the cited
|
||||
slides for background and detail<sup>[<a name="d2121e6682" href="#ftn.d2121e6682" class="footnote">27</a>]</sup>.
|
||||
Be aware that when enabled, each MemStore instance will occupy at least
|
||||
an MSLAB instance of memory. If you have thousands of regions or lots
|
||||
of regions each with many column families, this allocation of MSLAB
|
||||
may be responsible for a good portion of your heap allocation and in
|
||||
an extreme case cause you to OOME. Disable MSLAB in this case, or
|
||||
lower the amount of memory it uses or float fewer regions per server.
|
||||
</p><p>For more information about GC logs, see <a class="xref" href="trouble.log.html#trouble.log.gc" title="12.2.3. JVM Garbage Collection Logs">Section 12.2.3, “JVM Garbage Collection Logs”</a>.
|
||||
</p></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e6682" href="#d2121e6682" class="para">27</a>] </sup>The latest jvms do better
|
||||
with regard to fragmentation, so make sure you are running a recent release.
|
||||
Read down in the message,
|
||||
<a class="link" href="http://osdir.com/ml/hotspot-gc-use/2011-11/msg00002.html" target="_top">Identifying concurrent mode failures caused by fragmentation</a>.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'jvm';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.network.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.configurations.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.2. Network </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.4. HBase Configurations</td></tr></table></div></body></html>
|
|
@ -1,38 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>C.3. LZO</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="compression.html" title="Appendix C. Compression In HBase"><link rel="prev" href="hbase.regionserver.codecs.html" title="C.2. hbase.regionserver.codecs"><link rel="next" href="gzip.compression.html" title="C.4. GZIP"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">C.3.
|
||||
LZO
|
||||
</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hbase.regionserver.codecs.html">Prev</a> </td><th width="60%" align="center">Appendix C. Compression In HBase</th><td width="20%" align="right"> <a accesskey="n" href="gzip.compression.html">Next</a></td></tr></table><hr></div><div class="section" title="C.3. LZO"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="lzo.compression"></a>C.3.
|
||||
LZO
|
||||
</h2></div></div></div><p>Unfortunately, HBase cannot ship with LZO because of
|
||||
the licensing issues; HBase is Apache-licensed, LZO is GPL.
|
||||
Therefore LZO install is to be done post-HBase install.
|
||||
See the <a class="link" href="http://wiki.apache.org/hadoop/UsingLzoCompression" target="_top">Using LZO Compression</a>
|
||||
wiki page for how to make LZO work with HBase.
|
||||
</p><p>A common problem users run into when using LZO is that while initial
|
||||
setup of the cluster runs smoothly, a month goes by and some sysadmin goes to
|
||||
add a machine to the cluster only they'll have forgotten to do the LZO
|
||||
fixup on the new machine. In versions since HBase 0.90.0, we should
|
||||
fail in a way that makes it plain what the problem is, but maybe not. </p><p>See <a class="xref" href="hbase.regionserver.codecs.html" title="C.2. hbase.regionserver.codecs">Section C.2, “
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
”</a>
|
||||
for a feature to help protect against failed LZO install.</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'lzo.compression';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hbase.regionserver.codecs.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="compression.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="gzip.compression.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">C.2.
|
||||
<code class="varname">
|
||||
hbase.regionserver.codecs
|
||||
</code>
|
||||
</td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> C.4.
|
||||
GZIP
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,251 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>7.2. HBase MapReduce Examples</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="mapreduce.html" title="Chapter 7. HBase and MapReduce"><link rel="prev" href="mapreduce.html" title="Chapter 7. HBase and MapReduce"><link rel="next" href="mapreduce.htable.access.html" title="7.3. Accessing Other HBase Tables in a MapReduce Job"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">7.2. HBase MapReduce Examples</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="mapreduce.html">Prev</a> </td><th width="60%" align="center">Chapter 7. HBase and MapReduce</th><td width="20%" align="right"> <a accesskey="n" href="mapreduce.htable.access.html">Next</a></td></tr></table><hr></div><div class="section" title="7.2. HBase MapReduce Examples"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="mapreduce.example"></a>7.2. HBase MapReduce Examples</h2></div></div></div><div class="section" title="7.2.1. HBase MapReduce Read Example"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.read"></a>7.2.1. HBase MapReduce Read Example</h3></div></div></div><p>The following is an example of using HBase as a MapReduce source in read-only manner. Specifically,
|
||||
there is a Mapper instance but no Reducer, and nothing is being emitted from the Mapper. The job would be defined
|
||||
as follows...
|
||||
</p><pre class="programlisting">
|
||||
Configuration config = HBaseConfiguration.create();
|
||||
Job job = new Job(config, "ExampleRead");
|
||||
job.setJarByClass(MyReadJob.class); // class that contains mapper
|
||||
|
||||
Scan scan = new Scan();
|
||||
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
|
||||
scan.setCacheBlocks(false); // don't set to true for MR jobs
|
||||
// set other scan attrs
|
||||
...
|
||||
|
||||
TableMapReduceUtil.initTableMapperJob(
|
||||
tableName, // input HBase table name
|
||||
scan, // Scan instance to control CF and attribute selection
|
||||
MyMapper.class, // mapper
|
||||
null, // mapper output key
|
||||
null, // mapper output value
|
||||
job);
|
||||
job.setOutputFormatClass(NullOutputFormat.class); // because we aren't emitting anything from mapper
|
||||
|
||||
boolean b = job.waitForCompletion(true);
|
||||
if (!b) {
|
||||
throw new IOException("error with job!");
|
||||
}
|
||||
</pre><p>
|
||||
...and the mapper instance would extend <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableMapper.html" target="_top">TableMapper</a>...
|
||||
</p><pre class="programlisting">
|
||||
public static class MyMapper extends TableMapper<Text, Text> {
|
||||
|
||||
public void map(ImmutableBytesWritable row, Result value, Context context) throws InterruptedException, IOException {
|
||||
// process data for the row from the Result instance.
|
||||
}
|
||||
}
|
||||
</pre><p>
|
||||
</p></div><div class="section" title="7.2.2. HBase MapReduce Read/Write Example"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.readwrite"></a>7.2.2. HBase MapReduce Read/Write Example</h3></div></div></div><p>The following is an example of using HBase both as a source and as a sink with MapReduce.
|
||||
This example will simply copy data from one table to another.
|
||||
</p><pre class="programlisting">
|
||||
Configuration config = HBaseConfiguration.create();
|
||||
Job job = new Job(config,"ExampleReadWrite");
|
||||
job.setJarByClass(MyReadWriteJob.class); // class that contains mapper
|
||||
|
||||
Scan scan = new Scan();
|
||||
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
|
||||
scan.setCacheBlocks(false); // don't set to true for MR jobs
|
||||
// set other scan attrs
|
||||
|
||||
TableMapReduceUtil.initTableMapperJob(
|
||||
sourceTable, // input table
|
||||
scan, // Scan instance to control CF and attribute selection
|
||||
MyMapper.class, // mapper class
|
||||
null, // mapper output key
|
||||
null, // mapper output value
|
||||
job);
|
||||
TableMapReduceUtil.initTableReducerJob(
|
||||
targetTable, // output table
|
||||
null, // reducer class
|
||||
job);
|
||||
job.setNumReduceTasks(0);
|
||||
|
||||
boolean b = job.waitForCompletion(true);
|
||||
if (!b) {
|
||||
throw new IOException("error with job!");
|
||||
}
|
||||
</pre><p>
|
||||
An explanation is required of what <code class="classname">TableMapReduceUtil</code> is doing, especially with the reducer.
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.html" target="_top">TableOutputFormat</a> is being used
|
||||
as the outputFormat class, and several parameters are being set on the config (e.g., TableOutputFormat.OUTPUT_TABLE), as
|
||||
well as setting the reducer output key to <code class="classname">ImmutableBytesWritable</code> and reducer value to <code class="classname">Writable</code>.
|
||||
These could be set by the programmer on the job and conf, but <code class="classname">TableMapReduceUtil</code> tries to make things easier.
|
||||
</p><p>The following is the example mapper, which will create a <code class="classname">Put</code> matching the input <code class="classname">Result</code>
|
||||
and emit it. Note: this is what the CopyTable utility does.
|
||||
</p><p>
|
||||
</p><pre class="programlisting">
|
||||
public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {
|
||||
|
||||
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
|
||||
// this example is just copying the data from the source table...
|
||||
context.write(row, resultToPut(row,value));
|
||||
}
|
||||
|
||||
private static Put resultToPut(ImmutableBytesWritable key, Result result) throws IOException {
|
||||
Put put = new Put(key.get());
|
||||
for (KeyValue kv : result.raw()) {
|
||||
put.add(kv);
|
||||
}
|
||||
return put;
|
||||
}
|
||||
}
|
||||
</pre><p>
|
||||
</p><p>There isn't actually a reducer step, so <code class="classname">TableOutputFormat</code> takes care of sending the <code class="classname">Put</code>
|
||||
to the target table.
|
||||
</p><p>
|
||||
</p><p>This is just an example, developers could choose not to use <code class="classname">TableOutputFormat</code> and connect to the
|
||||
target table themselves.
|
||||
</p><p>
|
||||
</p></div><div class="section" title="7.2.3. HBase MapReduce Read/Write Example With Multi-Table Output"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.readwrite.multi"></a>7.2.3. HBase MapReduce Read/Write Example With Multi-Table Output</h3></div></div></div><p>TODO: example for <code class="classname">MultiTableOutputFormat</code>.
|
||||
</p></div><div class="section" title="7.2.4. HBase MapReduce Summary to HBase Example"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.summary"></a>7.2.4. HBase MapReduce Summary to HBase Example</h3></div></div></div><p>The following example uses HBase as a MapReduce source and sink with a summarization step. This example will
|
||||
count the number of distinct instances of a value in a table and write those summarized counts in another table.
|
||||
</p><pre class="programlisting">
|
||||
Configuration config = HBaseConfiguration.create();
|
||||
Job job = new Job(config,"ExampleSummary");
|
||||
job.setJarByClass(MySummaryJob.class); // class that contains mapper and reducer
|
||||
|
||||
Scan scan = new Scan();
|
||||
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
|
||||
scan.setCacheBlocks(false); // don't set to true for MR jobs
|
||||
// set other scan attrs
|
||||
|
||||
TableMapReduceUtil.initTableMapperJob(
|
||||
sourceTable, // input table
|
||||
scan, // Scan instance to control CF and attribute selection
|
||||
MyMapper.class, // mapper class
|
||||
Text.class, // mapper output key
|
||||
IntWritable.class, // mapper output value
|
||||
job);
|
||||
TableMapReduceUtil.initTableReducerJob(
|
||||
targetTable, // output table
|
||||
MyTableReducer.class, // reducer class
|
||||
job);
|
||||
job.setNumReduceTasks(1); // at least one, adjust as required
|
||||
|
||||
boolean b = job.waitForCompletion(true);
|
||||
if (!b) {
|
||||
throw new IOException("error with job!");
|
||||
}
|
||||
</pre><p>
|
||||
In this example mapper a column with a String-value is chosen as the value to summarize upon.
|
||||
This value is used as the key to emit from the mapper, and an <code class="classname">IntWritable</code> represents an instance counter.
|
||||
</p><pre class="programlisting">
|
||||
public static class MyMapper extends TableMapper<Text, IntWritable> {
|
||||
|
||||
private final IntWritable ONE = new IntWritable(1);
|
||||
private Text text = new Text();
|
||||
|
||||
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
|
||||
String val = new String(value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr1")));
|
||||
text.set(val); // we can only emit Writables...
|
||||
|
||||
context.write(text, ONE);
|
||||
}
|
||||
}
|
||||
</pre><p>
|
||||
In the reducer, the "ones" are counted (just like any other MR example that does this), and then emits a <code class="classname">Put</code>.
|
||||
</p><pre class="programlisting">
|
||||
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
|
||||
|
||||
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
int i = 0;
|
||||
for (IntWritable val : values) {
|
||||
i += val.get();
|
||||
}
|
||||
Put put = new Put(Bytes.toBytes(key.toString()));
|
||||
put.add(Bytes.toBytes("cf"), Bytes.toBytes("count"), Bytes.toBytes(i));
|
||||
|
||||
context.write(null, put);
|
||||
}
|
||||
}
|
||||
</pre><p>
|
||||
</p></div><div class="section" title="7.2.5. HBase MapReduce Summary to File Example"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.summary.file"></a>7.2.5. HBase MapReduce Summary to File Example</h3></div></div></div><p>This is very similar to the summary example above, with the exception that this is using HBase as a MapReduce source
|
||||
but HDFS as the sink. The differences are in the job setup and in the reducer. The mapper remains the same.
|
||||
</p><pre class="programlisting">
|
||||
Configuration config = HBaseConfiguration.create();
|
||||
Job job = new Job(config,"ExampleSummaryToFile");
|
||||
job.setJarByClass(MySummaryFileJob.class); // class that contains mapper and reducer
|
||||
|
||||
Scan scan = new Scan();
|
||||
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
|
||||
scan.setCacheBlocks(false); // don't set to true for MR jobs
|
||||
// set other scan attrs
|
||||
|
||||
TableMapReduceUtil.initTableMapperJob(
|
||||
sourceTable, // input table
|
||||
scan, // Scan instance to control CF and attribute selection
|
||||
MyMapper.class, // mapper class
|
||||
Text.class, // mapper output key
|
||||
IntWritable.class, // mapper output value
|
||||
job);
|
||||
job.setReducerClass(MyReducer.class); // reducer class
|
||||
job.setNumReduceTasks(1); // at least one, adjust as required
|
||||
FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required
|
||||
|
||||
boolean b = job.waitForCompletion(true);
|
||||
if (!b) {
|
||||
throw new IOException("error with job!");
|
||||
}
|
||||
</pre>
|
||||
As stated above, the previous Mapper can run unchanged with this example.
|
||||
As for the Reducer, it is a "generic" Reducer instead of extending TableReducer and emitting Puts.
|
||||
<pre class="programlisting">
|
||||
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
|
||||
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
int i = 0;
|
||||
for (IntWritable val : values) {
|
||||
i += val.get();
|
||||
}
|
||||
context.write(key, new IntWritable(i));
|
||||
}
|
||||
}
|
||||
</pre></div><div class="section" title="7.2.6. HBase MapReduce Summary to HBase Without Reducer"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.summary.noreducer"></a>7.2.6. HBase MapReduce Summary to HBase Without Reducer</h3></div></div></div><p>It is also possible to perform summaries without a reducer - if you use HBase as the reducer.
|
||||
</p><p>An HBase target table would need to exist for the job summary. The HTable method <code class="code">incrementColumnValue</code>
|
||||
would be used to atomically increment values. From a performance perspective, it might make sense to keep a Map
|
||||
of values with their values to be incremented for each map-task, and make one update per key during the <code class="code">
|
||||
cleanup</code> method of the mapper. However, your mileage may vary depending on the number of rows to be processed and
|
||||
unique keys.
|
||||
</p><p>In the end, the summary results are in HBase.
|
||||
</p></div><div class="section" title="7.2.7. HBase MapReduce Summary to RDBMS"><div class="titlepage"><div><div><h3 class="title"><a name="mapreduce.example.summary.rdbms"></a>7.2.7. HBase MapReduce Summary to RDBMS</h3></div></div></div><p>Sometimes it is more appropriate to generate summaries to an RDBMS. For these cases, it is possible
|
||||
to generate summaries directly to an RDBMS via a custom reducer. The <code class="code">setup</code> method
|
||||
can connect to an RDBMS (the connection information can be passed via custom parameters in the context) and the
|
||||
cleanup method can close the connection.
|
||||
</p><p>It is critical to understand that the number of reducers for the job affects the summarization implementation, and
|
||||
you'll have to design this into your reducer. Specifically, whether it is designed to run as a singleton (one reducer)
|
||||
or multiple reducers. Neither is right or wrong, it depends on your use-case. Recognize that the more reducers that
|
||||
are assigned to the job, the more simultaneous connections to the RDBMS will be created - this will scale, but only to a point.
|
||||
</p><pre class="programlisting">
|
||||
public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
|
||||
private Connection c = null;
|
||||
|
||||
public void setup(Context context) {
|
||||
// create DB connection...
|
||||
}
|
||||
|
||||
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
// do summarization
|
||||
// in this example the keys are Text, but this is just an example
|
||||
}
|
||||
|
||||
public void cleanup(Context context) {
|
||||
// close db connection
|
||||
}
|
||||
|
||||
}
|
||||
</pre><p>In the end, the summary results are written to your RDBMS table/s.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'mapreduce.example';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="mapreduce.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="mapreduce.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="mapreduce.htable.access.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 7. HBase and MapReduce </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 7.3. Accessing Other HBase Tables in a MapReduce Job</td></tr></table></div></body></html>
|
|
@ -1,31 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>7.3. Accessing Other HBase Tables in a MapReduce Job</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="mapreduce.html" title="Chapter 7. HBase and MapReduce"><link rel="prev" href="mapreduce.example.html" title="7.2. HBase MapReduce Examples"><link rel="next" href="mapreduce.specex.html" title="7.4. Speculative Execution"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">7.3. Accessing Other HBase Tables in a MapReduce Job</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="mapreduce.example.html">Prev</a> </td><th width="60%" align="center">Chapter 7. HBase and MapReduce</th><td width="20%" align="right"> <a accesskey="n" href="mapreduce.specex.html">Next</a></td></tr></table><hr></div><div class="section" title="7.3. Accessing Other HBase Tables in a MapReduce Job"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="mapreduce.htable.access"></a>7.3. Accessing Other HBase Tables in a MapReduce Job</h2></div></div></div><p>Although the framework currently allows one HBase table as input to a
|
||||
MapReduce job, other HBase tables can
|
||||
be accessed as lookup tables, etc., in a
|
||||
MapReduce job via creating an HTable instance in the setup method of the Mapper.
|
||||
</p><pre class="programlisting">public class MyMapper extends TableMapper<Text, LongWritable> {
|
||||
private HTable myOtherTable;
|
||||
|
||||
public void setup(Context context) {
|
||||
myOtherTable = new HTable("myOtherTable");
|
||||
}
|
||||
|
||||
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
|
||||
// process Result...
|
||||
// use 'myOtherTable' for lookups
|
||||
}
|
||||
|
||||
}
</pre><p>
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'mapreduce.htable.access';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="mapreduce.example.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="mapreduce.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="mapreduce.specex.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">7.2. HBase MapReduce Examples </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 7.4. Speculative Execution</td></tr></table></div></body></html>
|
|
@ -1,24 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 7. HBase and MapReduce</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="constraints.html" title="6.12. Constraints"><link rel="next" href="mapreduce.example.html" title="7.2. HBase MapReduce Examples"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 7. HBase and MapReduce</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="constraints.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="mapreduce.example.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 7. HBase and MapReduce"><div class="titlepage"><div><div><h2 class="title"><a name="mapreduce"></a>Chapter 7. HBase and MapReduce</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="mapreduce.html#splitter">7.1. Map-Task Splitting</a></span></dt><dd><dl><dt><span class="section"><a href="mapreduce.html#splitter.default">7.1.1. The Default HBase MapReduce Splitter</a></span></dt><dt><span class="section"><a href="mapreduce.html#splitter.custom">7.1.2. Custom Splitters</a></span></dt></dl></dd><dt><span class="section"><a href="mapreduce.example.html">7.2. HBase MapReduce Examples</a></span></dt><dd><dl><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.read">7.2.1. HBase MapReduce Read Example</a></span></dt><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.readwrite">7.2.2. 
HBase MapReduce Read/Write Example</a></span></dt><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.readwrite.multi">7.2.3. HBase MapReduce Read/Write Example With Multi-Table Output</a></span></dt><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.summary">7.2.4. HBase MapReduce Summary to HBase Example</a></span></dt><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.summary.file">7.2.5. HBase MapReduce Summary to File Example</a></span></dt><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.summary.noreducer">7.2.6. HBase MapReduce Summary to HBase Without Reducer</a></span></dt><dt><span class="section"><a href="mapreduce.example.html#mapreduce.example.summary.rdbms">7.2.7. HBase MapReduce Summary to RDBMS</a></span></dt></dl></dd><dt><span class="section"><a href="mapreduce.htable.access.html">7.3. Accessing Other HBase Tables in a MapReduce Job</a></span></dt><dt><span class="section"><a href="mapreduce.specex.html">7.4. Speculative Execution</a></span></dt></dl></div><p>See <a class="link" href="http://hbase.org/apidocs/org/apache/hadoop/hbase/mapreduce/package-summary.html#package_description" target="_top">
|
||||
HBase and MapReduce</a> up in javadocs.
|
||||
Start there. Below is some additional help.</p><p>For more information about MapReduce (i.e., the framework in general), see the
|
||||
<a class="link" href="http://hadoop.apache.org/common/docs/current/mapred_tutorial.html" target="_top">Hadoop MapReduce Tutorial</a>.</p><div class="section" title="7.1. Map-Task Splitting"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="splitter"></a>7.1. Map-Task Splitting</h2></div></div></div><div class="section" title="7.1.1. The Default HBase MapReduce Splitter"><div class="titlepage"><div><div><h3 class="title"><a name="splitter.default"></a>7.1.1. The Default HBase MapReduce Splitter</h3></div></div></div><p>When <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html" target="_top">TableInputFormat</a>
|
||||
is used to source an HBase table in a MapReduce job,
|
||||
its splitter will make a map task for each region of the table.
|
||||
Thus, if there are 100 regions in the table, there will be
|
||||
100 map-tasks for the job - regardless of how many column families are selected in the Scan.</p></div><div class="section" title="7.1.2. Custom Splitters"><div class="titlepage"><div><div><h3 class="title"><a name="splitter.custom"></a>7.1.2. Custom Splitters</h3></div></div></div><p>For those interested in implementing custom splitters, see the method <code class="code">getSplits</code> in
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.html" target="_top">TableInputFormatBase</a>.
|
||||
That is where the logic for map-task assignment resides.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'mapreduce';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="constraints.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="mapreduce.example.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">6.12. Constraints </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 7.2. HBase MapReduce Examples</td></tr></table></div></body></html>
|
|
@ -1,21 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>7.4. Speculative Execution</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="mapreduce.html" title="Chapter 7. HBase and MapReduce"><link rel="prev" href="mapreduce.htable.access.html" title="7.3. Accessing Other HBase Tables in a MapReduce Job"><link rel="next" href="security.html" title="Chapter 8. Secure Apache HBase (TM)"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">7.4. Speculative Execution</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="mapreduce.htable.access.html">Prev</a> </td><th width="60%" align="center">Chapter 7. HBase and MapReduce</th><td width="20%" align="right"> <a accesskey="n" href="security.html">Next</a></td></tr></table><hr></div><div class="section" title="7.4. Speculative Execution"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="mapreduce.specex"></a>7.4. Speculative Execution</h2></div></div></div><p>It is generally advisable to turn off speculative execution for
|
||||
MapReduce jobs that use HBase as a source. This can either be done on a
|
||||
per-Job basis through properties, or on the entire cluster. Especially
|
||||
for longer running jobs, speculative execution will create duplicate
|
||||
map-tasks which will double-write your data to HBase; this is probably
|
||||
not what you want.
|
||||
</p><p>See <a class="xref" href="important_configurations.html#spec.ex" title="2.5.2.9. Speculative Execution">Section 2.5.2.9, “Speculative Execution”</a> for more information.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'mapreduce.specex';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="mapreduce.htable.access.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="mapreduce.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="security.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">7.3. Accessing Other HBase Tables in a MapReduce Job </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 8. Secure Apache HBase (TM)</td></tr></table></div></body></html>
|
|
@ -1,37 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.5. Master</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="client.filter.html" title="9.4. Client Request Filters"><link rel="next" href="regionserver.arch.html" title="9.6. RegionServer"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.5. Master</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="client.filter.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="regionserver.arch.html">Next</a></td></tr></table><hr></div><div class="section" title="9.5. Master"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="master"></a>9.5. Master</h2></div></div></div><p><code class="code">HMaster</code> is the implementation of the Master Server. The Master server
|
||||
is responsible for monitoring all RegionServer instances in the cluster, and is
|
||||
the interface for all metadata changes. In a distributed cluster, the Master typically runs on the <a class="xref" href="arch.hdfs.html#arch.hdfs.nn" title="9.9.1. NameNode">Section 9.9.1, “NameNode”</a><sup>[<a name="d2121e4941" href="#ftn.d2121e4941" class="footnote">24</a>]</sup>
|
||||
</p><div class="section" title="9.5.1. Startup Behavior"><div class="titlepage"><div><div><h3 class="title"><a name="master.startup"></a>9.5.1. Startup Behavior</h3></div></div></div><p>If run in a multi-Master environment, all Masters compete to run the cluster. If the active
|
||||
Master loses its lease in ZooKeeper (or the Master shuts down), then the remaining Masters jostle to
|
||||
take over the Master role.
|
||||
</p></div><div class="section" title="9.5.2. Runtime Impact"><div class="titlepage"><div><div><h3 class="title"><a name="master.runtime"></a>9.5.2. Runtime Impact</h3></div></div></div><p>A common dist-list question is what happens to an HBase cluster when the Master goes down. Because the
|
||||
HBase client talks directly to the RegionServers, the cluster can still function in a "steady
|
||||
state." Additionally, per <a class="xref" href="arch.catalog.html" title="9.2. Catalog Tables">Section 9.2, “Catalog Tables”</a> ROOT and META exist as HBase tables (i.e., are
|
||||
not resident in the Master). However, the Master controls critical functions such as RegionServer failover and
|
||||
completing region splits. So while the cluster can still run <span class="emphasis"><em>for a time</em></span> without the Master,
|
||||
the Master should be restarted as soon as possible.
|
||||
</p></div><div class="section" title="9.5.3. Interface"><div class="titlepage"><div><div><h3 class="title"><a name="master.api"></a>9.5.3. Interface</h3></div></div></div><p>The methods exposed by <code class="code">HMasterInterface</code> are primarily metadata-oriented methods:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Table (createTable, modifyTable, removeTable, enable, disable)
|
||||
</li><li class="listitem">ColumnFamily (addColumn, modifyColumn, removeColumn)
|
||||
</li><li class="listitem">Region (move, assign, unassign)
|
||||
</li></ul></div><p>
|
||||
For example, when the <code class="code">HBaseAdmin</code> method <code class="code">disableTable</code> is invoked, it is serviced by the Master server.
|
||||
</p></div><div class="section" title="9.5.4. Processes"><div class="titlepage"><div><div><h3 class="title"><a name="master.processes"></a>9.5.4. Processes</h3></div></div></div><p>The Master runs several background threads:
|
||||
</p><div class="section" title="9.5.4.1. LoadBalancer"><div class="titlepage"><div><div><h4 class="title"><a name="master.processes.loadbalancer"></a>9.5.4.1. LoadBalancer</h4></div></div></div><p>Periodically, and when there are no regions in transition,
|
||||
a load balancer will run and move regions around to balance the cluster's load.
|
||||
See <a class="xref" href="important_configurations.html#balancer_config" title="2.5.3.1. Balancer">Section 2.5.3.1, “Balancer”</a> for configuring this property.</p><p>See <a class="xref" href="regions.arch.html#regions.arch.assignment" title="9.7.2. Region-RegionServer Assignment">Section 9.7.2, “Region-RegionServer Assignment”</a> for more information on region assignment.
|
||||
</p></div><div class="section" title="9.5.4.2. CatalogJanitor"><div class="titlepage"><div><div><h4 class="title"><a name="master.processes.catalog"></a>9.5.4.2. CatalogJanitor</h4></div></div></div><p>Periodically checks and cleans up the .META. table. See <a class="xref" href="arch.catalog.html#arch.catalog.meta" title="9.2.2. META">Section 9.2.2, “META”</a> for more information on META.</p></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e4941" href="#d2121e4941" class="para">24</a>] </sup>J Mohamed Zahoor goes into some more detail on the Master Architecture in this blog posting, <a class="link" href="http://blog.zahoor.in/2012/08/hbase-hmaster-architecture/" target="_top">HBase HMaster Architecture
|
||||
</a>.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'master';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="client.filter.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="regionserver.arch.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.4. Client Request Filters </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.6. RegionServer</td></tr></table></div></body></html>
|
|
@ -1,27 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.7. Maven Build Commands</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="hbase.tests.html" title="15.6. Tests"><link rel="next" href="getting.involved.html" title="15.8. Getting Involved"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.7. Maven Build Commands</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hbase.tests.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="getting.involved.html">Next</a></td></tr></table><hr></div><div class="section" title="15.7. Maven Build Commands"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="maven.build.commands"></a>15.7. Maven Build Commands</h2></div></div></div><p>All commands executed from the local HBase project directory.
|
||||
</p><p>Note: use Maven 3 (Maven 2 may work but we suggest you use Maven 3).
|
||||
</p><div class="section" title="15.7.1. Compile"><div class="titlepage"><div><div><h3 class="title"><a name="maven.build.commands.compile"></a>15.7.1. Compile</h3></div></div></div><pre class="programlisting">
|
||||
mvn compile
|
||||
</pre></div><div class="section" title="15.7.2. Running all or individual Unit Tests"><div class="titlepage"><div><div><h3 class="title"><a name="maven.build.commands.unitall"></a>15.7.2. Running all or individual Unit Tests</h3></div></div></div><p>See the <a class="xref" href="hbase.tests.html#hbase.unittests.cmds" title="15.6.3. Running tests">Section 15.6.3, “Running tests”</a> section
|
||||
above in <a class="xref" href="hbase.tests.html#hbase.unittests" title="15.6.2. Unit Tests">Section 15.6.2, “Unit Tests”</a></p></div><div class="section" title="15.7.3. Building against various hadoop versions."><div class="titlepage"><div><div><h3 class="title"><a name="maven.build.hadoop"></a>15.7.3. Building against various hadoop versions.</h3></div></div></div><p>As of 0.96, Apache HBase supports building against Apache Hadoop versions: 1.0.3, 2.0.0-alpha and 3.0.0-SNAPSHOT.
|
||||
By default, we will build with Hadoop-1.0.3. To change the version to run with Hadoop-2.0.0-alpha, you would run:</p><pre class="programlisting">mvn -Dhadoop.profile=2.0 ...</pre><p>
|
||||
That is, designate build with hadoop.profile 2.0. Pass 2.0 for hadoop.profile to build against hadoop 2.0.
|
||||
Tests may not all pass as of this writing so you may need to pass <code class="code">-DskipTests</code> unless you are inclined
|
||||
to fix the failing tests.</p><p>
|
||||
Similarly, for 3.0, you would just replace the profile value. Note that Hadoop-3.0.0-SNAPSHOT does not currently have a deployed maven artifact - you will need to build and install your own in your local maven repository if you want to run against this profile.
|
||||
</p><p>
|
||||
In earlier versions of Apache HBase, you can build against older versions of Apache Hadoop, notably Hadoop 0.22.x and 0.23.x.
|
||||
If you are running, for example, HBase-0.94 and wanted to build against Hadoop 0.23.x, you would run with:</p><pre class="programlisting">mvn -Dhadoop.profile=23 ...</pre></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'maven.build.commands';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hbase.tests.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="getting.involved.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.6. Tests </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.8. Getting Involved</td></tr></table></div></body></html>
|
|
@ -1,131 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>15.4. Adding an Apache HBase release to Apache's Maven Repository</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"><link rel="prev" href="build.html" title="15.3. Building Apache HBase"><link rel="next" href="hbase.org.html" title="15.5. Updating hbase.apache.org"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">15.4. Adding an Apache HBase release to Apache's Maven Repository</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="build.html">Prev</a> </td><th width="60%" align="center">Chapter 15. Building and Developing Apache HBase (TM)</th><td width="20%" align="right"> <a accesskey="n" href="hbase.org.html">Next</a></td></tr></table><hr></div><div class="section" title="15.4. Adding an Apache HBase release to Apache's Maven Repository"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="mvn_repo"></a>15.4. Adding an Apache HBase release to Apache's Maven Repository</h2></div></div></div><p>Follow the instructions at
|
||||
<a class="link" href="http://www.apache.org/dev/publishing-maven-artifacts.html" target="_top">Publishing Maven Artifacts</a> after
|
||||
reading the below miscellany.
|
||||
</p><p>You must use maven 3.0.x (Check by running <span class="command"><strong>mvn -version</strong></span>).
|
||||
</p><p>Let me list out the commands I used first. The sections that follow dig in more
|
||||
on what is going on. In this example, we are releasing the 0.92.2 jar to the apache
|
||||
maven repository.
|
||||
</p><pre class="programlisting">
|
||||
# First make a copy of the tag we want to release; presumes the release has been tagged already
|
||||
# We do this because we need to make some commits for the mvn release plugin to work.
|
||||
853 svn copy -m "Publishing 0.92.2 to mvn" https://svn.apache.org/repos/asf/hbase/tags/0.92.2 https://svn.apache.org/repos/asf/hbase/tags/0.92.2mvn
|
||||
857 svn checkout https://svn.apache.org/repos/asf/hbase/tags/0.92.2mvn
|
||||
858 cd 0.92.2mvn/
|
||||
# Edit the version making it release version with a '-SNAPSHOT' suffix (See below for more on this)
|
||||
860 vi pom.xml
|
||||
861 svn commit -m "Add SNAPSHOT to the version" pom.xml
|
||||
862 ~/bin/mvn/bin/mvn release:clean
|
||||
865 ~/bin/mvn/bin/mvn release:prepare
|
||||
866 # Answer questions and then ^C to kill the build after the last question. See below for more on this.
|
||||
867 vi release.properties
|
||||
# Change the references to trunk svn to be 0.92.2mvn; the release plugin presumes trunk
|
||||
# Then restart the release:prepare -- it won't ask questions
|
||||
# because the properties file exists.
|
||||
868 ~/bin/mvn/bin/mvn release:prepare
|
||||
# The apache-release profile comes from the apache parent pom and does signing of artifacts published
|
||||
869 ~/bin/mvn/bin/mvn release:perform -Papache-release
|
||||
# When done copying up to apache staging repository,
|
||||
# browse to repository.apache.org, login and finish
|
||||
# the release according to the above
|
||||
# "Publishing Maven Artifacts".
|
||||
</pre><p>
|
||||
</p><p>Below is more detail on the commands listed above.</p><p>At the <span class="command"><strong>mvn release:perform</strong></span> step, before starting, if you are for example
|
||||
releasing hbase 0.92.2, you need to make sure the pom.xml version is 0.92.2-SNAPSHOT. This needs
|
||||
to be checked in. Since we do the maven release after actual release, I've been doing this
|
||||
checkin into a copy of the release tag rather than into the actual release tag itself (presumes the release has been properly tagged in svn).
|
||||
So, say we released hbase 0.92.2 and now we want to do the release to the maven repository, in svn, the 0.92.2
|
||||
release will be tagged 0.92.2. Making the maven release, copy the 0.92.2 tag to 0.92.2mvn.
|
||||
Check out this tag and change the version therein and commit.
|
||||
</p><p>
|
||||
Currently, the mvn release wants to go against trunk. I haven't figured how to tell it to do otherwise
|
||||
so I do the below hack. The hack comprises answering the questions put to you by the mvn release plugin properly,
|
||||
then immediately control-C'ing the build after the last question asked as the build release step starts to run.
|
||||
After control-C'ing it, you'll notice a release.properties in your build dir. Review it.
|
||||
Make sure it is using the proper branch -- it tends to use trunk rather than the 0.92.2mvn or whatever
|
||||
that you want it to use -- so hand edit the release.properties file that was put under <code class="varname">${HBASE_HOME}</code>
|
||||
by the <span class="command"><strong>release:prepare</strong></span> invocation. When done, restart the
|
||||
<span class="command"><strong>release:prepare</strong></span>.
|
||||
</p><p>Here is how I'd answer the questions at <span class="command"><strong>release:prepare</strong></span> time:
|
||||
</p><pre class="programlisting">What is the release version for "HBase"? (org.apache.hbase:hbase) 0.92.2: :
|
||||
What is SCM release tag or label for "HBase"? (org.apache.hbase:hbase) hbase-0.92.2: : 0.92.2mvn
|
||||
What is the new development version for "HBase"? (org.apache.hbase:hbase) 0.92.3-SNAPSHOT: :
|
||||
[INFO] Transforming 'HBase'...</pre><p>
|
||||
</p><p>When you run <span class="command"><strong>release:perform</strong></span>, pass <span class="command"><strong>-Papache-release</strong></span>
|
||||
else it will not 'sign' the artifacts it uploads.
|
||||
</p><p>A strange issue I ran into was the one where the upload into the apache
|
||||
repository was being sprayed across multiple apache machines making it so I could
|
||||
not release. See <a class="link" href="https://issues.apache.org/jira/browse/INFRA-4482" target="_top">INFRA-4482 Why is my upload to mvn spread across multiple repositories?</a>.</p><p><a name="mvn.settings.file"></a>Here is my <code class="filename">~/.m2/settings.xml</code>.
|
||||
This is read by the release plugin. The apache-release profile will pick up your
|
||||
gpg key setup from here if you've specified it into the file. The password
|
||||
can be maven encrypted as suggested in the "Publishing Maven Artifacts" but plain
|
||||
text password works too (just don't let anyone see your local settings.xml).
|
||||
</p><pre class="programlisting"><settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0
|
||||
http://maven.apache.org/xsd/settings-1.0.0.xsd">
|
||||
<servers>
|
||||
<!-- To publish a snapshot of some part of Maven -->
|
||||
<server>
|
||||
<id>apache.snapshots.https</id>
|
||||
<username>YOUR_APACHE_ID
|
||||
</username>
|
||||
<password>YOUR_APACHE_PASSWORD
|
||||
</password>
|
||||
</server>
|
||||
<!-- To publish a website using Maven -->
|
||||
<!-- To stage a release of some part of Maven -->
|
||||
<server>
|
||||
<id>apache.releases.https</id>
|
||||
<username>YOUR_APACHE_ID
|
||||
</username>
|
||||
<password>YOUR_APACHE_PASSWORD
|
||||
</password>
|
||||
</server>
|
||||
</servers>
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>apache-release</id>
|
||||
<properties>
|
||||
<gpg.keyname>YOUR_KEYNAME</gpg.keyname>
|
||||
<!--Keyname is something like this ... 00A5F21E... do gpg --list-keys to find it-->
|
||||
<gpg.passphrase>YOUR_KEY_PASSWORD
|
||||
</gpg.passphrase>
|
||||
</properties>
|
||||
</profile>
|
||||
</profiles>
|
||||
</settings>
|
||||
</pre><p>
|
||||
</p><p>If you run into the below, it's because you need to edit the version in the pom.xml and add
|
||||
<code class="code">-SNAPSHOT</code> to the version (and commit).
|
||||
</p><pre class="programlisting">[INFO] Scanning for projects...
|
||||
[INFO] Searching repository for plugin with prefix: 'release'.
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Building HBase
|
||||
[INFO] task-segment: [release:prepare] (aggregator-style)
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] [release:prepare {execution: default-cli}]
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[ERROR] BUILD FAILURE
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] You don't have a SNAPSHOT project in the reactor projects list.
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] For more information, run Maven with the -e switch
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Total time: 3 seconds
|
||||
[INFO] Finished at: Sat Mar 26 18:11:07 PDT 2011
|
||||
[INFO] Final Memory: 35M/423M
|
||||
[INFO] -----------------------------------------------------------------------</pre><p>
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'mvn_repo';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="build.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="developer.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="hbase.org.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">15.3. Building Apache HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 15.5. Updating hbase.apache.org</td></tr></table></div></body></html>
|
|
@ -1,122 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>14.3. Node Management</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="prev" href="ops.regionmgt.html" title="14.2. Region Management"><link rel="next" href="hbase_metrics.html" title="14.4. HBase Metrics"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">14.3. Node Management</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="ops.regionmgt.html">Prev</a> </td><th width="60%" align="center">Chapter 14. Apache HBase (TM) Operational Management</th><td width="20%" align="right"> <a accesskey="n" href="hbase_metrics.html">Next</a></td></tr></table><hr></div><div class="section" title="14.3. Node Management"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="node.management"></a>14.3. Node Management</h2></div></div></div><div class="section" title="14.3.1. Node Decommission"><div class="titlepage"><div><div><h3 class="title"><a name="decommission"></a>14.3.1. Node Decommission</h3></div></div></div><p>You can stop an individual RegionServer by running the following
|
||||
script in the HBase directory on the particular node:
|
||||
</p><pre class="programlisting">$ ./bin/hbase-daemon.sh stop regionserver</pre><p>
|
||||
The RegionServer will first close all regions and then shut itself down.
|
||||
On shutdown, the RegionServer's ephemeral node in ZooKeeper will expire.
|
||||
The master will notice the RegionServer gone and will treat it as
|
||||
a 'crashed' server; it will reassign the regions the RegionServer was carrying.
|
||||
</p><div class="note" title="Disable the Load Balancer before Decommissioning a node" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Disable the Load Balancer before Decommissioning a node</h3><p>If the load balancer runs while a node is shutting down, then
|
||||
there could be contention between the Load Balancer and the
|
||||
Master's recovery of the just decommissioned RegionServer.
|
||||
Avoid any problems by disabling the balancer first.
|
||||
See <a class="xref" href="node.management.html#lb" title="Load Balancer">Load Balancer</a> below.
|
||||
</p></div><p>
|
||||
</p><p>
|
||||
A downside to the above stop of a RegionServer is that regions could be offline for
|
||||
a good period of time. Regions are closed in order. If there are many regions on the server, the
|
||||
first region to close may not be back online until all regions close and after the master
|
||||
notices the RegionServer's znode gone. In Apache HBase 0.90.2, we added a facility for having
|
||||
a node gradually shed its load and then shut itself down. Apache HBase 0.90.2 added the
|
||||
<code class="filename">graceful_stop.sh</code> script. Here is its usage:
|
||||
</p><pre class="programlisting">$ ./bin/graceful_stop.sh
|
||||
Usage: graceful_stop.sh [--config <conf-dir>] [--restart] [--reload] [--thrift] [--rest] <hostname>
|
||||
thrift If we should stop/start thrift before/after the hbase stop/start
|
||||
rest If we should stop/start rest before/after the hbase stop/start
|
||||
restart If we should restart after graceful stop
|
||||
reload Move offloaded regions back on to the stopped server
|
||||
debug Move offloaded regions back on to the stopped server
|
||||
hostname Hostname of server we are to stop</pre><p>
|
||||
</p><p>
|
||||
To decommission a loaded RegionServer, run the following:
|
||||
</p><pre class="programlisting">$ ./bin/graceful_stop.sh HOSTNAME</pre><p>
|
||||
where <code class="varname">HOSTNAME</code> is the host carrying the RegionServer
|
||||
you would decommission.
|
||||
</p><div class="note" title="On HOSTNAME" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">On <code class="varname">HOSTNAME</code></h3><p>The <code class="varname">HOSTNAME</code> passed to <code class="filename">graceful_stop.sh</code>
|
||||
must match the hostname that hbase is using to identify RegionServers.
|
||||
Check the list of RegionServers in the master UI for how HBase is
|
||||
referring to servers. It's usually the hostname but can also be the FQDN.
|
||||
Whatever HBase is using, this is what you should pass the
|
||||
<code class="filename">graceful_stop.sh</code> decommission
|
||||
script. If you pass IPs, the script is not yet smart enough to make
|
||||
a hostname (or FQDN) of it and so it will fail when it checks if server is
|
||||
currently running; the graceful unloading of regions will not run.
|
||||
</p></div><p> The <code class="filename">graceful_stop.sh</code> script will move the regions off the
|
||||
decommissioned RegionServer one at a time to minimize region churn.
|
||||
It will verify the region deployed in the new location before it
|
||||
moves the next region and so on until the decommissioned server
|
||||
is carrying zero regions. At this point, the <code class="filename">graceful_stop.sh</code>
|
||||
tells the RegionServer <span class="command"><strong>stop</strong></span>. The master will at this point notice the
|
||||
RegionServer gone but all regions will have already been redeployed
|
||||
and because the RegionServer went down cleanly, there will be no
|
||||
WAL logs to split.
|
||||
</p><div class="note" title="Load Balancer" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title"><a name="lb"></a>Load Balancer</h3><p>
|
||||
It is assumed that the Region Load Balancer is disabled while the
|
||||
<span class="command"><strong>graceful_stop</strong></span> script runs (otherwise the balancer
|
||||
and the decommission script will end up fighting over region deployments).
|
||||
Use the shell to disable the balancer:
|
||||
</p><pre class="programlisting">hbase(main):001:0> balance_switch false
|
||||
true
|
||||
0 row(s) in 0.3590 seconds</pre><p>
|
||||
This turns the balancer OFF. To reenable, do:
|
||||
</p><pre class="programlisting">hbase(main):001:0> balance_switch true
|
||||
false
|
||||
0 row(s) in 0.3590 seconds</pre><p>
|
||||
</p></div><p>
|
||||
</p><div class="section" title="14.3.1.1. Bad or Failing Disk"><div class="titlepage"><div><div><h4 class="title"><a name="bad.disk"></a>14.3.1.1. Bad or Failing Disk</h4></div></div></div><p>It is good having <a class="xref" href="important_configurations.html#dfs.datanode.failed.volumes.tolerated" title="2.5.2.2.1. dfs.datanode.failed.volumes.tolerated">Section 2.5.2.2.1, “dfs.datanode.failed.volumes.tolerated”</a> set if you have a decent number of disks
|
||||
per machine for the case where a disk plain dies. But usually disks do the "John Wayne" -- i.e. take a while
|
||||
to go down spewing errors in <code class="filename">dmesg</code> -- or for some reason, run much slower than their
|
||||
companions. In this case you want to decommission the disk. You have two options. You can
|
||||
<span style="color: red"><xlink>decommission the datanode</xlink></span>
|
||||
or, less disruptive in that only the bad disk's data will be rereplicated, you can stop the datanode,
|
||||
unmount the bad volume (You can't umount a volume while the datanode is using it), and then restart the
|
||||
datanode (presuming you have set dfs.datanode.failed.volumes.tolerated > 0). The regionserver will
|
||||
throw some errors in its logs as it recalibrates where to get its data from -- it will likely
|
||||
roll its WAL log too -- but in general, apart from some latency spikes, it should keep on chugging.
|
||||
</p><div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3><p>If you are doing short-circuit reads, you will have to move the regions off the regionserver
|
||||
before you stop the datanode; when short-circuiting reading, though chmod'd so regionserver cannot
|
||||
have access, because it already has the files open, it will be able to keep reading the file blocks
|
||||
from the bad disk even though the datanode is down. Move the regions back after you restart the
|
||||
datanode.</p></div><p>
|
||||
</p></div></div><div class="section" title="14.3.2. Rolling Restart"><div class="titlepage"><div><div><h3 class="title"><a name="rolling"></a>14.3.2. Rolling Restart</h3></div></div></div><p>
|
||||
You can also ask this script to restart a RegionServer after the shutdown
|
||||
AND move its old regions back into place. The latter you might do to
|
||||
retain data locality. A primitive rolling restart might be effected by
|
||||
running something like the following:
|
||||
</p><pre class="programlisting">$ for i in `cat conf/regionservers|sort`; do ./bin/graceful_stop.sh --restart --reload --debug $i; done &> /tmp/log.txt &
|
||||
</pre><p>
|
||||
Tail the output of <code class="filename">/tmp/log.txt</code> to follow the script's
|
||||
progress. The above does RegionServers only. Be sure to disable the
|
||||
load balancer before doing the above. You'd need to do the master
|
||||
update separately. Do it before you run the above script.
|
||||
Here is a pseudo-script for how you might craft a rolling restart script:
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem"><p>Untar your release, make sure of its configuration and
|
||||
then rsync it across the cluster. If this is 0.90.2, patch it
|
||||
with HBASE-3744 and HBASE-3756.
|
||||
</p></li><li class="listitem"><p>Run hbck to ensure the cluster is consistent
|
||||
</p><pre class="programlisting">$ ./bin/hbase hbck</pre><p>
|
||||
Effect repairs if inconsistent.
|
||||
</p></li><li class="listitem"><p>Restart the Master: </p><pre class="programlisting">$ ./bin/hbase-daemon.sh stop master; ./bin/hbase-daemon.sh start master</pre><p>
|
||||
</p></li><li class="listitem"><p>
|
||||
Disable the region balancer:</p><pre class="programlisting">$ echo "balance_switch false" | ./bin/hbase shell</pre><p>
|
||||
</p></li><li class="listitem"><p>Run the <code class="filename">graceful_stop.sh</code> script per RegionServer. For example:
|
||||
</p><pre class="programlisting">$ for i in `cat conf/regionservers|sort`; do ./bin/graceful_stop.sh --restart --reload --debug $i; done &> /tmp/log.txt &
|
||||
</pre><p>
|
||||
If you are running thrift or rest servers on the RegionServer, pass --thrift or --rest options (See usage
|
||||
for <code class="filename">graceful_stop.sh</code> script).
|
||||
</p></li><li class="listitem"><p>Restart the Master again. This will clear out dead servers list and reenable the balancer.
|
||||
</p></li><li class="listitem"><p>Run hbck to ensure the cluster is consistent.
|
||||
</p></li></ol></div><p>
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'node.management';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="ops.regionmgt.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="ops_mgt.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="hbase_metrics.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">14.2. Region Management </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 14.4. HBase Metrics</td></tr></table></div></body></html>
|
|
@ -1,32 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>6.2. On the number of column families</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="schema.html" title="Chapter 6. HBase and Schema Design"><link rel="prev" href="schema.html" title="Chapter 6. HBase and Schema Design"><link rel="next" href="rowkey.design.html" title="6.3. Rowkey Design"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">6.2.
|
||||
On the number of column families
|
||||
</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="schema.html">Prev</a> </td><th width="60%" align="center">Chapter 6. HBase and Schema Design</th><td width="20%" align="right"> <a accesskey="n" href="rowkey.design.html">Next</a></td></tr></table><hr></div><div class="section" title="6.2. On the number of column families"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="number.of.cfs"></a>6.2.
|
||||
On the number of column families
|
||||
</h2></div></div></div><p>
|
||||
HBase currently does not do well with anything above two or three column families so keep the number
|
||||
of column families in your schema low. Currently, flushing and compactions are done on a per Region basis so
|
||||
if one column family is carrying the bulk of the data bringing on flushes, the adjacent families
|
||||
will also be flushed though the amount of data they carry is small. When many column families exist, the
|
||||
flushing and compaction interaction can make for a bunch of needless i/o loading (To be addressed by
|
||||
changing flushing and compaction to work on a per column family basis). For more information
|
||||
on compactions, see <a class="xref" href="regions.arch.html#compaction" title="9.7.5.5. Compaction">Section 9.7.5.5, “Compaction”</a>.
|
||||
</p><p>Try to make do with one column family if you can in your schemas. Only introduce a
|
||||
second and third column family in the case where data access is usually column scoped;
|
||||
i.e. you query one column family or the other but usually not both at the one time.
|
||||
</p><div class="section" title="6.2.1. Cardinality of ColumnFamilies"><div class="titlepage"><div><div><h3 class="title"><a name="number.of.cfs.card"></a>6.2.1. Cardinality of ColumnFamilies</h3></div></div></div><p>Where multiple ColumnFamilies exist in a single table, be aware of the cardinality (i.e., number of rows).
|
||||
If ColumnFamilyA has 1 million rows and ColumnFamilyB has 1 billion rows, ColumnFamilyA's data will likely be spread
|
||||
across many, many regions (and RegionServers). This makes mass scans for ColumnFamilyA less efficient.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'number.of.cfs';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="schema.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="schema.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="rowkey.design.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 6. HBase and Schema Design </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 6.3. Rowkey Design</td></tr></table></div></body></html>
|
|
@ -1,36 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>14.7. HBase Backup</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="prev" href="cluster_replication.html" title="14.6. Cluster Replication"><link rel="next" href="ops.capacity.html" title="14.8. Capacity Planning"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">14.7. HBase Backup</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="cluster_replication.html">Prev</a> </td><th width="60%" align="center">Chapter 14. Apache HBase (TM) Operational Management</th><td width="20%" align="right"> <a accesskey="n" href="ops.capacity.html">Next</a></td></tr></table><hr></div><div class="section" title="14.7. HBase Backup"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ops.backup"></a>14.7. HBase Backup</h2></div></div></div><p>There are two broad strategies for performing HBase backups: backing up with a full cluster shutdown, and backing up on a live cluster.
|
||||
Each approach has pros and cons.
|
||||
</p><p>For additional information, see <a class="link" href="http://blog.sematext.com/2011/03/11/hbase-backup-options/" target="_top">HBase Backup Options</a> over on the Sematext Blog.
|
||||
</p><div class="section" title="14.7.1. Full Shutdown Backup"><div class="titlepage"><div><div><h3 class="title"><a name="ops.backup.fullshutdown"></a>14.7.1. Full Shutdown Backup</h3></div></div></div><p>Some environments can tolerate a periodic full shutdown of their HBase cluster, for example if it is being used in a back-end analytic capacity
|
||||
and not serving front-end web-pages. The benefits are that the NameNode/Master and RegionServers are down, so there is no chance of missing
|
||||
any in-flight changes to either StoreFiles or metadata. The obvious con is that the cluster is down. The steps include:
|
||||
</p><div class="section" title="14.7.1.1. Stop HBase"><div class="titlepage"><div><div><h4 class="title"><a name="ops.backup.fullshutdown.stop"></a>14.7.1.1. Stop HBase</h4></div></div></div><p>
|
||||
</p></div><div class="section" title="14.7.1.2. Distcp"><div class="titlepage"><div><div><h4 class="title"><a name="ops.backup.fullshutdown.distcp"></a>14.7.1.2. Distcp</h4></div></div></div><p>Distcp could be used to copy the contents of the HBase directory in HDFS either to the same cluster in another directory, or
|
||||
to a different cluster.
|
||||
</p><p>Note: Distcp works in this situation because the cluster is down and there are no in-flight edits to files.
|
||||
Distcp-ing of files in the HBase directory is not generally recommended on a live cluster.
|
||||
</p></div><div class="section" title="14.7.1.3. Restore (if needed)"><div class="titlepage"><div><div><h4 class="title"><a name="ops.backup.fullshutdown.restore"></a>14.7.1.3. Restore (if needed)</h4></div></div></div><p>The backup of the hbase directory from HDFS is copied onto the 'real' hbase directory via distcp. The act of copying these files
|
||||
creates new HDFS metadata, which is why a restore of the NameNode edits from the time of the HBase backup isn't required for this kind of
|
||||
restore, because it's a restore (via distcp) of a specific HDFS directory (i.e., the HBase part) not the entire HDFS file-system.
|
||||
</p></div></div><div class="section" title="14.7.2. Live Cluster Backup - Replication"><div class="titlepage"><div><div><h3 class="title"><a name="ops.backup.live.replication"></a>14.7.2. Live Cluster Backup - Replication</h3></div></div></div><p>This approach assumes that there is a second cluster.
|
||||
See the HBase page on <a class="link" href="http://hbase.apache.org/replication.html" target="_top">replication</a> for more information.
|
||||
</p></div><div class="section" title="14.7.3. Live Cluster Backup - CopyTable"><div class="titlepage"><div><div><h3 class="title"><a name="ops.backup.live.copytable"></a>14.7.3. Live Cluster Backup - CopyTable</h3></div></div></div><p>The <a class="xref" href="ops_mgt.html#copytable" title="14.1.6. CopyTable">Section 14.1.6, “CopyTable”</a> utility could either be used to copy data from one table to another on the
|
||||
same cluster, or to copy data to another table on another cluster.
|
||||
</p><p>Since the cluster is up, there is a risk that edits could be missed in the copy process.
|
||||
</p></div><div class="section" title="14.7.4. Live Cluster Backup - Export"><div class="titlepage"><div><div><h3 class="title"><a name="ops.backup.live.export"></a>14.7.4. Live Cluster Backup - Export</h3></div></div></div><p>The <a class="xref" href="ops_mgt.html#export" title="14.1.7. Export">Section 14.1.7, “Export”</a> approach dumps the content of a table to HDFS on the same cluster. To restore the data, the
|
||||
<a class="xref" href="ops_mgt.html#import" title="14.1.8. Import">Section 14.1.8, “Import”</a> utility would be used.
|
||||
</p><p>Since the cluster is up, there is a risk that edits could be missed in the export process.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'ops.backup';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="cluster_replication.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="ops_mgt.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="ops.capacity.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">14.6. Cluster Replication </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 14.8. Capacity Planning</td></tr></table></div></body></html>
|
|
@ -1,27 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>14.8. Capacity Planning</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="prev" href="ops.backup.html" title="14.7. HBase Backup"><link rel="next" href="developer.html" title="Chapter 15. Building and Developing Apache HBase (TM)"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">14.8. Capacity Planning</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="ops.backup.html">Prev</a> </td><th width="60%" align="center">Chapter 14. Apache HBase (TM) Operational Management</th><td width="20%" align="right"> <a accesskey="n" href="developer.html">Next</a></td></tr></table><hr></div><div class="section" title="14.8. Capacity Planning"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ops.capacity"></a>14.8. Capacity Planning</h2></div></div></div><div class="section" title="14.8.1. Storage"><div class="titlepage"><div><div><h3 class="title"><a name="ops.capacity.storage"></a>14.8.1. Storage</h3></div></div></div><p>A common question for HBase administrators is estimating how much storage will be required for an HBase cluster.
|
||||
There are several aspects to consider, the most important of which is what data will be loaded into the cluster. Start
|
||||
with a solid understanding of how HBase handles data internally (KeyValue).
|
||||
</p><div class="section" title="14.8.1.1. KeyValue"><div class="titlepage"><div><div><h4 class="title"><a name="ops.capacity.storage.kv"></a>14.8.1.1. KeyValue</h4></div></div></div><p>HBase storage will be dominated by KeyValues. See <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a> and <a class="xref" href="rowkey.design.html#keysize" title="6.3.2. Try to minimize row and column sizes">Section 6.3.2, “Try to minimize row and column sizes”</a> for
|
||||
how HBase stores data internally.
|
||||
</p><p>It is critical to understand that there is a KeyValue instance for every attribute stored in a row, and the
|
||||
rowkey-length, ColumnFamily name-length and attribute lengths will drive the size of the database more than any other
|
||||
factor.
|
||||
</p></div><div class="section" title="14.8.1.2. StoreFiles and Blocks"><div class="titlepage"><div><div><h4 class="title"><a name="ops.capacity.storage.sf"></a>14.8.1.2. StoreFiles and Blocks</h4></div></div></div><p>KeyValue instances are aggregated into blocks, and the blocksize is configurable on a per-ColumnFamily basis.
|
||||
Blocks are aggregated into StoreFiles. See <a class="xref" href="regions.arch.html" title="9.7. Regions">Section 9.7, “Regions”</a>.
|
||||
</p></div><div class="section" title="14.8.1.3. HDFS Block Replication"><div class="titlepage"><div><div><h4 class="title"><a name="ops.capacity.storage.hdfs"></a>14.8.1.3. HDFS Block Replication</h4></div></div></div><p>Because HBase runs on top of HDFS, factor in HDFS block replication into storage calculations.
|
||||
</p></div></div><div class="section" title="14.8.2. Regions"><div class="titlepage"><div><div><h3 class="title"><a name="ops.capacity.regions"></a>14.8.2. Regions</h3></div></div></div><p>Another common question for HBase administrators is determining the right number of regions per
|
||||
RegionServer. This affects both storage and hardware planning. See <a class="xref" href="perf.configurations.html#perf.number.of.regions" title="11.4.1. Number of Regions">Section 11.4.1, “Number of Regions”</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'ops.capacity';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="ops.backup.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="ops_mgt.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="developer.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">14.7. HBase Backup </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 15. Building and Developing Apache HBase (TM)</td></tr></table></div></body></html>
|
|
@ -1,38 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>14.5. HBase Monitoring</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="prev" href="hbase_metrics.html" title="14.4. HBase Metrics"><link rel="next" href="cluster_replication.html" title="14.6. Cluster Replication"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">14.5. HBase Monitoring</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="hbase_metrics.html">Prev</a> </td><th width="60%" align="center">Chapter 14. Apache HBase (TM) Operational Management</th><td width="20%" align="right"> <a accesskey="n" href="cluster_replication.html">Next</a></td></tr></table><hr></div><div class="section" title="14.5. HBase Monitoring"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ops.monitoring"></a>14.5. HBase Monitoring</h2></div></div></div><div class="section" title="14.5.1. Overview"><div class="titlepage"><div><div><h3 class="title"><a name="ops.monitoring.overview"></a>14.5.1. Overview</h3></div></div></div><p>The following metrics are arguably the most important to monitor for each RegionServer for
|
||||
"macro monitoring", preferably with a system like <a class="link" href="http://opentsdb.net/" target="_top">OpenTSDB</a>.
|
||||
If your cluster is having performance issues it's likely that you'll see something unusual with
|
||||
this group.
|
||||
</p><p>HBase:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Requests</li><li class="listitem">Compactions queue</li></ul></div><p>
|
||||
</p><p>OS:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">IO Wait</li><li class="listitem">User CPU</li></ul></div><p>
|
||||
</p><p>Java:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">GC</li></ul></div><p>
|
||||
</p><p>
|
||||
</p><p>
|
||||
For more information on HBase metrics, see <a class="xref" href="hbase_metrics.html" title="14.4. HBase Metrics">Section 14.4, “HBase Metrics”</a>.
|
||||
</p></div><div class="section" title="14.5.2. Slow Query Log"><div class="titlepage"><div><div><h3 class="title"><a name="ops.slow.query"></a>14.5.2. Slow Query Log</h3></div></div></div><p>The HBase slow query log consists of parseable JSON structures describing the properties of those client operations (Gets, Puts, Deletes, etc.) that either took too long to run, or produced too much output. The thresholds for "too long to run" and "too much output" are configurable, as described below. The output is produced inline in the main region server logs so that it is easy to discover further details from context with other logged events. It is also prepended with identifying tags <code class="constant">(responseTooSlow)</code>, <code class="constant">(responseTooLarge)</code>, <code class="constant">(operationTooSlow)</code>, and <code class="constant">(operationTooLarge)</code> in order to enable easy filtering with grep, in case the user desires to see only slow queries.
|
||||
</p><div class="section" title="14.5.2.1. Configuration"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e9272"></a>14.5.2.1. Configuration</h4></div></div></div><p>There are two configuration knobs that can be used to adjust the thresholds for when queries are logged.
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="varname">hbase.ipc.warn.response.time</code> Maximum number of milliseconds that a query can be run without being logged. Defaults to 10000, or 10 seconds. Can be set to -1 to disable logging by time.
|
||||
</li><li class="listitem"><code class="varname">hbase.ipc.warn.response.size</code> Maximum byte size of response that a query can return without being logged. Defaults to 100 megabytes. Can be set to -1 to disable logging by size.
|
||||
</li></ul></div></div><div class="section" title="14.5.2.2. Metrics"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e9286"></a>14.5.2.2. Metrics</h4></div></div></div><p>The slow query log exposes two metrics to JMX.
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="varname">hadoop.regionserver_rpc_slowResponse</code> A global metric reflecting the durations of all responses that triggered logging.</li><li class="listitem"><code class="varname">hadoop.regionserver_rpc_methodName.aboveOneSec</code> A metric reflecting the durations of all responses that lasted for more than one second.</li></ul></div><p>
|
||||
</p></div><div class="section" title="14.5.2.3. Output"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e9301"></a>14.5.2.3. Output</h4></div></div></div><p>The output is tagged with operation e.g. <code class="constant">(operationTooSlow)</code> if the call was a client operation, such as a Put, Get, or Delete, which we expose detailed fingerprint information for. If not, it is tagged <code class="constant">(responseTooSlow)</code> and still produces parseable JSON output, but with less verbose information solely regarding its duration and size in the RPC itself. <code class="constant">TooLarge</code> is substituted for <code class="constant">TooSlow</code> if the response size triggered the logging, with <code class="constant">TooLarge</code> appearing even in the case that both size and duration triggered logging.
|
||||
</p></div><div class="section" title="14.5.2.4. Example"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e9321"></a>14.5.2.4. Example</h4></div></div></div><p>
|
||||
</p><pre class="programlisting">2011-09-08 10:01:25,824 WARN org.apache.hadoop.ipc.HBaseServer: (operationTooSlow): {"tables":{"riley2":{"puts":[{"totalColumns":11,"families":{"actions":[{"timestamp":1315501284459,"qualifier":"0","vlen":9667580},{"timestamp":1315501284459,"qualifier":"1","vlen":10122412},{"timestamp":1315501284459,"qualifier":"2","vlen":11104617},{"timestamp":1315501284459,"qualifier":"3","vlen":13430635}]},"row":"cfcd208495d565ef66e7dff9f98764da:0"}],"families":["actions"]}},"processingtimems":956,"client":"10.47.34.63:33623","starttimems":1315501284456,"queuetimems":0,"totalPuts":1,"class":"HRegionServer","responsesize":0,"method":"multiPut"}</pre><p>
|
||||
</p><p>Note that everything inside the "tables" structure is output produced by MultiPut's fingerprint, while the rest of the information is RPC-specific, such as processing time and client IP/port. Other client operations follow the same pattern and the same general structure, with necessary differences due to the nature of the individual operations. In the case that the call is not a client operation, that detailed fingerprint information will be completely absent.
|
||||
</p><p>This particular example would indicate that the likely cause of slowness is simply a very large (on the order of 100MB) multiput, as we can tell by the "vlen," or value length, fields of each put in the multiPut.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'ops.monitoring';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="hbase_metrics.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="ops_mgt.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="cluster_replication.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">14.4. HBase Metrics </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 14.6. Cluster Replication</td></tr></table></div></body></html>
|
|
@ -1,24 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>14.2. Region Management</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="prev" href="ops_mgt.html" title="Chapter 14. Apache HBase (TM) Operational Management"><link rel="next" href="node.management.html" title="14.3. Node Management"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">14.2. Region Management</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="ops_mgt.html">Prev</a> </td><th width="60%" align="center">Chapter 14. Apache HBase (TM) Operational Management</th><td width="20%" align="right"> <a accesskey="n" href="node.management.html">Next</a></td></tr></table><hr></div><div class="section" title="14.2. Region Management"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="ops.regionmgt"></a>14.2. Region Management</h2></div></div></div><div class="section" title="14.2.1. Major Compaction"><div class="titlepage"><div><div><h3 class="title"><a name="ops.regionmgt.majorcompact"></a>14.2.1. Major Compaction</h3></div></div></div><p>Major compactions can be requested via the HBase shell or <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html#majorCompact%28java.lang.String%29" target="_top">HBaseAdmin.majorCompact</a>.
|
||||
</p><p>Note: major compactions do NOT do region merges. See <a class="xref" href="regions.arch.html#compaction" title="9.7.5.5. Compaction">Section 9.7.5.5, “Compaction”</a> for more information about compactions.
|
||||
|
||||
</p></div><div class="section" title="14.2.2. Merge"><div class="titlepage"><div><div><h3 class="title"><a name="ops.regionmgt.merge"></a>14.2.2. Merge</h3></div></div></div><p>Merge is a utility that can merge adjoining regions in the same table (see org.apache.hadoop.hbase.util.Merge).</p><pre class="programlisting">$ bin/hbase org.apache.hadoop.hbase.util.Merge &lt;tablename&gt; &lt;region1&gt; &lt;region2&gt;
|
||||
</pre><p>If you feel you have too many regions and want to consolidate them, Merge is the utility you need. Merge must
|
||||
be run when the cluster is down.
|
||||
See the <a class="link" href="http://ofps.oreilly.com/titles/9781449396107/performance.html" target="_top">O'Reilly HBase Book</a> for
|
||||
an example of usage.
|
||||
</p><p>Additionally, there is a Ruby script attached to <a class="link" href="https://issues.apache.org/jira/browse/HBASE-1621" target="_top">HBASE-1621</a>
|
||||
for region merging.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'ops.regionmgt';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="ops_mgt.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="ops_mgt.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="node.management.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 14. Apache HBase (TM) Operational Management </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 14.3. Node Management</td></tr></table></div></body></html>
|
File diff suppressed because one or more lines are too long
|
@ -1,15 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>F.6. Hadoop Books</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="other.info.html" title="Appendix F. Other Information About HBase"><link rel="prev" href="other.info.books.html" title="F.5. HBase Books"><link rel="next" href="hbase.history.html" title="Appendix G. HBase History"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">F.6. Hadoop Books</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="other.info.books.html">Prev</a> </td><th width="60%" align="center">Appendix F. Other Information About HBase</th><td width="20%" align="right"> <a accesskey="n" href="hbase.history.html">Next</a></td></tr></table><hr></div><div class="section" title="F.6. Hadoop Books"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="other.info.books.hadoop"></a>F.6. Hadoop Books</h2></div></div></div><p><a class="link" href="http://shop.oreilly.com/product/9780596521981.do" target="_top">Hadoop: The Definitive Guide</a> by Tom White.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'other.info.books.hadoop';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="other.info.books.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="other.info.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="hbase.history.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">F.5. HBase Books </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Appendix G. HBase History</td></tr></table></div></body></html>
|
|
@ -1,15 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>F.5. HBase Books</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="other.info.html" title="Appendix F. Other Information About HBase"><link rel="prev" href="other.info.sites.html" title="F.4. HBase Sites"><link rel="next" href="other.info.books.hadoop.html" title="F.6. Hadoop Books"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">F.5. HBase Books</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="other.info.sites.html">Prev</a> </td><th width="60%" align="center">Appendix F. Other Information About HBase</th><td width="20%" align="right"> <a accesskey="n" href="other.info.books.hadoop.html">Next</a></td></tr></table><hr></div><div class="section" title="F.5. HBase Books"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="other.info.books"></a>F.5. HBase Books</h2></div></div></div><p><a class="link" href="http://shop.oreilly.com/product/0636920014348.do" target="_top">HBase: The Definitive Guide</a> by Lars George.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'other.info.books';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="other.info.sites.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="other.info.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="other.info.books.hadoop.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">F.4. HBase Sites </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> F.6. Hadoop Books</td></tr></table></div></body></html>
|
|
@ -1,22 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Appendix F. Other Information About HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="apes03.html" title="E.3. HBase file format with inline blocks (version 2)"><link rel="next" href="other.info.pres.html" title="F.2. HBase Presentations (Slides)"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Appendix F. Other Information About HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="apes03.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="other.info.pres.html">Next</a></td></tr></table><hr></div><div class="appendix" title="Appendix F. Other Information About HBase"><div class="titlepage"><div><div><h2 class="title"><a name="other.info"></a>Appendix F. Other Information About HBase</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="other.info.html#other.info.videos">F.1. HBase Videos</a></span></dt><dt><span class="section"><a href="other.info.pres.html">F.2. HBase Presentations (Slides)</a></span></dt><dt><span class="section"><a href="other.info.papers.html">F.3. HBase Papers</a></span></dt><dt><span class="section"><a href="other.info.sites.html">F.4. HBase Sites</a></span></dt><dt><span class="section"><a href="other.info.books.html">F.5. HBase Books</a></span></dt><dt><span class="section"><a href="other.info.books.hadoop.html">F.6. Hadoop Books</a></span></dt></dl></div><div class="section" title="F.1. 
HBase Videos"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="other.info.videos"></a>F.1. HBase Videos</h2></div></div></div><p>Introduction to HBase
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><a class="link" href="http://www.cloudera.com/videos/chicago_data_summit_apache_hbase_an_introduction_todd_lipcon" target="_top">Introduction to HBase</a> by Todd Lipcon (Chicago Data Summit 2011).
|
||||
</li><li class="listitem"><a class="link" href="http://www.cloudera.com/videos/intorduction-hbase-todd-lipcon" target="_top">Introduction to HBase</a> by Todd Lipcon (2010).
|
||||
</li></ul></div><p>
|
||||
</p><p><a class="link" href="http://www.cloudera.com/videos/hadoop-world-2011-presentation-video-building-realtime-big-data-services-at-facebook-with-hadoop-and-hbase" target="_top">Building Real Time Services at Facebook with HBase</a> by Jonathan Gray (Hadoop World 2011).
|
||||
</p><p><a class="link" href="http://www.cloudera.com/videos/hw10_video_how_stumbleupon_built_and_advertising_platform_using_hbase_and_hadoop" target="_top">HBase and Hadoop, Mixing Real-Time and Batch Processing at StumbleUpon</a> by JD Cryans (Hadoop World 2010).
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'other.info';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="apes03.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="other.info.pres.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">E.3.
|
||||
HBase file format with inline blocks (version 2)
|
||||
</td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> F.2. HBase Presentations (Slides)</td></tr></table></div></body></html>
|
|
@ -1,17 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>F.3. HBase Papers</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="other.info.html" title="Appendix F. Other Information About HBase"><link rel="prev" href="other.info.pres.html" title="F.2. HBase Presentations (Slides)"><link rel="next" href="other.info.sites.html" title="F.4. HBase Sites"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">F.3. HBase Papers</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="other.info.pres.html">Prev</a> </td><th width="60%" align="center">Appendix F. Other Information About HBase</th><td width="20%" align="right"> <a accesskey="n" href="other.info.sites.html">Next</a></td></tr></table><hr></div><div class="section" title="F.3. HBase Papers"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="other.info.papers"></a>F.3. HBase Papers</h2></div></div></div><p><a class="link" href="http://research.google.com/archive/bigtable.html" target="_top">BigTable</a> by Google (2006).
|
||||
</p><p><a class="link" href="http://www.larsgeorge.com/2010/05/hbase-file-locality-in-hdfs.html" target="_top">HBase and HDFS Locality</a> by Lars George (2010).
|
||||
</p><p><a class="link" href="http://ianvarley.com/UT/MR/Varley_MastersReport_Full_2009-08-07.pdf" target="_top">No Relation: The Mixed Blessings of Non-Relational Databases</a> by Ian Varley (2009).
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'other.info.papers';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="other.info.pres.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="other.info.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="other.info.sites.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">F.2. HBase Presentations (Slides) </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> F.4. HBase Sites</td></tr></table></div></body></html>
|
|
@ -1,17 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>F.2. HBase Presentations (Slides)</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="other.info.html" title="Appendix F. Other Information About HBase"><link rel="prev" href="other.info.html" title="Appendix F. Other Information About HBase"><link rel="next" href="other.info.papers.html" title="F.3. HBase Papers"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">F.2. HBase Presentations (Slides)</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="other.info.html">Prev</a> </td><th width="60%" align="center">Appendix F. Other Information About HBase</th><td width="20%" align="right"> <a accesskey="n" href="other.info.papers.html">Next</a></td></tr></table><hr></div><div class="section" title="F.2. HBase Presentations (Slides)"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="other.info.pres"></a>F.2. HBase Presentations (Slides)</h2></div></div></div><p><a class="link" href="http://www.cloudera.com/resource/hadoop-world-2011-presentation-slides-advanced-hbase-schema-design" target="_top">Advanced HBase Schema Design</a> by Lars George (Hadoop World 2011).
|
||||
</p><p><a class="link" href="http://www.slideshare.net/cloudera/chicago-data-summit-apache-hbase-an-introduction" target="_top">Introduction to HBase</a> by Todd Lipcon (Chicago Data Summit 2011).
|
||||
</p><p><a class="link" href="http://www.slideshare.net/cloudera/hw09-practical-h-base-getting-the-most-from-your-h-base-install" target="_top">Getting The Most From Your HBase Install</a> by Ryan Rawson, Jonathan Gray (Hadoop World 2009).
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'other.info.pres';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="other.info.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="other.info.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="other.info.papers.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Appendix F. Other Information About HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> F.3. HBase Papers</td></tr></table></div></body></html>
|
|
@ -1,20 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>F.4. HBase Sites</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="other.info.html" title="Appendix F. Other Information About HBase"><link rel="prev" href="other.info.papers.html" title="F.3. HBase Papers"><link rel="next" href="other.info.books.html" title="F.5. HBase Books"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">F.4. HBase Sites</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="other.info.papers.html">Prev</a> </td><th width="60%" align="center">Appendix F. Other Information About HBase</th><td width="20%" align="right"> <a accesskey="n" href="other.info.books.html">Next</a></td></tr></table><hr></div><div class="section" title="F.4. HBase Sites"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="other.info.sites"></a>F.4. HBase Sites</h2></div></div></div><p><a class="link" href="http://www.cloudera.com/blog/category/hbase/" target="_top">Cloudera's HBase Blog</a> has a lot of links to useful HBase information.
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><a class="link" href="http://www.cloudera.com/blog/2010/04/cap-confusion-problems-with-partition-tolerance/" target="_top">CAP Confusion</a> is a relevant entry for background information on
|
||||
distributed storage systems.
|
||||
</li></ul></div><p>
|
||||
</p><p><a class="link" href="http://wiki.apache.org/hadoop/HBase/HBasePresentations" target="_top">HBase Wiki</a> has a page with a number of presentations.
|
||||
</p><p><a class="link" href="http://refcardz.dzone.com/refcardz/hbase" target="_top">HBase RefCard</a> from DZone.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'other.info.sites';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="other.info.papers.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="other.info.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="other.info.books.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">F.3. HBase Papers </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> F.5. HBase Books</td></tr></table></div></body></html>
|
|
@ -1,15 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.12. Case Studies</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.ec2.html" title="11.11. Amazon EC2"><link rel="next" href="trouble.html" title="Chapter 12. Troubleshooting and Debugging Apache HBase (TM)"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.12. Case Studies</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.ec2.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="trouble.html">Next</a></td></tr></table><hr></div><div class="section" title="11.12. Case Studies"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.casestudy"></a>11.12. Case Studies</h2></div></div></div><p>For Performance and Troubleshooting Case Studies, see <a class="xref" href="casestudies.html" title="Chapter 13. Apache HBase (TM) Case Studies">Chapter 13, <i>Apache HBase (TM) Case Studies</i></a>.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.casestudy';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.ec2.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="trouble.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.11. Amazon EC2 </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 12. Troubleshooting and Debugging Apache HBase (TM)</td></tr></table></div></body></html>
|
|
@ -1,30 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.4. HBase Configurations</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="jvm.html" title="11.3. Java"><link rel="next" href="perf.zookeeper.html" title="11.5. ZooKeeper"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.4. HBase Configurations</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="jvm.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.zookeeper.html">Next</a></td></tr></table><hr></div><div class="section" title="11.4. HBase Configurations"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.configurations"></a>11.4. HBase Configurations</h2></div></div></div><p>See <a class="xref" href="important_configurations.html#recommended_configurations" title="2.5.2. Recommended Configurations">Section 2.5.2, “Recommended Configurations”</a>.</p><div class="section" title="11.4.1. Number of Regions"><div class="titlepage"><div><div><h3 class="title"><a name="perf.number.of.regions"></a>11.4.1. Number of Regions</h3></div></div></div><p>The number of regions for an HBase table is driven by the <a class="xref" href="important_configurations.html#bigger.regions" title="2.5.2.6. Bigger Regions">Section 2.5.2.6, “Bigger Regions”</a>. Also, see the architecture
|
||||
section on <a class="xref" href="regions.arch.html#arch.regions.size" title="9.7.1. Region Size">Section 9.7.1, “Region Size”</a>.</p></div><div class="section" title="11.4.2. Managing Compactions"><div class="titlepage"><div><div><h3 class="title"><a name="perf.compactions.and.splits"></a>11.4.2. Managing Compactions</h3></div></div></div><p>For larger systems, managing <a class="link" href="important_configurations.html#disable.splitting" title="2.5.2.7. Managed Splitting">compactions and splits</a> may be
|
||||
something you want to consider.</p></div><div class="section" title="11.4.3. hbase.regionserver.handler.count"><div class="titlepage"><div><div><h3 class="title"><a name="perf.handlers"></a>11.4.3. <code class="varname">hbase.regionserver.handler.count</code></h3></div></div></div><p>See <a class="xref" href="config.files.html#hbase.regionserver.handler.count" title="hbase.regionserver.handler.count"><code class="varname">hbase.regionserver.handler.count</code></a>.
|
||||
</p></div><div class="section" title="11.4.4. hfile.block.cache.size"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hfile.block.cache.size"></a>11.4.4. <code class="varname">hfile.block.cache.size</code></h3></div></div></div><p>See <a class="xref" href="config.files.html#hfile.block.cache.size" title="hfile.block.cache.size"><code class="varname">hfile.block.cache.size</code></a>.
|
||||
A memory setting for the RegionServer process.
|
||||
</p></div><div class="section" title="11.4.5. hbase.regionserver.global.memstore.upperLimit"><div class="titlepage"><div><div><h3 class="title"><a name="perf.rs.memstore.upperlimit"></a>11.4.5. <code class="varname">hbase.regionserver.global.memstore.upperLimit</code></h3></div></div></div><p>See <a class="xref" href="config.files.html#hbase.regionserver.global.memstore.upperLimit" title="hbase.regionserver.global.memstore.upperLimit"><code class="varname">hbase.regionserver.global.memstore.upperLimit</code></a>.
|
||||
This memory setting is often adjusted for the RegionServer process depending on needs.
|
||||
</p></div><div class="section" title="11.4.6. hbase.regionserver.global.memstore.lowerLimit"><div class="titlepage"><div><div><h3 class="title"><a name="perf.rs.memstore.lowerlimit"></a>11.4.6. <code class="varname">hbase.regionserver.global.memstore.lowerLimit</code></h3></div></div></div><p>See <a class="xref" href="config.files.html#hbase.regionserver.global.memstore.lowerLimit" title="hbase.regionserver.global.memstore.lowerLimit"><code class="varname">hbase.regionserver.global.memstore.lowerLimit</code></a>.
|
||||
This memory setting is often adjusted for the RegionServer process depending on needs.
|
||||
</p></div><div class="section" title="11.4.7. hbase.hstore.blockingStoreFiles"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hstore.blockingstorefiles"></a>11.4.7. <code class="varname">hbase.hstore.blockingStoreFiles</code></h3></div></div></div><p>See <a class="xref" href="config.files.html#hbase.hstore.blockingStoreFiles" title="hbase.hstore.blockingStoreFiles"><code class="varname">hbase.hstore.blockingStoreFiles</code></a>.
|
||||
If there is blocking in the RegionServer logs, increasing this can help.
|
||||
</p></div><div class="section" title="11.4.8. hbase.hregion.memstore.block.multiplier"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hregion.memstore.block.multiplier"></a>11.4.8. <code class="varname">hbase.hregion.memstore.block.multiplier</code></h3></div></div></div><p>See <a class="xref" href="config.files.html#hbase.hregion.memstore.block.multiplier" title="hbase.hregion.memstore.block.multiplier"><code class="varname">hbase.hregion.memstore.block.multiplier</code></a>.
|
||||
If there is enough RAM, increasing this can help.
|
||||
</p></div><div class="section" title="11.4.9. hbase.regionserver.checksum.verify"><div class="titlepage"><div><div><h3 class="title"><a name="hbase.regionserver.checksum.verify"></a>11.4.9. <code class="varname">hbase.regionserver.checksum.verify</code></h3></div></div></div><p>Have HBase write the checksum into the datablock and save
|
||||
having to do the checksum seek whenever you read. See the
|
||||
release note on <a class="link" href="https://issues.apache.org/jira/browse/HBASE-5074" target="_top">HBASE-5074 support checksums in HBase block cache</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.configurations';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="jvm.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.zookeeper.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.3. Java </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.5. ZooKeeper</td></tr></table></div></body></html>
|
|
@ -1,21 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.9. Deleting from HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.reading.html" title="11.8. Reading from HBase"><link rel="next" href="perf.hdfs.html" title="11.10. HDFS"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.9. Deleting from HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.reading.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.hdfs.html">Next</a></td></tr></table><hr></div><div class="section" title="11.9. Deleting from HBase"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.deleting"></a>11.9. Deleting from HBase</h2></div></div></div><div class="section" title="11.9.1. Using HBase Tables as Queues"><div class="titlepage"><div><div><h3 class="title"><a name="perf.deleting.queue"></a>11.9.1. Using HBase Tables as Queues</h3></div></div></div><p>HBase tables are sometimes used as queues. In this case, special care must be taken to regularly perform major compactions on tables used in
|
||||
this manner. As is documented in <a class="xref" href="datamodel.html" title="Chapter 5. Data Model">Chapter 5, <i>Data Model</i></a>, marking rows as deleted creates additional StoreFiles which then need to be processed
|
||||
on reads. Tombstones only get cleaned up with major compactions.
|
||||
</p><p>See also <a class="xref" href="regions.arch.html#compaction" title="9.7.5.5. Compaction">Section 9.7.5.5, “Compaction”</a> and <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html#majorCompact%28java.lang.String%29" target="_top">HBaseAdmin.majorCompact</a>.
|
||||
</p></div><div class="section" title="11.9.2. Delete RPC Behavior"><div class="titlepage"><div><div><h3 class="title"><a name="perf.deleting.rpc"></a>11.9.2. Delete RPC Behavior</h3></div></div></div><p>Be aware that <code class="code">htable.delete(Delete)</code> doesn't use the writeBuffer. It will execute a RegionServer RPC with each invocation.
|
||||
For a large number of deletes, consider <code class="code">htable.delete(List)</code>.
|
||||
</p><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#delete%28org.apache.hadoop.hbase.client.Delete%29" target="_top">http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#delete%28org.apache.hadoop.hbase.client.Delete%29</a>
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.deleting';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.reading.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.hdfs.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.8. Reading from HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.10. HDFS</td></tr></table></div></body></html>
|
|
@ -1,19 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.11. Amazon EC2</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.hdfs.html" title="11.10. HDFS"><link rel="next" href="perf.casestudy.html" title="11.12. Case Studies"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.11. Amazon EC2</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.hdfs.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.casestudy.html">Next</a></td></tr></table><hr></div><div class="section" title="11.11. Amazon EC2"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.ec2"></a>11.11. Amazon EC2</h2></div></div></div><p>Performance questions are common on Amazon EC2 environments because it is a shared environment. You will
|
||||
not see the same throughput as a dedicated server. In terms of running tests on EC2, run them several times for the same
|
||||
reason (i.e., it's a shared environment and you don't know what else is happening on the server).
|
||||
</p><p>If you are running on EC2 and post performance questions on the dist-list, please state this fact up-front
|
||||
because EC2 issues are practically a separate class of performance issues.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.ec2';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.hdfs.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.casestudy.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.10. HDFS </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.12. Case Studies</td></tr></table></div></body></html>
|
|
@ -1,51 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.10. HDFS</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.deleting.html" title="11.9. Deleting from HBase"><link rel="next" href="perf.ec2.html" title="11.11. Amazon EC2"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.10. HDFS</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.deleting.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.ec2.html">Next</a></td></tr></table><hr></div><div class="section" title="11.10. HDFS"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.hdfs"></a>11.10. HDFS</h2></div></div></div><p>Because HBase runs on <a class="xref" href="arch.hdfs.html" title="9.9. HDFS">Section 9.9, “HDFS”</a> it is important to understand how it works and how it affects
|
||||
HBase.
|
||||
</p><div class="section" title="11.10.1. Current Issues With Low-Latency Reads"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hdfs.curr"></a>11.10.1. Current Issues With Low-Latency Reads</h3></div></div></div><p>The original use-case for HDFS was batch processing. As such, low-latency reads were historically not a priority.
|
||||
With the increased adoption of Apache HBase this is changing, and several improvements are already in development.
|
||||
See the
|
||||
<a class="link" href="https://issues.apache.org/jira/browse/HDFS-1599" target="_top">Umbrella Jira Ticket for HDFS Improvements for HBase</a>.
|
||||
</p></div><div class="section" title="11.10.2. Leveraging local data"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hdfs.configs.localread"></a>11.10.2. Leveraging local data</h3></div></div></div><p>Since Hadoop 1.0.0 (also 0.22.1, 0.23.1, CDH3u3 and HDP 1.0) via
|
||||
<a class="link" href="https://issues.apache.org/jira/browse/HDFS-2246" target="_top">HDFS-2246</a>,
|
||||
it is possible for the DFSClient to take a "short circuit" and
|
||||
read directly from disk instead of going through the DataNode when the
|
||||
data is local. What this means for HBase is that the RegionServers can
|
||||
read directly off their machine's disks instead of having to open a
|
||||
socket to talk to the DataNode, the former being generally much
|
||||
faster<sup>[<a name="d2121e7324" href="#ftn.d2121e7324" class="footnote">30</a>]</sup>.
|
||||
Also see <a class="link" href="http://search-hadoop.com/m/zV6dKrLCVh1" target="_top">HBase, mail # dev - read short circuit</a> thread for
|
||||
more discussion around short circuit reads.
|
||||
</p><p>To enable "short circuit" reads, you must set two configurations.
|
||||
First, the hdfs-site.xml needs to be amended. Set
|
||||
the property <code class="varname">dfs.block.local-path-access.user</code>
|
||||
to be the <span class="emphasis"><em>only</em></span> user that can use the shortcut.
|
||||
This has to be the user that started HBase. Then in hbase-site.xml,
|
||||
set <code class="varname">dfs.client.read.shortcircuit</code> to be <code class="varname">true</code>.
|
||||
</p><p>
|
||||
For optimal performance when short-circuit reads are enabled, it is recommended that HDFS checksums are disabled.
|
||||
To maintain data integrity with HDFS checksums disabled, HBase can be configured to write its own checksums into
|
||||
its datablocks and verify against these. See <a class="xref" href="perf.configurations.html#hbase.regionserver.checksum.verify" title="11.4.9. hbase.regionserver.checksum.verify">Section 11.4.9, “<code class="varname">hbase.regionserver.checksum.verify</code>”</a>.
|
||||
</p><p>
|
||||
The DataNodes need to be restarted in order to pick up the new
|
||||
configuration. Be aware that if a process started under another
|
||||
username than the one configured here also has the shortcircuit
|
||||
enabled, it will get an Exception regarding an unauthorized access but
|
||||
the data will still be read.
|
||||
</p></div><div class="section" title="11.10.3. Performance Comparisons of HBase vs. HDFS"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hdfs.comp"></a>11.10.3. Performance Comparisons of HBase vs. HDFS</h3></div></div></div><p>A fairly common question on the dist-list is why HBase isn't as performant as HDFS files in a batch context (e.g., as
|
||||
a MapReduce source or sink). The short answer is that HBase is doing a lot more than HDFS (e.g., reading the KeyValues,
|
||||
returning the most current row or specified timestamps, etc.), and as such HBase is 4-5 times slower than HDFS in this
|
||||
processing context. Not that there isn't room for improvement (and this gap will, over time, be reduced), but HDFS
|
||||
will always be faster in this use-case.
|
||||
</p></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e7324" href="#d2121e7324" class="para">30</a>] </sup>See JD's <a class="link" href="http://files.meetup.com/1350427/hug_ebay_jdcryans.pdf" target="_top">Performance Talk</a></p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.hdfs';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.deleting.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.ec2.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.9. Deleting from HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.11. Amazon EC2</td></tr></table></div></body></html>
|
|
@ -1,37 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.2. Network</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="next" href="jvm.html" title="11.3. Java"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.2. Network</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="performance.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="jvm.html">Next</a></td></tr></table><hr></div><div class="section" title="11.2. Network"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.network"></a>11.2. Network</h2></div></div></div><p>
|
||||
Perhaps the most important factor in avoiding network issues degrading Hadoop and HBase performance is the switching hardware
|
||||
that is used; decisions made early in the scope of the project can cause major problems when you double or triple the size of your cluster (or more).
|
||||
</p><p>
|
||||
Important items to consider:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Switching capacity of the device</li><li class="listitem">Number of systems connected</li><li class="listitem">Uplink capacity</li></ul></div><p>
|
||||
</p><div class="section" title="11.2.1. Single Switch"><div class="titlepage"><div><div><h3 class="title"><a name="perf.network.1switch"></a>11.2.1. Single Switch</h3></div></div></div><p>The single most important factor in this configuration is that the switching capacity of the hardware is capable of
|
||||
handling the traffic which can be generated by all systems connected to the switch. Some lower priced commodity hardware
|
||||
can have a slower switching capacity than could be utilized by a full switch.
|
||||
</p></div><div class="section" title="11.2.2. Multiple Switches"><div class="titlepage"><div><div><h3 class="title"><a name="perf.network.2switch"></a>11.2.2. Multiple Switches</h3></div></div></div><p>Multiple switches are a potential pitfall in the architecture. The most common configuration of lower priced hardware is a
|
||||
simple 1Gbps uplink from one switch to another. This often overlooked pinch point can easily become a bottleneck for cluster communication.
|
||||
Especially with MapReduce jobs that are both reading and writing a lot of data, the communication across this uplink could be saturated.
|
||||
</p><p>Mitigation of this issue is fairly simple and can be accomplished in multiple ways:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Use appropriate hardware for the scale of the cluster which you're attempting to build.</li><li class="listitem">Use larger single switch configurations i.e. single 48 port as opposed to 2x 24 port</li><li class="listitem">Configure port trunking for uplinks to utilize multiple interfaces to increase cross switch bandwidth.</li></ul></div><p>
|
||||
</p></div><div class="section" title="11.2.3. Multiple Racks"><div class="titlepage"><div><div><h3 class="title"><a name="perf.network.multirack"></a>11.2.3. Multiple Racks</h3></div></div></div><p>Multiple rack configurations carry the same potential issues as multiple switches, and can suffer performance degradation from two main areas:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Poor switch capacity performance</li><li class="listitem">Insufficient uplink to another rack</li></ul></div><p>
|
||||
If the switches in your rack have appropriate switching capacity to handle all the hosts at full speed, the next most likely issue will be caused by homing
|
||||
more of your cluster across racks. The easiest way to avoid issues when spanning multiple racks is to use port trunking to create a bonded uplink to other racks.
|
||||
The downside of this method, however, is the overhead of ports that could potentially be used. For example, creating an 8Gbps port channel from rack
|
||||
A to rack B, using 8 of your 24 ports to communicate between racks gives you a poor ROI; using too few, however, can mean you're not getting the most out of your cluster.
|
||||
</p><p>Using 10Gbe links between racks will greatly increase performance, and assuming your switches support a 10Gbe uplink or allow for an expansion card will allow you to
|
||||
save your ports for machines as opposed to uplinks.
|
||||
</p></div><div class="section" title="11.2.4. Network Interfaces"><div class="titlepage"><div><div><h3 class="title"><a name="perf.network.ints"></a>11.2.4. Network Interfaces</h3></div></div></div><p>Are all the network interfaces functioning correctly? Are you sure? See the Troubleshooting Case Study in <a class="xref" href="casestudies.perftroub.html#casestudies.slownode" title="13.3.1. Case Study #1 (Performance Issue On A Single Node)">Section 13.3.1, “Case Study #1 (Performance Issue On A Single Node)”</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.network';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="performance.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="jvm.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 11. Apache HBase (TM) Performance Tuning </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.3. Java</td></tr></table></div></body></html>
|
|
@ -1,101 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.8. Reading from HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.writing.html" title="11.7. Writing to HBase"><link rel="next" href="perf.deleting.html" title="11.9. Deleting from HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.8. Reading from HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.writing.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.deleting.html">Next</a></td></tr></table><hr></div><div class="section" title="11.8. Reading from HBase"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.reading"></a>11.8. Reading from HBase</h2></div></div></div><div class="section" title="11.8.1. Scan Caching"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.caching"></a>11.8.1. Scan Caching</h3></div></div></div><p>If HBase is used as an input source for a MapReduce job, for
|
||||
example, make sure that the input <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" target="_top">Scan</a>
|
||||
instance to the MapReduce job has <code class="methodname">setCaching</code> set to something greater
|
||||
than the default (which is 1). Using the default value means that the
|
||||
map-task will make a call back to the region-server for every record
|
||||
processed. Setting this value to 500, for example, will transfer 500
|
||||
rows at a time to the client to be processed. There is a cost/benefit to
|
||||
have the cache value be large because it costs more in memory for both
|
||||
client and RegionServer, so bigger isn't always better.</p><div class="section" title="11.8.1.1. Scan Caching in MapReduce Jobs"><div class="titlepage"><div><div><h4 class="title"><a name="perf.hbase.client.caching.mr"></a>11.8.1.1. Scan Caching in MapReduce Jobs</h4></div></div></div><p>Scan settings in MapReduce jobs deserve special attention. Timeouts can result (e.g., UnknownScannerException)
|
||||
in Map tasks if it takes longer to process a batch of records before the client goes back to the RegionServer for the
|
||||
next set of data. This problem can occur because there is non-trivial processing occurring per row. If you process
|
||||
rows quickly, set caching higher. If you process rows more slowly (e.g., lots of transformations per row, writes),
|
||||
then set caching lower.
|
||||
</p><p>Timeouts can also happen in a non-MapReduce use case (i.e., single threaded HBase client doing a Scan), but the
|
||||
processing that is often performed in MapReduce jobs tends to exacerbate this issue.
|
||||
</p></div></div><div class="section" title="11.8.2. Scan Attribute Selection"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.selection"></a>11.8.2. Scan Attribute Selection</h3></div></div></div><p>Whenever a Scan is used to process large numbers of rows (and especially when used
|
||||
as a MapReduce source), be aware of which attributes are selected. If <code class="code">scan.addFamily</code> is called
|
||||
then <span class="emphasis"><em>all</em></span> of the attributes in the specified ColumnFamily will be returned to the client.
|
||||
If only a small number of the available attributes are to be processed, then only those attributes should be specified
|
||||
in the input scan because attribute over-selection is a non-trivial performance penalty over large datasets.
|
||||
</p></div><div class="section" title="11.8.3. MapReduce - Input Splits"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.mr.input"></a>11.8.3. MapReduce - Input Splits</h3></div></div></div><p>For MapReduce jobs that use HBase tables as a source, if there is a pattern where the "slow" map tasks seem to
|
||||
have the same Input Split (i.e., the RegionServer serving the data), see the
|
||||
Troubleshooting Case Study in <a class="xref" href="casestudies.perftroub.html#casestudies.slownode" title="13.3.1. Case Study #1 (Performance Issue On A Single Node)">Section 13.3.1, “Case Study #1 (Performance Issue On A Single Node)”</a>.
|
||||
</p></div><div class="section" title="11.8.4. Close ResultScanners"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.scannerclose"></a>11.8.4. Close ResultScanners</h3></div></div></div><p>This isn't so much about improving performance but rather
|
||||
<span class="emphasis"><em>avoiding</em></span> performance problems. If you forget to
|
||||
close <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/ResultScanner.html" target="_top">ResultScanners</a>
|
||||
you can cause problems on the RegionServers. Always have ResultScanner
|
||||
processing enclosed in try/finally blocks... </p><pre class="programlisting">
|
||||
Scan scan = new Scan();
|
||||
// set attrs...
|
||||
ResultScanner rs = htable.getScanner(scan);
|
||||
try {
|
||||
for (Result r = rs.next(); r != null; r = rs.next()) {
|
||||
// process result...
}
|
||||
} finally {
|
||||
rs.close(); // always close the ResultScanner!
|
||||
}
|
||||
htable.close();</pre></div><div class="section" title="11.8.5. Block Cache"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.blockcache"></a>11.8.5. Block Cache</h3></div></div></div><p><a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" target="_top">Scan</a>
|
||||
instances can be set to use the block cache in the RegionServer via the
|
||||
<code class="methodname">setCacheBlocks</code> method. For input Scans to MapReduce jobs, this should be
|
||||
<code class="varname">false</code>. For frequently accessed rows, it is advisable to use the block
|
||||
cache.</p></div><div class="section" title="11.8.6. Optimal Loading of Row Keys"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.rowkeyonly"></a>11.8.6. Optimal Loading of Row Keys</h3></div></div></div><p>When performing a table <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html" target="_top">scan</a>
|
||||
where only the row keys are needed (no families, qualifiers, values or timestamps), add a FilterList with a
|
||||
<code class="varname">MUST_PASS_ALL</code> operator to the scanner using <code class="methodname">setFilter</code>. The filter list
|
||||
should include both a <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FirstKeyOnlyFilter.html" target="_top">FirstKeyOnlyFilter</a>
|
||||
and a <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/KeyOnlyFilter.html" target="_top">KeyOnlyFilter</a>.
|
||||
Using this filter combination will result in a worst case scenario of a RegionServer reading a single value from disk
|
||||
and minimal network traffic to the client for a single row.
|
||||
</p></div><div class="section" title="11.8.7. Concurrency: Monitor Data Spread"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.read.dist"></a>11.8.7. Concurrency: Monitor Data Spread</h3></div></div></div><p>When performing a high number of concurrent reads, monitor the data spread of the target tables. If the target table(s) have
|
||||
too few regions then the reads could likely be served from too few nodes. </p><p>See <a class="xref" href="perf.writing.html#precreate.regions" title="11.7.2. Table Creation: Pre-Creating Regions">Section 11.7.2, “
|
||||
Table Creation: Pre-Creating Regions
|
||||
”</a>, as well as <a class="xref" href="perf.configurations.html" title="11.4. HBase Configurations">Section 11.4, “HBase Configurations”</a> </p></div><div class="section" title="11.8.8. Bloom Filters"><div class="titlepage"><div><div><h3 class="title"><a name="blooms"></a>11.8.8. Bloom Filters</h3></div></div></div><p>Enabling Bloom Filters can save your having to go to disk and
|
||||
can help improve read latencies.</p><p><a class="link" href="http://en.wikipedia.org/wiki/Bloom_filter" target="_top">Bloom filters</a> were developed over in <a class="link" href="https://issues.apache.org/jira/browse/HBASE-1200" target="_top">HBase-1200
|
||||
Add bloomfilters</a>.<sup>[<a name="d2121e7149" href="#ftn.d2121e7149" class="footnote">28</a>]</sup><sup>[<a name="d2121e7161" href="#ftn.d2121e7161" class="footnote">29</a>]</sup></p><p>See also <a class="xref" href="perf.schema.html#schema.bloom" title="11.6.4. Bloom Filters">Section 11.6.4, “Bloom Filters”</a>.
|
||||
</p><div class="section" title="11.8.8.1. Bloom StoreFile footprint"><div class="titlepage"><div><div><h4 class="title"><a name="bloom_footprint"></a>11.8.8.1. Bloom StoreFile footprint</h4></div></div></div><p>Bloom filters add an entry to the <code class="classname">StoreFile</code>
|
||||
general <code class="classname">FileInfo</code> data structure and then two
|
||||
extra entries to the <code class="classname">StoreFile</code> metadata
|
||||
section.</p><div class="section" title="11.8.8.1.1. BloomFilter in the StoreFile FileInfo data structure"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e7185"></a>11.8.8.1.1. BloomFilter in the <code class="classname">StoreFile</code>
|
||||
<code class="classname">FileInfo</code> data structure</h5></div></div></div><p><code class="classname">FileInfo</code> has a
|
||||
<code class="varname">BLOOM_FILTER_TYPE</code> entry which is set to
|
||||
<code class="varname">NONE</code>, <code class="varname">ROW</code> or
|
||||
<code class="varname">ROWCOL.</code></p></div><div class="section" title="11.8.8.1.2. BloomFilter entries in StoreFile metadata"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e7209"></a>11.8.8.1.2. BloomFilter entries in <code class="classname">StoreFile</code>
|
||||
metadata</h5></div></div></div><p><code class="varname">BLOOM_FILTER_META</code> holds Bloom Size, Hash
|
||||
Function used, etc. It's small in size and is cached on
|
||||
<code class="classname">StoreFile.Reader</code> load</p><p><code class="varname">BLOOM_FILTER_DATA</code> is the actual bloomfilter
|
||||
data. Obtained on-demand. Stored in the LRU cache, if it is enabled
|
||||
(It's enabled by default).</p></div></div><div class="section" title="11.8.8.2. Bloom Filter Configuration"><div class="titlepage"><div><div><h4 class="title"><a name="config.bloom"></a>11.8.8.2. Bloom Filter Configuration</h4></div></div></div><div class="section" title="11.8.8.2.1. io.hfile.bloom.enabled global kill switch"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e7229"></a>11.8.8.2.1. <code class="varname">io.hfile.bloom.enabled</code> global kill
|
||||
switch</h5></div></div></div><p><code class="code">io.hfile.bloom.enabled</code> in
|
||||
<code class="classname">Configuration</code> serves as the kill switch in case
|
||||
something goes wrong. Default = <code class="varname">true</code>.</p></div><div class="section" title="11.8.8.2.2. io.hfile.bloom.error.rate"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e7244"></a>11.8.8.2.2. <code class="varname">io.hfile.bloom.error.rate</code></h5></div></div></div><p><code class="varname">io.hfile.bloom.error.rate</code> = average false
|
||||
positive rate. Default = 1%. Decrease rate by ½ (e.g. to .5%) == +1
|
||||
bit per bloom entry.</p></div><div class="section" title="11.8.8.2.3. io.hfile.bloom.max.fold"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e7252"></a>11.8.8.2.3. <code class="varname">io.hfile.bloom.max.fold</code></h5></div></div></div><p><code class="varname">io.hfile.bloom.max.fold</code> = guaranteed minimum
|
||||
fold rate. Most people should leave this alone. Default = 7, or can
|
||||
collapse to at least 1/128th of original size. See the
|
||||
<span class="emphasis"><em>Development Process</em></span> section of the document <a class="link" href="https://issues.apache.org/jira/secure/attachment/12444007/Bloom_Filters_in_HBase.pdf" target="_top">BloomFilters
|
||||
in HBase</a> for more on what this option means.</p></div></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e7149" href="#d2121e7149" class="para">28</a>] </sup>For description of the development process -- why static blooms
|
||||
rather than dynamic -- and for an overview of the unique properties
|
||||
that pertain to blooms in HBase, as well as possible future
|
||||
directions, see the <span class="emphasis"><em>Development Process</em></span> section
|
||||
of the document <a class="link" href="https://issues.apache.org/jira/secure/attachment/12444007/Bloom_Filters_in_HBase.pdf" target="_top">BloomFilters
|
||||
in HBase</a> attached to <a class="link" href="https://issues.apache.org/jira/browse/HBASE-1200" target="_top">HBase-1200</a>.</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e7161" href="#d2121e7161" class="para">29</a>] </sup>The bloom filters described here are actually version two of
|
||||
blooms in HBase. In versions up to 0.19.x, HBase had a dynamic bloom
|
||||
option based on work done by the <a class="link" href="http://www.one-lab.org" target="_top">European Commission One-Lab
|
||||
Project 034819</a>. The core of the HBase bloom work was later
|
||||
pulled up into Hadoop to implement org.apache.hadoop.io.BloomMapFile.
|
||||
Version 1 of HBase blooms never worked that well. Version 2 is a
|
||||
rewrite from scratch though again it starts with the one-lab
|
||||
work.</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.reading';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.writing.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.deleting.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.7. Writing to HBase </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.9. Deleting from HBase</td></tr></table></div></body></html>
|
|
@ -1,46 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.6. Schema Design</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.zookeeper.html" title="11.5. ZooKeeper"><link rel="next" href="perf.writing.html" title="11.7. Writing to HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.6. Schema Design</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.zookeeper.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.writing.html">Next</a></td></tr></table><hr></div><div class="section" title="11.6. Schema Design"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.schema"></a>11.6. Schema Design</h2></div></div></div><div class="section" title="11.6.1. Number of Column Families"><div class="titlepage"><div><div><h3 class="title"><a name="perf.number.of.cfs"></a>11.6.1. Number of Column Families</h3></div></div></div><p>See <a class="xref" href="number.of.cfs.html" title="6.2. On the number of column families">Section 6.2, “
|
||||
On the number of column families
|
||||
”</a>.</p></div><div class="section" title="11.6.2. Key and Attribute Lengths"><div class="titlepage"><div><div><h3 class="title"><a name="perf.schema.keys"></a>11.6.2. Key and Attribute Lengths</h3></div></div></div><p>See <a class="xref" href="rowkey.design.html#keysize" title="6.3.2. Try to minimize row and column sizes">Section 6.3.2, “Try to minimize row and column sizes”</a>. See also <a class="xref" href="perf.schema.html#perf.compression.however" title="11.6.7.1. However...">Section 11.6.7.1, “However...”</a> for
|
||||
compression caveats.</p></div><div class="section" title="11.6.3. Table RegionSize"><div class="titlepage"><div><div><h3 class="title"><a name="schema.regionsize"></a>11.6.3. Table RegionSize</h3></div></div></div><p>The regionsize can be set on a per-table basis via <code class="code">setFileSize</code> on
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html" target="_top">HTableDescriptor</a> in the
|
||||
event where certain tables require different regionsizes than the configured default regionsize.
|
||||
</p><p>See <a class="xref" href="perf.configurations.html#perf.number.of.regions" title="11.4.1. Number of Regions">Section 11.4.1, “Number of Regions”</a> for more information.
|
||||
</p></div><div class="section" title="11.6.4. Bloom Filters"><div class="titlepage"><div><div><h3 class="title"><a name="schema.bloom"></a>11.6.4. Bloom Filters</h3></div></div></div><p>Bloom Filters can be enabled per-ColumnFamily.
|
||||
Use <code class="code">HColumnDescriptor.setBloomFilterType(NONE | ROW |
|
||||
ROWCOL)</code> to enable blooms per Column Family. Default =
|
||||
<code class="varname">NONE</code> for no bloom filters. If
|
||||
<code class="varname">ROW</code>, the hash of the row will be added to the bloom
|
||||
on each insert. If <code class="varname">ROWCOL</code>, the hash of the row +
|
||||
column family + column family qualifier will be added to the bloom on
|
||||
each key insert.</p><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html" target="_top">HColumnDescriptor</a> and
|
||||
<a class="xref" href="perf.reading.html#blooms" title="11.8.8. Bloom Filters">Section 11.8.8, “Bloom Filters”</a> for more information or this answer on Quora,
|
||||
<a class="link" href="http://www.quora.com/How-are-bloom-filters-used-in-HBase" target="_top">How are bloom filters used in HBase?</a>.
|
||||
</p></div><div class="section" title="11.6.5. ColumnFamily BlockSize"><div class="titlepage"><div><div><h3 class="title"><a name="schema.cf.blocksize"></a>11.6.5. ColumnFamily BlockSize</h3></div></div></div><p>The blocksize can be configured for each ColumnFamily in a table, and this defaults to 64k. Larger cell values require larger blocksizes.
|
||||
There is an inverse relationship between blocksize and the resulting StoreFile indexes (i.e., if the blocksize is doubled then the resulting
|
||||
indexes should be roughly halved).
|
||||
</p><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html" target="_top">HColumnDescriptor</a>
|
||||
and <a class="xref" href="regions.arch.html#store" title="9.7.5. Store">Section 9.7.5, “Store”</a> for more information.
|
||||
</p></div><div class="section" title="11.6.6. In-Memory ColumnFamilies"><div class="titlepage"><div><div><h3 class="title"><a name="cf.in.memory"></a>11.6.6. In-Memory ColumnFamilies</h3></div></div></div><p>ColumnFamilies can optionally be defined as in-memory. Data is still persisted to disk, just like any other ColumnFamily.
|
||||
In-memory blocks have the highest priority in the <a class="xref" href="regionserver.arch.html#block.cache" title="9.6.4. Block Cache">Section 9.6.4, “Block Cache”</a>, but it is not a guarantee that the entire table
|
||||
will be in memory.
|
||||
</p><p>See <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html" target="_top">HColumnDescriptor</a> for more information.
|
||||
</p></div><div class="section" title="11.6.7. Compression"><div class="titlepage"><div><div><h3 class="title"><a name="perf.compression"></a>11.6.7. Compression</h3></div></div></div><p>Production systems should use compression with their ColumnFamily definitions. See <a class="xref" href="compression.html" title="Appendix C. Compression In HBase">Appendix C, <i>Compression In HBase</i></a> for more information.
|
||||
</p><div class="section" title="11.6.7.1. However..."><div class="titlepage"><div><div><h4 class="title"><a name="perf.compression.however"></a>11.6.7.1. However...</h4></div></div></div><p>Compression deflates data <span class="emphasis"><em>on disk</em></span>. When it's in-memory (e.g., in the
|
||||
MemStore) or on the wire (e.g., transferring between RegionServer and Client) it's inflated.
|
||||
So while using ColumnFamily compression is a best practice, it's not going to completely eliminate
|
||||
the impact of over-sized Keys, over-sized ColumnFamily names, or over-sized Column names.
|
||||
</p><p>See <a class="xref" href="rowkey.design.html#keysize" title="6.3.2. Try to minimize row and column sizes">Section 6.3.2, “Try to minimize row and column sizes”</a> for schema design tips, and <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a> for more information on how HBase stores data internally.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.schema';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.zookeeper.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.writing.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.5. ZooKeeper </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.7. Writing to HBase</td></tr></table></div></body></html>
|
|
@ -1,76 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.7. Writing to HBase</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.schema.html" title="11.6. Schema Design"><link rel="next" href="perf.reading.html" title="11.8. Reading from HBase"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.7. Writing to HBase</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.schema.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.reading.html">Next</a></td></tr></table><hr></div><div class="section" title="11.7. Writing to HBase"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.writing"></a>11.7. Writing to HBase</h2></div></div></div><div class="section" title="11.7.1. Batch Loading"><div class="titlepage"><div><div><h3 class="title"><a name="perf.batch.loading"></a>11.7.1. Batch Loading</h3></div></div></div><p>Use the bulk load tool if you can. See
|
||||
<a class="xref" href="arch.bulk.load.html" title="9.8. Bulk Loading">Section 9.8, “Bulk Loading”</a>.
|
||||
Otherwise, pay attention to the below.
|
||||
</p></div><div class="section" title="11.7.2. Table Creation: Pre-Creating Regions"><div class="titlepage"><div><div><h3 class="title"><a name="precreate.regions"></a>11.7.2.
|
||||
Table Creation: Pre-Creating Regions
|
||||
</h3></div></div></div><p>
|
||||
Tables in HBase are initially created with one region by default. For bulk imports, this means that all clients will write to the same region
|
||||
until it is large enough to split and become distributed across the cluster. A useful pattern to speed up the bulk import process is to pre-create empty regions.
|
||||
Be somewhat conservative in this, because too-many regions can actually degrade performance.
|
||||
</p><p>There are two different approaches to pre-creating splits. The first approach is to rely on the default <code class="code">HBaseAdmin</code> strategy
|
||||
(which is implemented in <code class="code">Bytes.split</code>)...
|
||||
</p><pre class="programlisting">
|
||||
byte[] startKey = ...; // your lowest key
|
||||
byte[] endKey = ...; // your highest key
|
||||
int numberOfRegions = ...; // # of regions to create
|
||||
admin.createTable(table, startKey, endKey, numberOfRegions);
|
||||
</pre><p>And the other approach is to define the splits yourself...
|
||||
</p><pre class="programlisting">
|
||||
byte[][] splits = ...; // create your own splits
|
||||
admin.createTable(table, splits);
|
||||
</pre><p>
|
||||
See <a class="xref" href="rowkey.design.html#rowkey.regionsplits" title="6.3.6. Relationship Between RowKeys and Region Splits">Section 6.3.6, “Relationship Between RowKeys and Region Splits”</a> for issues related to understanding your keyspace and pre-creating regions.
|
||||
</p></div><div class="section" title="11.7.3. Table Creation: Deferred Log Flush"><div class="titlepage"><div><div><h3 class="title"><a name="def.log.flush"></a>11.7.3.
|
||||
Table Creation: Deferred Log Flush
|
||||
</h3></div></div></div><p>
|
||||
The default behavior for Puts using the Write Ahead Log (WAL) is that <code class="classname">HLog</code> edits will be written immediately. If deferred log flush is used,
|
||||
WAL edits are kept in memory until the flush period. The benefit is aggregated and asynchronous <code class="classname">HLog</code> writes, but the potential downside is that if
|
||||
the RegionServer goes down the yet-to-be-flushed edits are lost. This is safer, however, than not using WAL at all with Puts.
|
||||
</p><p>
|
||||
Deferred log flush can be configured on tables via <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html" target="_top">HTableDescriptor</a>. The default value of <code class="varname">hbase.regionserver.optionallogflushinterval</code> is 1000ms.
|
||||
</p></div><div class="section" title="11.7.4. HBase Client: AutoFlush"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.autoflush"></a>11.7.4. HBase Client: AutoFlush</h3></div></div></div><p>When performing a lot of Puts, make sure that setAutoFlush is set
|
||||
to false on your <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html" target="_top">HTable</a>
|
||||
instance. Otherwise, the Puts will be sent one at a time to the
|
||||
RegionServer. Puts added via <code class="code"> htable.add(Put)</code> and <code class="code"> htable.add( &lt;List&gt; Put)</code>
|
||||
wind up in the same write buffer. If <code class="code">autoFlush = false</code>,
|
||||
these messages are not sent until the write-buffer is filled. To
|
||||
explicitly flush the messages, call <code class="methodname">flushCommits</code>.
|
||||
Calling <code class="methodname">close</code> on the <code class="classname">HTable</code>
|
||||
instance will invoke <code class="methodname">flushCommits</code>.</p></div><div class="section" title="11.7.5. HBase Client: Turn off WAL on Puts"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.putwal"></a>11.7.5. HBase Client: Turn off WAL on Puts</h3></div></div></div><p>A frequently discussed option for increasing throughput on <code class="classname">Put</code>s is to call <code class="code">writeToWAL(false)</code>. Turning this off means
|
||||
that the RegionServer will <span class="emphasis"><em>not</em></span> write the <code class="classname">Put</code> to the Write Ahead Log,
|
||||
only into the memstore, HOWEVER the consequence is that if there
|
||||
is a RegionServer failure <span class="emphasis"><em>there will be data loss</em></span>.
|
||||
If <code class="code">writeToWAL(false)</code> is used, do so with extreme caution. You may find in actuality that
|
||||
it makes little difference if your load is well distributed across the cluster.
|
||||
</p><p>In general, it is best to use WAL for Puts, and where loading throughput
|
||||
is a concern to use <a class="link" href="perf.writing.html#perf.batch.loading" title="11.7.1. Batch Loading">bulk loading</a> techniques instead.
|
||||
</p></div><div class="section" title="11.7.6. HBase Client: Group Puts by RegionServer"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.client.regiongroup"></a>11.7.6. HBase Client: Group Puts by RegionServer</h3></div></div></div><p>In addition to using the writeBuffer, grouping <code class="classname">Put</code>s by RegionServer can reduce the number of client RPC calls per writeBuffer flush.
|
||||
There is a utility <code class="classname">HTableUtil</code> currently on TRUNK that does this, but you can either copy that or implement your own version for
|
||||
those still on 0.90.x or earlier.
|
||||
</p></div><div class="section" title="11.7.7. MapReduce: Skip The Reducer"><div class="titlepage"><div><div><h3 class="title"><a name="perf.hbase.write.mr.reducer"></a>11.7.7. MapReduce: Skip The Reducer</h3></div></div></div><p>When writing a lot of data to an HBase table from a MR job (e.g., with <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableOutputFormat.html" target="_top">TableOutputFormat</a>), and specifically where Puts are being emitted
|
||||
from the Mapper, skip the Reducer step. When a Reducer step is used, all of the output (Puts) from the Mapper will get spooled to disk, then sorted/shuffled to other
|
||||
Reducers that will most likely be off-node. It's far more efficient to just write directly to HBase.
|
||||
</p><p>For summary jobs where HBase is used as a source and a sink, then writes will be coming from the Reducer step (e.g., summarize values then write out result).
|
||||
This is a different processing problem than the above case.
|
||||
</p></div><div class="section" title="11.7.8. Anti-Pattern: One Hot Region"><div class="titlepage"><div><div><h3 class="title"><a name="perf.one.region"></a>11.7.8. Anti-Pattern: One Hot Region</h3></div></div></div><p>If all your data is being written to one region at a time, then re-read the
|
||||
section on processing <a class="link" href="rowkey.design.html#timeseries" title="6.3.1. Monotonically Increasing Row Keys/Timeseries Data">timeseries</a> data.</p><p>Also, if you are pre-splitting regions and all your data is <span class="emphasis"><em>still</em></span> winding up in a single region even though
|
||||
your keys aren't monotonically increasing, confirm that your keyspace actually works with the split strategy. There are a
|
||||
variety of reasons that regions may appear "well split" but won't work with your data. As
|
||||
the HBase client communicates directly with the RegionServers, the region location for a given row can be obtained via
|
||||
<a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#getRegionLocation%28byte[]%29" target="_top">HTable.getRegionLocation</a>.
|
||||
</p><p>See <a class="xref" href="perf.writing.html#precreate.regions" title="11.7.2. Table Creation: Pre-Creating Regions">Section 11.7.2, “
|
||||
Table Creation: Pre-Creating Regions
|
||||
”</a>, as well as <a class="xref" href="perf.configurations.html" title="11.4. HBase Configurations">Section 11.4, “HBase Configurations”</a> </p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.writing';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.schema.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.reading.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.6. Schema Design </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.8. Reading from HBase</td></tr></table></div></body></html>
|
|
@ -1,16 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>11.5. ZooKeeper</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="performance.html" title="Chapter 11. Apache HBase (TM) Performance Tuning"><link rel="prev" href="perf.configurations.html" title="11.4. HBase Configurations"><link rel="next" href="perf.schema.html" title="11.6. Schema Design"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">11.5. ZooKeeper</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="perf.configurations.html">Prev</a> </td><th width="60%" align="center">Chapter 11. Apache HBase (TM) Performance Tuning</th><td width="20%" align="right"> <a accesskey="n" href="perf.schema.html">Next</a></td></tr></table><hr></div><div class="section" title="11.5. ZooKeeper"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="perf.zookeeper"></a>11.5. ZooKeeper</h2></div></div></div><p>See <a class="xref" href="zookeeper.html" title="Chapter 16. ZooKeeper">Chapter 16, <i>ZooKeeper</i></a> for information on configuring ZooKeeper, and see the part
|
||||
about having a dedicated disk.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'perf.zookeeper';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="perf.configurations.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="performance.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="perf.schema.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">11.4. HBase Configurations </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 11.6. Schema Design</td></tr></table></div></body></html>
|
File diff suppressed because one or more lines are too long
|
@ -1,36 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.2. Physical View</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="datamodel.html" title="Chapter 5. Data Model"><link rel="next" href="table.html" title="5.3. Table"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.2. Physical View</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="datamodel.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="table.html">Next</a></td></tr></table><hr></div><div class="section" title="5.2. Physical View"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="physical.view"></a>5.2. Physical View</h2></div></div></div><p>
|
||||
Although at a conceptual level tables may be viewed as a sparse set of rows,
|
||||
physically they are stored on a per-column family basis. New columns
|
||||
(i.e., <code class="varname">columnfamily:column</code>) can be added to any
|
||||
column family without pre-announcing them.
|
||||
</p><div class="table"><a name="d2121e3006"></a><p class="title"><b>Table 5.2. ColumnFamily <code class="varname">anchor</code></b></p><div class="table-contents"><table summary="ColumnFamily anchor" border="1"><colgroup><col align="left" class="c1"><col align="left" class="c2"><col align="left" class="c3"></colgroup><thead><tr><th align="left">Row Key</th><th align="left">Time Stamp</th><th align="left">Column Family <code class="varname">anchor</code></th></tr></thead><tbody><tr><td align="left">"com.cnn.www"</td><td align="left">t9</td><td align="left"><code class="varname">anchor:cnnsi.com</code> = "CNN"</td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t8</td><td align="left"><code class="varname">anchor:my.look.ca</code> = "CNN.com"</td></tr></tbody></table></div></div><p><br class="table-break">
|
||||
</p><div class="table"><a name="d2121e3045"></a><p class="title"><b>Table 5.3. ColumnFamily <code class="varname">contents</code></b></p><div class="table-contents"><table summary="ColumnFamily contents" border="1"><colgroup><col align="left" class="c1"><col align="left" class="c2"><col align="left" class="c3"></colgroup><thead><tr><th align="left">Row Key</th><th align="left">Time Stamp</th><th align="left">ColumnFamily "contents:"</th></tr></thead><tbody><tr><td align="left">"com.cnn.www"</td><td align="left">t6</td><td align="left"><code class="varname">contents:html</code> = "<html>..."</td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t5</td><td align="left"><code class="varname">contents:html</code> = "<html>..."</td></tr><tr><td align="left">"com.cnn.www"</td><td align="left">t3</td><td align="left"><code class="varname">contents:html</code> = "<html>..."</td></tr></tbody></table></div></div><p><br class="table-break">
|
||||
It is important to note in the diagram above that the empty cells shown in the
|
||||
conceptual view are not stored since they need not be in a column-oriented
|
||||
storage format. Thus a request for the value of the <code class="varname">contents:html</code>
|
||||
column at time stamp <code class="literal">t8</code> would return no value. Similarly, a
|
||||
request for an <code class="varname">anchor:my.look.ca</code> value at time stamp
|
||||
<code class="literal">t9</code> would return no value. However, if no timestamp is
|
||||
supplied, the most recent value for a particular column would be returned
|
||||
and would also be the first one found since timestamps are stored in
|
||||
descending order. Thus a request for the values of all columns in the row
|
||||
<code class="varname">com.cnn.www</code> if no timestamp is specified would be:
|
||||
the value of <code class="varname">contents:html</code> from time stamp
|
||||
<code class="literal">t6</code>, the value of <code class="varname">anchor:cnnsi.com</code>
|
||||
from time stamp <code class="literal">t9</code>, the value of
|
||||
<code class="varname">anchor:my.look.ca</code> from time stamp <code class="literal">t8</code>.
|
||||
</p><p>For more information about the internals of how Apache HBase stores data, see <a class="xref" href="regions.arch.html" title="9.7. Regions">Section 9.7, “Regions”</a>.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'physical.view';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="datamodel.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="table.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 5. Data Model </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.3. Table</td></tr></table></div></body></html>
|
|
@ -1,40 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Preface</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="next" href="getting_started.html" title="Chapter 1. Getting Started"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Preface</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="book.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="getting_started.html">Next</a></td></tr></table><hr></div><div class="preface" title="Preface"><div class="titlepage"><div><div><h2 class="title"><a name="preface"></a>Preface</h2></div></div></div><p>This is the official reference guide for the <a class="link" href="http://hbase.apache.org/" target="_top">HBase</a> version it ships with.
|
||||
This document describes HBase version <span class="emphasis"><em>0.95-SNAPSHOT</em></span>.
|
||||
Herein you will find either the definitive documentation on an HBase topic
|
||||
as of its standing when the referenced HBase version shipped, or it
|
||||
will point to the location in <a class="link" href="http://hbase.apache.org/apidocs/index.html" target="_top">javadoc</a>,
|
||||
<a class="link" href="https://issues.apache.org/jira/browse/HBASE" target="_top">JIRA</a>
|
||||
or <a class="link" href="http://wiki.apache.org/hadoop/Hbase" target="_top">wiki</a> where
|
||||
the pertinent information can be found.</p><p>This reference guide is a work in progress. Feel free to add content by adding
|
||||
a patch to an issue up in the HBase <a class="link" href="https://issues.apache.org/jira/browse/HBASE" target="_top">JIRA</a>.</p><div class="note" title="Heads-up" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title"><a name="headsup"></a>Heads-up</h3><p>
|
||||
If this is your first foray into the wonderful world of
|
||||
Distributed Computing, then you are in for
|
||||
some interesting times. First off, distributed systems are
|
||||
hard; making a distributed system hum requires a disparate
|
||||
skillset that spans systems (hardware and software) and
|
||||
networking. Your cluster's operation can hiccup because of any
|
||||
of a myriad set of reasons from bugs in HBase itself through misconfigurations
|
||||
-- misconfiguration of HBase but also operating system misconfigurations --
|
||||
through to hardware problems whether it be a bug in your network card
|
||||
drivers or an underprovisioned RAM bus (to mention two recent
|
||||
examples of hardware issues that manifested as "HBase is slow").
|
||||
You will also need to do a recalibration if up to this point your
|
||||
computing has been bound to a single box. Here is one good
|
||||
starting point:
|
||||
<a class="link" href="http://en.wikipedia.org/wiki/Fallacies_of_Distributed_Computing" target="_top">Fallacies of Distributed Computing</a>.
|
||||
</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'preface';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="book.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="getting_started.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">
|
||||
The Apache HBase™ Reference Guide
|
||||
</td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 1. Getting Started</td></tr></table></div></body></html>
|
|
@ -1,110 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>1.2. Quick Start</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="getting_started.html" title="Chapter 1. Getting Started"><link rel="prev" href="getting_started.html" title="Chapter 1. Getting Started"><link rel="next" href="configuration.html" title="Chapter 2. Apache HBase (TM) Configuration"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">1.2. Quick Start</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="getting_started.html">Prev</a> </td><th width="60%" align="center">Chapter 1. Getting Started</th><td width="20%" align="right"> <a accesskey="n" href="configuration.html">Next</a></td></tr></table><hr></div><div class="section" title="1.2. Quick Start"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="quickstart"></a>1.2. Quick Start</h2></div></div></div><p>This guide describes setup of a standalone HBase instance that uses
|
||||
the local filesystem. It leads you through creating a table, inserting
|
||||
rows via the HBase <span class="command"><strong>shell</strong></span>, and then cleaning
|
||||
up and shutting down your standalone HBase instance. The below exercise
|
||||
should take no more than ten minutes (not including download time).</p><p>Before we proceed, make sure you are good on the below loopback prerequisite.</p><div class="note" title="Loopback IP" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title"><a name="loopback.ip.getting.started"></a>Loopback IP</h3><p>HBase expects the loopback IP address to be 127.0.0.1. Ubuntu and some other distributions,
|
||||
for example, will default to 127.0.1.1 and this will cause problems for you.
|
||||
</p><p><code class="filename">/etc/hosts</code> should look something like this:
|
||||
</p><pre class="programlisting">
|
||||
127.0.0.1 localhost
|
||||
127.0.0.1 ubuntu.ubuntu-domain ubuntu
|
||||
</pre><p>
|
||||
</p></div><div class="section" title="1.2.1. Download and unpack the latest stable release."><div class="titlepage"><div><div><h3 class="title"><a name="d2121e105"></a>1.2.1. Download and unpack the latest stable release.</h3></div></div></div><p>Choose a download site from this list of <a class="link" href="http://www.apache.org/dyn/closer.cgi/hbase/" target="_top">Apache Download
|
||||
Mirrors</a>. Click on the suggested top link. This will take you to a
|
||||
mirror of <span class="emphasis"><em>HBase Releases</em></span>. Click on the folder named
|
||||
<code class="filename">stable</code> and then download the file that ends in
|
||||
<code class="filename">.tar.gz</code> to your local filesystem; e.g.
|
||||
<code class="filename">hbase-0.94.2.tar.gz</code>.</p><p>Decompress and untar your download and then change into the
|
||||
unpacked directory.</p><pre class="programlisting">$ tar xfz hbase-0.95-SNAPSHOT.tar.gz
|
||||
$ cd hbase-0.95-SNAPSHOT
|
||||
</pre><p>At this point, you are ready to start HBase. But before starting
|
||||
it, edit <code class="filename">conf/hbase-site.xml</code>, the file you write
|
||||
your site-specific configurations into. Set
|
||||
<code class="varname">hbase.rootdir</code>, the directory HBase writes data to,
|
||||
and <code class="varname">hbase.zookeeper.property.dataDir</code>, the directory
|
||||
ZooKeeper writes its data to:
|
||||
</p><pre class="programlisting">&lt;?xml version="1.0"?&gt;
|
||||
&lt;?xml-stylesheet type="text/xsl" href="configuration.xsl"?&gt;
|
||||
&lt;configuration&gt;
|
||||
&lt;property&gt;
|
||||
&lt;name&gt;hbase.rootdir&lt;/name&gt;
|
||||
&lt;value&gt;file:///DIRECTORY/hbase&lt;/value&gt;
|
||||
&lt;/property&gt;
|
||||
&lt;property&gt;
|
||||
&lt;name&gt;hbase.zookeeper.property.dataDir&lt;/name&gt;
|
||||
&lt;value&gt;/DIRECTORY/zookeeper&lt;/value&gt;
|
||||
&lt;/property&gt;
|
||||
&lt;/configuration&gt;</pre><p> Replace <code class="varname">DIRECTORY</code> in the above with the
|
||||
path to the directory you would have HBase and ZooKeeper write their data. By default,
|
||||
<code class="varname">hbase.rootdir</code> is set to <code class="filename">/tmp/hbase-${user.name}</code>
|
||||
and similarly so for the default ZooKeeper data location which means you'll lose all
|
||||
your data whenever your server reboots unless you change it (Most operating systems clear
|
||||
<code class="filename">/tmp</code> on restart).</p></div><div class="section" title="1.2.2. Start HBase"><div class="titlepage"><div><div><h3 class="title"><a name="start_hbase"></a>1.2.2. Start HBase</h3></div></div></div><p>Now start HBase:</p><pre class="programlisting">$ ./bin/start-hbase.sh
|
||||
starting Master, logging to logs/hbase-user-master-example.org.out</pre><p>You should now have a running standalone HBase instance. In
|
||||
standalone mode, HBase runs all daemons in the one JVM; i.e. both
|
||||
the HBase and ZooKeeper daemons. HBase logs can be found in the
|
||||
<code class="filename">logs</code> subdirectory. Check them out especially if
|
||||
it seems HBase had trouble starting.</p><div class="note" title="Is java installed?" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Is <span class="application">java</span> installed?</h3><p>All of the above presumes a 1.6 version of Oracle
|
||||
<span class="application">java</span> is installed on your machine and
|
||||
available on your path (See <a class="xref" href="configuration.html#java" title="2.1.1. Java">Section 2.1.1, “Java”</a>); i.e. when you type
|
||||
<span class="application">java</span>, you see output that describes the
|
||||
options the java program takes (HBase requires java 6). If this is not
|
||||
the case, HBase will not start. Install java, edit
|
||||
<code class="filename">conf/hbase-env.sh</code>, uncommenting the
|
||||
<code class="envar">JAVA_HOME</code> line pointing it to your java install, then,
|
||||
retry the steps above.</p></div></div><div class="section" title="1.2.3. Shell Exercises"><div class="titlepage"><div><div><h3 class="title"><a name="shell_exercises"></a>1.2.3. Shell Exercises</h3></div></div></div><p>Connect to your running HBase via the <span class="command"><strong>shell</strong></span>.</p><pre class="programlisting">$ ./bin/hbase shell
|
||||
HBase Shell; enter 'help&lt;RETURN&gt;' for list of supported commands.
|
||||
Type "exit&lt;RETURN&gt;" to leave the HBase Shell
|
||||
Version: 0.90.0, r1001068, Fri Sep 24 13:55:42 PDT 2010
|
||||
|
||||
hbase(main):001:0> </pre><p>Type <span class="command"><strong>help</strong></span> and then
|
||||
<span class="command"><strong>&lt;RETURN&gt;</strong></span> to see a listing of shell commands and
|
||||
options. Browse at least the paragraphs at the end of the help emission
|
||||
for the gist of how variables and command arguments are entered into the
|
||||
HBase shell; in particular note how table names, rows, and columns,
|
||||
etc., must be quoted.</p><p>Create a table named <code class="varname">test</code> with a single column family named <code class="varname">cf</code>.
|
||||
Verify its creation by listing all tables and then insert some
|
||||
values.</p><pre class="programlisting">hbase(main):003:0> create 'test', 'cf'
|
||||
0 row(s) in 1.2200 seconds
|
||||
hbase(main):003:0> list 'test'
|
||||
..
|
||||
1 row(s) in 0.0550 seconds
|
||||
hbase(main):004:0> put 'test', 'row1', 'cf:a', 'value1'
|
||||
0 row(s) in 0.0560 seconds
|
||||
hbase(main):005:0> put 'test', 'row2', 'cf:b', 'value2'
|
||||
0 row(s) in 0.0370 seconds
|
||||
hbase(main):006:0> put 'test', 'row3', 'cf:c', 'value3'
|
||||
0 row(s) in 0.0450 seconds</pre><p>Above we inserted 3 values, one at a time. The first insert is at
|
||||
<code class="varname">row1</code>, column <code class="varname">cf:a</code> with a value of
|
||||
<code class="varname">value1</code>. Columns in HBase are comprised of a column family prefix --
|
||||
<code class="varname">cf</code> in this example -- followed by a colon and then a
|
||||
column qualifier suffix (<code class="varname">a</code> in this case).</p><p>Verify the data insert by running a scan of the table as follows</p><pre class="programlisting">hbase(main):007:0> scan 'test'
|
||||
ROW COLUMN+CELL
|
||||
row1 column=cf:a, timestamp=1288380727188, value=value1
|
||||
row2 column=cf:b, timestamp=1288380738440, value=value2
|
||||
row3 column=cf:c, timestamp=1288380747365, value=value3
|
||||
3 row(s) in 0.0590 seconds</pre><p>Get a single row</p><pre class="programlisting">hbase(main):008:0> get 'test', 'row1'
|
||||
COLUMN CELL
|
||||
cf:a timestamp=1288380727188, value=value1
|
||||
1 row(s) in 0.0400 seconds</pre><p>Now, disable and drop your table. This will clean up all done
|
||||
above.</p><pre class="programlisting">hbase(main):012:0> disable 'test'
|
||||
0 row(s) in 1.0930 seconds
|
||||
hbase(main):013:0> drop 'test'
|
||||
0 row(s) in 0.0770 seconds </pre><p>Exit the shell by typing exit.</p><pre class="programlisting">hbase(main):014:0> exit</pre></div><div class="section" title="1.2.4. Stopping HBase"><div class="titlepage"><div><div><h3 class="title"><a name="stopping"></a>1.2.4. Stopping HBase</h3></div></div></div><p>Stop your hbase instance by running the stop script.</p><pre class="programlisting">$ ./bin/stop-hbase.sh
|
||||
stopping hbase...............</pre></div><div class="section" title="1.2.5. Where to go next"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e265"></a>1.2.5. Where to go next</h3></div></div></div><p>The above described standalone setup is good for testing and
|
||||
experiments only. In the next chapter, <a class="xref" href="configuration.html" title="Chapter 2. Apache HBase (TM) Configuration">Chapter 2, <i>Apache HBase (TM) Configuration</i></a>,
|
||||
we'll go into depth on the different HBase run modes, system requirements
|
||||
for running HBase, and critical configurations for setting up a distributed HBase deployment.</p></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'quickstart';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="getting_started.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="getting_started.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="configuration.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 1. Getting Started </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> Chapter 2. Apache HBase (TM) Configuration</td></tr></table></div></body></html>
|
|
@ -1,179 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.7. Regions</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="regionserver.arch.html" title="9.6. RegionServer"><link rel="next" href="arch.bulk.load.html" title="9.8. Bulk Loading"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.7. Regions</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="regionserver.arch.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="arch.bulk.load.html">Next</a></td></tr></table><hr></div><div class="section" title="9.7. Regions"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="regions.arch"></a>9.7. Regions</h2></div></div></div><p>Regions are the basic element of availability and
|
||||
distribution for tables, and are comprised of a Store per Column Family. The hierarchy of objects
|
||||
is as follows:
|
||||
</p><pre class="programlisting">
|
||||
<code class="filename">Table</code> (HBase table)
|
||||
<code class="filename">Region</code> (Regions for the table)
|
||||
<code class="filename">Store</code> (Store per ColumnFamily for each Region for the table)
|
||||
<code class="filename">MemStore</code> (MemStore for each Store for each Region for the table)
|
||||
<code class="filename">StoreFile</code> (StoreFiles for each Store for each Region for the table)
|
||||
<code class="filename">Block</code> (Blocks within a StoreFile within a Store for each Region for the table)
|
||||
</pre><p>
|
||||
For a description of what HBase files look like when written to HDFS, see <a class="xref" href="trouble.namenode.html#trouble.namenode.hbase.objects" title="12.7.2. Browsing HDFS for HBase Objects">Section 12.7.2, “Browsing HDFS for HBase Objects”</a>.
|
||||
</p><div class="section" title="9.7.1. Region Size"><div class="titlepage"><div><div><h3 class="title"><a name="arch.regions.size"></a>9.7.1. Region Size</h3></div></div></div><p>Determining the "right" region size can be tricky, and there are a few factors
|
||||
to consider:</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><p>HBase scales by having regions across many servers. Thus if
|
||||
you have 2 regions for 16GB data, on a 20 node cluster your data
|
||||
will be concentrated on just a few machines - nearly the entire
|
||||
cluster will be idle. This really can't be stressed enough, since a
|
||||
common problem is loading 200MB data into HBase then wondering why
|
||||
your awesome 10 node cluster isn't doing anything.</p></li><li class="listitem"><p>On the other hand, high region count has been known to make things slow.
|
||||
This is getting better with each release of HBase, but it is probably better to have
|
||||
700 regions than 3000 for the same amount of data.</p></li><li class="listitem"><p>There is not much memory footprint difference between 1 region
|
||||
and 10 in terms of indexes, etc, held by the RegionServer.</p></li></ul></div><p>When starting off, it's probably best to stick to the default region-size, perhaps going
|
||||
smaller for hot tables (or manually split hot regions to spread the load over
|
||||
the cluster), or go with larger region sizes if your cell sizes tend to be
|
||||
largish (100k and up).</p><p>See <a class="xref" href="important_configurations.html#bigger.regions" title="2.5.2.6. Bigger Regions">Section 2.5.2.6, “Bigger Regions”</a> for more information on configuration.
|
||||
</p></div><div class="section" title="9.7.2. Region-RegionServer Assignment"><div class="titlepage"><div><div><h3 class="title"><a name="regions.arch.assignment"></a>9.7.2. Region-RegionServer Assignment</h3></div></div></div><p>This section describes how Regions are assigned to RegionServers.
|
||||
</p><div class="section" title="9.7.2.1. Startup"><div class="titlepage"><div><div><h4 class="title"><a name="regions.arch.assignment.startup"></a>9.7.2.1. Startup</h4></div></div></div><p>When HBase starts, regions are assigned as follows (short version):
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem">The Master invokes the <code class="code">AssignmentManager</code> upon startup.
|
||||
</li><li class="listitem">The <code class="code">AssignmentManager</code> looks at the existing region assignments in META.
|
||||
</li><li class="listitem">If the region assignment is still valid (i.e., if the RegionServer is still online)
|
||||
then the assignment is kept.
|
||||
</li><li class="listitem">If the assignment is invalid, then the <code class="code">LoadBalancerFactory</code> is invoked to assign the
|
||||
region. The <code class="code">DefaultLoadBalancer</code> will randomly assign the region to a RegionServer.
|
||||
</li><li class="listitem">META is updated with the RegionServer assignment (if needed) and the RegionServer start codes
|
||||
(start time of the RegionServer process) upon region opening by the RegionServer.
|
||||
</li></ol></div><p>
|
||||
</p></div><div class="section" title="9.7.2.2. Failover"><div class="titlepage"><div><div><h4 class="title"><a name="regions.arch.assignment.failover"></a>9.7.2.2. Failover</h4></div></div></div><p>When a RegionServer fails (short version):
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem">The regions immediately become unavailable because the RegionServer is down.
|
||||
</li><li class="listitem">The Master will detect that the RegionServer has failed.
|
||||
</li><li class="listitem">The region assignments will be considered invalid and will be re-assigned just
|
||||
like the startup sequence.
|
||||
</li></ol></div><p>
|
||||
</p></div><div class="section" title="9.7.2.3. Region Load Balancing"><div class="titlepage"><div><div><h4 class="title"><a name="regions.arch.balancer"></a>9.7.2.3. Region Load Balancing</h4></div></div></div><p>
|
||||
Regions can be periodically moved by the <a class="xref" href="master.html#master.processes.loadbalancer" title="9.5.4.1. LoadBalancer">Section 9.5.4.1, “LoadBalancer”</a>.
|
||||
</p></div></div><div class="section" title="9.7.3. Region-RegionServer Locality"><div class="titlepage"><div><div><h3 class="title"><a name="regions.arch.locality"></a>9.7.3. Region-RegionServer Locality</h3></div></div></div><p>Over time, Region-RegionServer locality is achieved via HDFS block replication.
|
||||
The HDFS client does the following by default when choosing locations to write replicas:
|
||||
</p><div class="orderedlist"><ol class="orderedlist" type="1"><li class="listitem">First replica is written to local node
|
||||
</li><li class="listitem">Second replica is written to another node in same rack
|
||||
</li><li class="listitem">Third replica is written to a node in another rack (if sufficient nodes)
|
||||
</li></ol></div><p>
|
||||
Thus, HBase eventually achieves locality for a region after a flush or a compaction.
|
||||
In a RegionServer failover situation a RegionServer may be assigned regions with non-local
|
||||
StoreFiles (because none of the replicas are local), however as new data is written
|
||||
in the region, or the table is compacted and StoreFiles are re-written, they will become "local"
|
||||
to the RegionServer.
|
||||
</p><p>For more information, see <a class="link" href="http://hadoop.apache.org/common/docs/r0.20.205.0/hdfs_design.html#Replica+Placement%3A+The+First+Baby+Steps" target="_top">HDFS Design on Replica Placement</a>
|
||||
and also Lars George's blog on <a class="link" href="http://www.larsgeorge.com/2010/05/hbase-file-locality-in-hdfs.html" target="_top">HBase and HDFS locality</a>.
|
||||
</p></div><div class="section" title="9.7.4. Region Splits"><div class="titlepage"><div><div><h3 class="title"><a name="d2121e5338"></a>9.7.4. Region Splits</h3></div></div></div><p>Splits run unaided on the RegionServer; i.e. the Master does not
|
||||
participate. The RegionServer splits a region, offlines the split
|
||||
region and then adds the daughter regions to META, opens daughters on
|
||||
the parent's hosting RegionServer and then reports the split to the
|
||||
Master. See <a class="xref" href="important_configurations.html#disable.splitting" title="2.5.2.7. Managed Splitting">Section 2.5.2.7, “Managed Splitting”</a> for how to manually manage
|
||||
splits (and for why you might do this)</p><div class="section" title="9.7.4.1. Custom Split Policies"><div class="titlepage"><div><div><h4 class="title"><a name="d2121e5345"></a>9.7.4.1. Custom Split Policies</h4></div></div></div><p>The default split policy can be overwritten using a custom <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.html" target="_top">RegionSplitPolicy</a> (HBase 0.94+).
|
||||
Typically a custom split policy should extend HBase's default split policy: <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.html" target="_top">ConstantSizeRegionSplitPolicy</a>.
|
||||
</p><p>The policy can be set globally through the HBaseConfiguration used or on a per table basis:
|
||||
</p><pre class="programlisting">
|
||||
HTableDescriptor myHtd = ...;
|
||||
myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName());
|
||||
</pre><p>
|
||||
</p></div></div><div class="section" title="9.7.5. Store"><div class="titlepage"><div><div><h3 class="title"><a name="store"></a>9.7.5. Store</h3></div></div></div><p>A Store hosts a MemStore and 0 or more StoreFiles (HFiles). A Store corresponds to a column family for a table for a given region.
|
||||
</p><div class="section" title="9.7.5.1. MemStore"><div class="titlepage"><div><div><h4 class="title"><a name="store.memstore"></a>9.7.5.1. MemStore</h4></div></div></div><p>The MemStore holds in-memory modifications to the Store. Modifications are KeyValues.
|
||||
When asked to flush, current memstore is moved to snapshot and is cleared.
|
||||
HBase continues to serve edits out of new memstore and backing snapshot until flusher reports in that the
|
||||
flush succeeded. At this point the snapshot is let go.</p></div><div class="section" title="9.7.5.2. StoreFile (HFile)"><div class="titlepage"><div><div><h4 class="title"><a name="hfile"></a>9.7.5.2. StoreFile (HFile)</h4></div></div></div><p>StoreFiles are where your data lives.
|
||||
</p><div class="section" title="9.7.5.2.1. HFile Format"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e5376"></a>9.7.5.2.1. HFile Format</h5></div></div></div><p>The <span class="emphasis"><em>hfile</em></span> file format is based on
|
||||
the SSTable file described in the <a class="link" href="http://research.google.com/archive/bigtable.html" target="_top">BigTable [2006]</a> paper and on
|
||||
Hadoop's <a class="link" href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/file/tfile/TFile.html" target="_top">tfile</a>
|
||||
(The unit test suite and the compression harness were taken directly from tfile).
|
||||
Schubert Zhang's blog post on HFile: A Block-Indexed File Format to Store Sorted Key-Value Pairs makes for a thorough introduction to HBase's hfile. Matteo Bertozzi has also put up a
|
||||
helpful description, <a class="link" href="http://th30z.blogspot.com/2011/02/hbase-io-hfile.html?spref=tw" target="_top">HBase I/O: HFile</a>.
|
||||
</p><p>For more information, see the <a class="link" href="http://hbase.apache.org/xref/org/apache/hadoop/hbase/io/hfile/HFile.html" target="_top">HFile source code</a>.
|
||||
Also see <a class="xref" href="hfilev2.html" title="Appendix E. HFile format version 2">Appendix E, <i>HFile format version 2</i></a> for information about the HFile v2 format that was included in 0.92.
|
||||
</p></div><div class="section" title="9.7.5.2.2. HFile Tool"><div class="titlepage"><div><div><h5 class="title"><a name="hfile_tool"></a>9.7.5.2.2. HFile Tool</h5></div></div></div><p>To view a textualized version of hfile content, you can use
|
||||
the <code class="classname">org.apache.hadoop.hbase.io.hfile.HFile
|
||||
</code>tool. Type the following to see usage:</p><pre class="programlisting"><code class="code">$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile </code> </pre><p>For
|
||||
example, to view the content of the file
|
||||
<code class="filename">hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475</code>,
|
||||
type the following:</p><pre class="programlisting"> <code class="code">$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475 </code> </pre><p>If
|
||||
you leave off the option -v, you will see just a summary of the hfile. See
|
||||
usage for other things to do with the <code class="classname">HFile</code>
|
||||
tool.</p></div><div class="section" title="9.7.5.2.3. StoreFile Directory Structure on HDFS"><div class="titlepage"><div><div><h5 class="title"><a name="store.file.dir"></a>9.7.5.2.3. StoreFile Directory Structure on HDFS</h5></div></div></div><p>For more information of what StoreFiles look like on HDFS with respect to the directory structure, see <a class="xref" href="trouble.namenode.html#trouble.namenode.hbase.objects" title="12.7.2. Browsing HDFS for HBase Objects">Section 12.7.2, “Browsing HDFS for HBase Objects”</a>.
|
||||
</p></div></div><div class="section" title="9.7.5.3. Blocks"><div class="titlepage"><div><div><h4 class="title"><a name="hfile.blocks"></a>9.7.5.3. Blocks</h4></div></div></div><p>StoreFiles are composed of blocks. The blocksize is configured on a per-ColumnFamily basis.
|
||||
</p><p>Compression happens at the block level within StoreFiles. For more information on compression, see <a class="xref" href="compression.html" title="Appendix C. Compression In HBase">Appendix C, <i>Compression In HBase</i></a>.
|
||||
</p><p>For more information on blocks, see the <a class="link" href="http://hbase.apache.org/xref/org/apache/hadoop/hbase/io/hfile/HFileBlock.html" target="_top">HFileBlock source code</a>.
|
||||
</p></div><div class="section" title="9.7.5.4. KeyValue"><div class="titlepage"><div><div><h4 class="title"><a name="keyvalue"></a>9.7.5.4. KeyValue</h4></div></div></div><p>The KeyValue class is the heart of data storage in HBase. KeyValue wraps a byte array and takes offsets and lengths into passed array
|
||||
at where to start interpreting the content as KeyValue.
|
||||
</p><p>The KeyValue format inside a byte array is:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">keylength</li><li class="listitem">valuelength</li><li class="listitem">key</li><li class="listitem">value</li></ul></div><p>
|
||||
</p><p>The Key is further decomposed as:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">rowlength</li><li class="listitem">row (i.e., the rowkey)</li><li class="listitem">columnfamilylength</li><li class="listitem">columnfamily</li><li class="listitem">columnqualifier</li><li class="listitem">timestamp</li><li class="listitem">keytype (e.g., Put, Delete, DeleteColumn, DeleteFamily)</li></ul></div><p>
|
||||
</p><p>KeyValue instances are <span class="emphasis"><em>not</em></span> split across blocks.
|
||||
For example, if there is an 8 MB KeyValue, even if the block-size is 64kb this KeyValue will be read
|
||||
in as a coherent block. For more information, see the <a class="link" href="http://hbase.apache.org/xref/org/apache/hadoop/hbase/KeyValue.html" target="_top">KeyValue source code</a>.
|
||||
</p><div class="section" title="9.7.5.4.1. Example"><div class="titlepage"><div><div><h5 class="title"><a name="keyvalue.example"></a>9.7.5.4.1. Example</h5></div></div></div><p>To emphasize the points above, examine what happens with two Puts for two different columns for the same row:</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Put #1: <code class="code">rowkey=row1, cf:attr1=value1</code></li><li class="listitem">Put #2: <code class="code">rowkey=row1, cf:attr2=value2</code></li></ul></div><p>Even though these are for the same row, a KeyValue is created for each column:</p><p>Key portion for Put #1:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">rowlength <code class="code">------------> 4</code></li><li class="listitem">row <code class="code">-----------------> row1</code></li><li class="listitem">columnfamilylength <code class="code">---> 2</code></li><li class="listitem">columnfamily <code class="code">--------> cf</code></li><li class="listitem">columnqualifier <code class="code">------> attr1</code></li><li class="listitem">timestamp <code class="code">-----------> server time of Put</code></li><li class="listitem">keytype <code class="code">-------------> Put</code></li></ul></div><p>
|
||||
</p><p>Key portion for Put #2:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">rowlength <code class="code">------------> 4</code></li><li class="listitem">row <code class="code">-----------------> row1</code></li><li class="listitem">columnfamilylength <code class="code">---> 2</code></li><li class="listitem">columnfamily <code class="code">--------> cf</code></li><li class="listitem">columnqualifier <code class="code">------> attr2</code></li><li class="listitem">timestamp <code class="code">-----------> server time of Put</code></li><li class="listitem">keytype <code class="code">-------------> Put</code></li></ul></div><p>
|
||||
|
||||
</p></div><p>It is critical to understand that the rowkey, ColumnFamily, and column (aka columnqualifier) are embedded within
|
||||
the KeyValue instance. The longer these identifiers are, the bigger the KeyValue is.</p></div><div class="section" title="9.7.5.5. Compaction"><div class="titlepage"><div><div><h4 class="title"><a name="compaction"></a>9.7.5.5. Compaction</h4></div></div></div><p>There are two types of compactions: minor and major. Minor compactions will usually pick up a couple of the smaller adjacent
|
||||
StoreFiles and rewrite them as one. Minors do not drop deletes or expired cells, only major compactions do this. Sometimes a minor compaction
|
||||
will pick up all the StoreFiles in the Store and in this case it actually promotes itself to being a major compaction.
|
||||
</p><p>After a major compaction runs there will be a single StoreFile per Store, and this will help performance usually. Caution: major compactions rewrite all of the Store's data and on a loaded system, this may not be tenable;
|
||||
major compactions will usually have to be done manually on large systems. See <a class="xref" href="important_configurations.html#managed.compactions" title="2.5.2.8. Managed Compactions">Section 2.5.2.8, “Managed Compactions”</a>.
|
||||
</p><p>Compactions will <span class="emphasis"><em>not</em></span> perform region merges. See <a class="xref" href="ops.regionmgt.html#ops.regionmgt.merge" title="14.2.2. Merge">Section 14.2.2, “Merge”</a> for more information on region merging.
|
||||
</p><div class="section" title="9.7.5.5.1. Compaction File Selection"><div class="titlepage"><div><div><h5 class="title"><a name="compaction.file.selection"></a>9.7.5.5.1. Compaction File Selection</h5></div></div></div><p>To understand the core algorithm for StoreFile selection, there is some ASCII-art in the <a class="link" href="http://hbase.apache.org/xref/org/apache/hadoop/hbase/regionserver/Store.html#836" target="_top">Store source code</a> that
|
||||
will serve as useful reference. It has been copied below:
|
||||
</p><pre class="programlisting">
|
||||
/* normal skew:
|
||||
*
|
||||
* older ----> newer
|
||||
* _
|
||||
* | | _
|
||||
* | | | | _
|
||||
* --|-|- |-|- |-|---_-------_------- minCompactSize
|
||||
* | | | | | | | | _ | |
|
||||
* | | | | | | | | | | | |
|
||||
* | | | | | | | | | | | |
|
||||
*/
|
||||
</pre><p>
|
||||
Important knobs:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">hbase.hstore.compaction.ratio</code> Ratio used in compaction
|
||||
file selection algorithm (default 1.2f). </li><li class="listitem"><code class="code">hbase.hstore.compaction.min</code> (.90 hbase.hstore.compactionThreshold) (files) Minimum number
|
||||
of StoreFiles per Store to be selected for a compaction to occur (default 2).</li><li class="listitem"><code class="code">hbase.hstore.compaction.max</code> (files) Maximum number of StoreFiles to compact per minor compaction (default 10).</li><li class="listitem"><code class="code">hbase.hstore.compaction.min.size</code> (bytes)
|
||||
Any StoreFile smaller than this setting will automatically be a candidate for compaction.  Defaults to
|
||||
<code class="code">hbase.hregion.memstore.flush.size</code> (128 mb). </li><li class="listitem"><code class="code">hbase.hstore.compaction.max.size</code> (.92) (bytes)
|
||||
Any StoreFile larger than this setting will automatically be excluded from compaction (default Long.MAX_VALUE). </li></ul></div><p>
|
||||
</p><p>The minor compaction StoreFile selection logic is size based, and selects a file for compaction when the file
|
||||
&lt;= sum(smaller_files) * <code class="code">hbase.hstore.compaction.ratio</code>.
|
||||
</p></div><div class="section" title="9.7.5.5.2. Minor Compaction File Selection - Example #1 (Basic Example)"><div class="titlepage"><div><div><h5 class="title"><a name="compaction.file.selection.example1"></a>9.7.5.5.2. Minor Compaction File Selection - Example #1 (Basic Example)</h5></div></div></div><p>This example mirrors an example from the unit test <code class="code">TestCompactSelection</code>.
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">hbase.hstore.compaction.ratio</code> = 1.0f </li><li class="listitem"><code class="code">hbase.hstore.compaction.min</code> = 3 (files) </li><li class="listitem"><code class="code">hbase.hstore.compaction.max</code> = 5 (files) </li><li class="listitem"><code class="code">hbase.hstore.compaction.min.size</code> = 10 (bytes) </li><li class="listitem"><code class="code">hbase.hstore.compaction.max.size</code> = 1000 (bytes) </li></ul></div><p>
|
||||
The following StoreFiles exist: 100, 50, 23, 12, and 12 bytes apiece (oldest to newest).
|
||||
With the above parameters, the files that would be selected for minor compaction are 23, 12, and 12.
|
||||
</p><p>Why?
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">100 --> No, because sum(50, 23, 12, 12) * 1.0 = 97. </li><li class="listitem">50 --> No, because sum(23, 12, 12) * 1.0 = 47. </li><li class="listitem">23 --> Yes, because sum(12, 12) * 1.0 = 24. </li><li class="listitem">12 --> Yes, because the previous file has been included, and because this
|
||||
does not exceed the max-file limit of 5 </li><li class="listitem">12 --> Yes, because the previous file had been included, and because this
|
||||
does not exceed the max-file limit of 5.</li></ul></div><p>
|
||||
</p></div><div class="section" title="9.7.5.5.3. Minor Compaction File Selection - Example #2 (Not Enough Files To Compact)"><div class="titlepage"><div><div><h5 class="title"><a name="compaction.file.selection.example2"></a>9.7.5.5.3. Minor Compaction File Selection - Example #2 (Not Enough Files To Compact)</h5></div></div></div><p>This example mirrors an example from the unit test <code class="code">TestCompactSelection</code>.
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">hbase.hstore.compaction.ratio</code> = 1.0f </li><li class="listitem"><code class="code">hbase.hstore.compaction.min</code> = 3 (files) </li><li class="listitem"><code class="code">hbase.hstore.compaction.max</code> = 5 (files) </li><li class="listitem"><code class="code">hbase.hstore.compaction.min.size</code> = 10 (bytes) </li><li class="listitem"><code class="code">hbase.hstore.compaction.max.size</code> = 1000 (bytes) </li></ul></div><p>
|
||||
</p><p>The following StoreFiles exist: 100, 25, 12, and 12 bytes apiece (oldest to newest).
|
||||
With the above parameters, no files would be selected for minor compaction.
|
||||
</p><p>Why?
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">100 --> No, because sum(25, 12, 12) * 1.0 = 47</li><li class="listitem">25 --> No, because sum(12, 12) * 1.0 = 24</li><li class="listitem">12 --> No. Candidate because sum(12) * 1.0 = 12, there are only 2 files to compact and that is less than the threshold of 3</li><li class="listitem">12 --> No. Candidate because the previous StoreFile was, but there are not enough files to compact</li></ul></div><p>
|
||||
</p></div><div class="section" title="9.7.5.5.4. Minor Compaction File Selection - Example #3 (Limiting Files To Compact)"><div class="titlepage"><div><div><h5 class="title"><a name="compaction.file.selection.example2"></a>9.7.5.5.4. Minor Compaction File Selection - Example #3 (Limiting Files To Compact)</h5></div></div></div><p>This example mirrors an example from the unit test <code class="code">TestCompactSelection</code>.
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem"><code class="code">hbase.hstore.compaction.ratio</code> = 1.0f </li><li class="listitem"><code class="code">hbase.hstore.compaction.min</code> = 3 (files) </li><li class="listitem"><code class="code">hbase.hstore.compaction.max</code> = 5 (files) </li><li class="listitem"><code class="code">hbase.hstore.compaction.min.size</code> = 10 (bytes) </li><li class="listitem"><code class="code">hbase.hstore.compaction.max.size</code> = 1000 (bytes) </li></ul></div><p>
|
||||
The following StoreFiles exist: 7, 6, 5, 4, 3, 2, and 1 bytes apiece (oldest to newest).
|
||||
With the above parameters, the files that would be selected for minor compaction are 7, 6, 5, 4, 3.
|
||||
</p><p>Why?
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">7 --> Yes, because sum(6, 5, 4, 3, 2, 1) * 1.0 = 21. Also, 7 is less than the min-size</li><li class="listitem">6 --> Yes, because sum(5, 4, 3, 2, 1) * 1.0 = 15. Also, 6 is less than the min-size. </li><li class="listitem">5 --> Yes, because sum(4, 3, 2, 1) * 1.0 = 10. Also, 5 is less than the min-size. </li><li class="listitem">4 --> Yes, because sum(3, 2, 1) * 1.0 = 6. Also, 4 is less than the min-size. </li><li class="listitem">3 --> Yes, because sum(2, 1) * 1.0 = 3. Also, 3 is less than the min-size. </li><li class="listitem">2 --> No. Candidate because previous file was selected and 2 is less than the min-size, but the max-number of files to compact has been reached. </li><li class="listitem">1 --> No. Candidate because previous file was selected and 1 is less than the min-size, but max-number of files to compact has been reached. </li></ul></div><p>
|
||||
</p></div><div class="section" title="9.7.5.5.5. Impact of Key Configuration Options"><div class="titlepage"><div><div><h5 class="title"><a name="compaction.config.impact"></a>9.7.5.5.5. Impact of Key Configuration Options</h5></div></div></div><p><code class="code">hbase.hstore.compaction.ratio</code>. A large ratio (e.g., 10) will produce a single giant file.  Conversely, a value of .25 will
|
||||
produce behavior similar to the BigTable compaction algorithm - resulting in 4 StoreFiles.
|
||||
</p><p><code class="code">hbase.hstore.compaction.min.size</code>. Because
|
||||
this limit represents the "automatic include" limit for all StoreFiles smaller than this value, this value may need to
|
||||
be adjusted downwards in write-heavy environments where many 1 or 2 mb StoreFiles are being flushed, because every file
|
||||
will be targeted for compaction and the resulting files may still be under the min-size and require further compaction, etc.
|
||||
</p></div></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'regions.arch';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="regionserver.arch.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="arch.bulk.load.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.6. RegionServer </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.8. Bulk Loading</td></tr></table></div></body></html>
|
|
@ -1,91 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>9.6. RegionServer</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="architecture.html" title="Chapter 9. Architecture"><link rel="prev" href="master.html" title="9.5. Master"><link rel="next" href="regions.arch.html" title="9.7. Regions"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">9.6. RegionServer</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="master.html">Prev</a> </td><th width="60%" align="center">Chapter 9. Architecture</th><td width="20%" align="right"> <a accesskey="n" href="regions.arch.html">Next</a></td></tr></table><hr></div><div class="section" title="9.6. RegionServer"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="regionserver.arch"></a>9.6. RegionServer</h2></div></div></div><p><code class="code">HRegionServer</code> is the RegionServer implementation. It is responsible for serving and managing regions.
|
||||
In a distributed cluster, a RegionServer runs on a <a class="xref" href="arch.hdfs.html#arch.hdfs.dn" title="9.9.2. DataNode">Section 9.9.2, “DataNode”</a>.
|
||||
</p><div class="section" title="9.6.1. Interface"><div class="titlepage"><div><div><h3 class="title"><a name="regionserver.arch.api"></a>9.6.1. Interface</h3></div></div></div><p>The methods exposed by <code class="code">HRegionInterface</code> contain both data-oriented and region-maintenance methods:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Data (get, put, delete, next, etc.)
|
||||
</li><li class="listitem">Region (splitRegion, compactRegion, etc.)
|
||||
</li></ul></div><p>
|
||||
For example, when the <code class="code">HBaseAdmin</code> method <code class="code">majorCompact</code> is invoked on a table, the client is actually iterating through
|
||||
all regions for the specified table and requesting a major compaction directly to each region.
|
||||
</p></div><div class="section" title="9.6.2. Processes"><div class="titlepage"><div><div><h3 class="title"><a name="regionserver.arch.processes"></a>9.6.2. Processes</h3></div></div></div><p>The RegionServer runs a variety of background threads:</p><div class="section" title="9.6.2.1. CompactSplitThread"><div class="titlepage"><div><div><h4 class="title"><a name="regionserver.arch.processes.compactsplit"></a>9.6.2.1. CompactSplitThread</h4></div></div></div><p>Checks for splits and handle minor compactions.</p></div><div class="section" title="9.6.2.2. MajorCompactionChecker"><div class="titlepage"><div><div><h4 class="title"><a name="regionserver.arch.processes.majorcompact"></a>9.6.2.2. MajorCompactionChecker</h4></div></div></div><p>Checks for major compactions.</p></div><div class="section" title="9.6.2.3. MemStoreFlusher"><div class="titlepage"><div><div><h4 class="title"><a name="regionserver.arch.processes.memstore"></a>9.6.2.3. MemStoreFlusher</h4></div></div></div><p>Periodically flushes in-memory writes in the MemStore to StoreFiles.</p></div><div class="section" title="9.6.2.4. LogRoller"><div class="titlepage"><div><div><h4 class="title"><a name="regionserver.arch.processes.log"></a>9.6.2.4. LogRoller</h4></div></div></div><p>Periodically checks the RegionServer's HLog.</p></div></div><div class="section" title="9.6.3. Coprocessors"><div class="titlepage"><div><div><h3 class="title"><a name="coprocessors"></a>9.6.3. Coprocessors</h3></div></div></div><p>Coprocessors were added in 0.92. There is a thorough <a class="link" href="https://blogs.apache.org/hbase/entry/coprocessor_introduction" target="_top">Blog Overview of CoProcessors</a>
|
||||
posted. Documentation will eventually move to this reference guide, but the blog is the most current information available at this time.
|
||||
</p></div><div class="section" title="9.6.4. Block Cache"><div class="titlepage"><div><div><h3 class="title"><a name="block.cache"></a>9.6.4. Block Cache</h3></div></div></div><div class="section" title="9.6.4.1. Design"><div class="titlepage"><div><div><h4 class="title"><a name="block.cache.design"></a>9.6.4.1. Design</h4></div></div></div><p>The Block Cache is an LRU cache that contains three levels of block priority to allow for scan-resistance and in-memory ColumnFamilies:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Single access priority: The first time a block is loaded from HDFS it normally has this priority and it will be part of the first group to be considered
|
||||
during evictions. The advantage is that scanned blocks are more likely to get evicted than blocks that are getting more usage.
|
||||
</li><li class="listitem">Multi access priority: If a block in the previous priority group is accessed again, it upgrades to this priority. It is thus part of the second group
|
||||
considered during evictions.
|
||||
</li><li class="listitem">In-memory access priority: If the block's family was configured to be "in-memory", it will be part of this priority disregarding the number of times it
|
||||
was accessed. Catalog tables are configured like this. This group is the last one considered during evictions.
|
||||
</li></ul></div><p>
|
||||
For more information, see the <a class="link" href="http://hbase.apache.org/xref/org/apache/hadoop/hbase/io/hfile/LruBlockCache.html" target="_top">LruBlockCache source</a>
|
||||
</p></div><div class="section" title="9.6.4.2. Usage"><div class="titlepage"><div><div><h4 class="title"><a name="block.cache.usage"></a>9.6.4.2. Usage</h4></div></div></div><p>Block caching is enabled by default for all the user tables which means that any read operation will load the LRU cache. This might be good for a large number of use cases,
|
||||
but further tunings are usually required in order to achieve better performance. An important concept is the
|
||||
<a class="link" href="http://en.wikipedia.org/wiki/Working_set_size" target="_top">working set size</a>, or WSS, which is: "the amount of memory needed to compute the answer to a problem".
|
||||
For a website, this would be the data that's needed to answer the queries over a short amount of time.
|
||||
</p><p>The way to calculate how much memory is available in HBase for caching is:
|
||||
</p><pre class="programlisting">
|
||||
number of region servers * heap size * hfile.block.cache.size * 0.85
|
||||
</pre><p>The default value for the block cache is 0.25 which represents 25% of the available heap. The last value (85%) is the default acceptable loading factor in the LRU cache after
|
||||
which eviction is started. The reason it is included in this equation is that it would be unrealistic to say that it is possible to use 100% of the available memory since this would
|
||||
make the process blocking from the point where it loads new blocks. Here are some examples:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">One region server with the default heap size (1GB) and the default block cache size will have 217MB of block cache available.
|
||||
</li><li class="listitem">20 region servers with the heap size set to 8GB and a default block cache size will have 34GB of block cache.
|
||||
</li><li class="listitem">100 region servers with the heap size set to 24GB and a block cache size of 0.5 will have about 1TB of block cache.
|
||||
</li></ul></div><p>Your data isn't the only resident of the block cache, here are others that you may have to take into account:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Catalog tables: The -ROOT- and .META. tables are forced into the block cache and have the in-memory priority which means that they are harder to evict. The former never uses
|
||||
more than a few hundreds of bytes while the latter can occupy a few MBs (depending on the number of regions).
|
||||
</li><li class="listitem">HFiles indexes: HFile is the file format that HBase uses to store data in HDFS and it contains a multi-layered index in order to seek to the data without having to read the whole file.
|
||||
The size of those indexes is a factor of the block size (64KB by default), the size of your keys and the amount of data you are storing. For big data sets it's not unusual to see numbers around
|
||||
1GB per region server, although not all of it will be in cache because the LRU will evict indexes that aren't used.
|
||||
</li><li class="listitem">Keys: Taking into account only the values that are being stored is missing half the picture since every value is stored along with its keys
|
||||
(row key, family, qualifier, and timestamp). See <a class="xref" href="rowkey.design.html#keysize" title="6.3.2. Try to minimize row and column sizes">Section 6.3.2, “Try to minimize row and column sizes”</a>.
|
||||
</li><li class="listitem">Bloom filters: Just like the HFile indexes, those data structures (when enabled) are stored in the LRU.
|
||||
</li></ul></div><p>Currently the recommended way to measure HFile indexes and bloom filters sizes is to look at the region server web UI and checkout the relevant metrics. For keys,
|
||||
sampling can be done by using the HFile command line tool and looking for the average key size metric.
|
||||
</p><p>It's generally bad to use block caching when the WSS doesn't fit in memory. This is the case when you have for example 40GB available across all your region servers' block caches
|
||||
but you need to process 1TB of data. One of the reasons is that the churn generated by the evictions will trigger more garbage collections unnecessarily. Here are two use cases:
|
||||
</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">Fully random reading pattern: This is a case where you almost never access the same row twice within a short amount of time such that the chance of hitting a cached block is close
|
||||
to 0. Setting block caching on such a table is a waste of memory and CPU cycles, more so that it will generate more garbage to pick up by the JVM. For more information on monitoring GC,
|
||||
see <a class="xref" href="trouble.log.html#trouble.log.gc" title="12.2.3. JVM Garbage Collection Logs">Section 12.2.3, “JVM Garbage Collection Logs”</a>.
|
||||
</li><li class="listitem">Mapping a table: In a typical MapReduce job that takes a table in input, every row will be read only once so there's no need to put them into the block cache. The Scan object has
|
||||
the option of turning this off via the setCacheBlocks method (set it to false). You can still keep block caching turned on on this table if you need fast random read access. An example would be
|
||||
counting the number of rows in a table that serves live traffic, caching every block of that table would create massive churn and would surely evict data that's currently in use.
|
||||
</li></ul></div></div></div><div class="section" title="9.6.5. Write Ahead Log (WAL)"><div class="titlepage"><div><div><h3 class="title"><a name="wal"></a>9.6.5. Write Ahead Log (WAL)</h3></div></div></div><div class="section" title="9.6.5.1. Purpose"><div class="titlepage"><div><div><h4 class="title"><a name="purpose.wal"></a>9.6.5.1. Purpose</h4></div></div></div><p>Each RegionServer adds updates (Puts, Deletes) to its write-ahead log (WAL)
|
||||
first, and then to the <a class="xref" href="regions.arch.html#store.memstore" title="9.7.5.1. MemStore">Section 9.7.5.1, “MemStore”</a> for the affected <a class="xref" href="regions.arch.html#store" title="9.7.5. Store">Section 9.7.5, “Store”</a>.
|
||||
This ensures that HBase has durable writes. Without WAL, there is the possibility of data loss in the case of a RegionServer failure
|
||||
before each MemStore is flushed and new StoreFiles are written. <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/wal/HLog.html" target="_top">HLog</a>
|
||||
is the HBase WAL implementation, and there is one HLog instance per RegionServer.
|
||||
</p><p>The WAL is in HDFS in <code class="filename">/hbase/.logs/</code> with subdirectories per region.</p>
|
||||
<p>
|
||||
For more general information about the concept of write ahead logs, see the Wikipedia
|
||||
<a class="link" href="http://en.wikipedia.org/wiki/Write-ahead_logging" target="_top">Write-Ahead Log</a> article.
|
||||
</p></div><div class="section" title="9.6.5.2. WAL Flushing"><div class="titlepage"><div><div><h4 class="title"><a name="wal_flush"></a>9.6.5.2. WAL Flushing</h4></div></div></div><p>TODO (describe).
|
||||
</p></div><div class="section" title="9.6.5.3. WAL Splitting"><div class="titlepage"><div><div><h4 class="title"><a name="wal_splitting"></a>9.6.5.3. WAL Splitting</h4></div></div></div><div class="section" title="9.6.5.3.1. How edits are recovered from a crashed RegionServer"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e5167"></a>9.6.5.3.1. How edits are recovered from a crashed RegionServer</h5></div></div></div><p>When a RegionServer crashes, it will lose its ephemeral lease in
|
||||
ZooKeeper...TODO</p></div><div class="section" title="9.6.5.3.2. hbase.hlog.split.skip.errors"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e5172"></a>9.6.5.3.2. <code class="varname">hbase.hlog.split.skip.errors</code></h5></div></div></div><p>When set to <code class="constant">true</code>, any error
|
||||
encountered splitting will be logged, the problematic WAL will be
|
||||
moved into the <code class="filename">.corrupt</code> directory under the hbase
|
||||
<code class="varname">rootdir</code>, and processing will continue. If set to
|
||||
<code class="constant">false</code>, the default, the exception will be propagated and the
|
||||
split logged as failed.<sup>[<a name="d2121e5190" href="#ftn.d2121e5190" class="footnote">25</a>]</sup></p></div><div class="section" title="9.6.5.3.3. How EOFExceptions are treated when splitting a crashed RegionServers' WALs"><div class="titlepage"><div><div><h5 class="title"><a name="d2121e5196"></a>9.6.5.3.3. How EOFExceptions are treated when splitting a crashed
|
||||
RegionServers' WALs</h5></div></div></div><p>If we get an EOF while splitting logs, we proceed with the split
|
||||
even when <code class="varname">hbase.hlog.split.skip.errors</code> ==
|
||||
<code class="constant">false</code>. An EOF while reading the last log in the
|
||||
set of files to split is near-guaranteed since the RegionServer likely
|
||||
crashed mid-write of a record. But we'll continue even if we got an
|
||||
EOF reading other than the last file in the set.<sup>[<a name="d2121e5207" href="#ftn.d2121e5207" class="footnote">26</a>]</sup></p></div></div></div><div class="footnotes"><br><hr width="100" align="left"><div class="footnote"><p><sup>[<a id="ftn.d2121e5190" href="#d2121e5190" class="para">25</a>] </sup>See <a class="link" href="https://issues.apache.org/jira/browse/HBASE-2958" target="_top">HBASE-2958
|
||||
When hbase.hlog.split.skip.errors is set to false, we fail the
|
||||
split but thats it</a>. We need to do more than just fail split
|
||||
if this flag is set.</p></div><div class="footnote"><p><sup>[<a id="ftn.d2121e5207" href="#d2121e5207" class="para">26</a>] </sup>For background, see <a class="link" href="https://issues.apache.org/jira/browse/HBASE-2643" target="_top">HBASE-2643
|
||||
Figure how to deal with eof splitting logs</a></p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'regionserver.arch';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="master.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="architecture.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="regions.arch.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">9.5. Master </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 9.7. Regions</td></tr></table></div></body></html>
|
|
@ -1,16 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>10.2. REST</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="external_apis.html" title="Chapter 10. Apache HBase (TM) External APIs"><link rel="prev" href="external_apis.html" title="Chapter 10. Apache HBase (TM) External APIs"><link rel="next" href="thrift.html" title="10.3. Thrift"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">10.2. REST</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="external_apis.html">Prev</a> </td><th width="60%" align="center">Chapter 10. Apache HBase (TM) External APIs</th><td width="20%" align="right"> <a accesskey="n" href="thrift.html">Next</a></td></tr></table><hr></div><div class="section" title="10.2. REST"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rest"></a>10.2. REST</h2></div></div></div><p>Currently most of the documentation on REST exists in the
|
||||
<a class="link" href="http://wiki.apache.org/hadoop/Hbase/Stargate" target="_top">Apache HBase Wiki on REST</a>.
|
||||
</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'rest';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="external_apis.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="external_apis.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="thrift.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">Chapter 10. Apache HBase (TM) External APIs </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 10.3. Thrift</td></tr></table></div></body></html>
|
|
@ -1,17 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>5.4. Row</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="datamodel.html" title="Chapter 5. Data Model"><link rel="prev" href="table.html" title="5.3. Table"><link rel="next" href="columnfamily.html" title="5.5. Column Family"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">5.4. Row</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="table.html">Prev</a> </td><th width="60%" align="center">Chapter 5. Data Model</th><td width="20%" align="right"> <a accesskey="n" href="columnfamily.html">Next</a></td></tr></table><hr></div><div class="section" title="5.4. Row"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="row"></a>5.4. Row</h2></div></div></div><p>Row keys are uninterpreted bytes. Rows are
|
||||
lexicographically sorted with the lowest order appearing first
|
||||
in a table. The empty byte array is used to denote both the
|
||||
start and end of a table's namespace.</p></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'row';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="table.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="datamodel.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="columnfamily.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.3. Table </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 5.5. Column Family</td></tr></table></div></body></html>
|
|
@ -1,147 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>6.3. Rowkey Design</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="schema.html" title="Chapter 6. HBase and Schema Design"><link rel="prev" href="number.of.cfs.html" title="6.2. On the number of column families"><link rel="next" href="schema.versions.html" title="6.4. Number of Versions"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">6.3. Rowkey Design</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="number.of.cfs.html">Prev</a> </td><th width="60%" align="center">Chapter 6. HBase and Schema Design</th><td width="20%" align="right"> <a accesskey="n" href="schema.versions.html">Next</a></td></tr></table><hr></div><div class="section" title="6.3. Rowkey Design"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="rowkey.design"></a>6.3. Rowkey Design</h2></div></div></div><div class="section" title="6.3.1. Monotonically Increasing Row Keys/Timeseries Data"><div class="titlepage"><div><div><h3 class="title"><a name="timeseries"></a>6.3.1.
|
||||
Monotonically Increasing Row Keys/Timeseries Data
|
||||
</h3></div></div></div><p>
|
||||
In the HBase chapter of Tom White's book Hadoop: The Definitive Guide (O'Reilly) there is an optimization note on watching out for a phenomenon where an import process walks in lock-step with all clients in concert pounding one of the table's regions (and thus, a single node), then moving onto the next region, etc. With monotonically increasing row-keys (i.e., using a timestamp), this will happen. See this comic by Ikai Lan on why monotonically increasing row keys are problematic in BigTable-like datastores:
|
||||
<a class="link" href="http://ikaisays.com/2011/01/25/app-engine-datastore-tip-monotonically-increasing-values-are-bad/" target="_top">monotonically increasing values are bad</a>. The pile-up on a single region brought on
|
||||
by monotonically increasing keys can be mitigated by randomizing the input records to not be in sorted order, but in general it's best to avoid using a timestamp or a sequence (e.g. 1, 2, 3) as the row-key.
|
||||
</p><p>If you do need to upload time series data into HBase, you should
|
||||
study <a class="link" href="http://opentsdb.net/" target="_top">OpenTSDB</a> as a
|
||||
successful example. It has a page describing the <a class="link" href="http://opentsdb.net/schema.html" target="_top">schema</a> it uses in
|
||||
HBase. The key format in OpenTSDB is effectively [metric_type][event_timestamp], which would appear at first glance to contradict the previous advice about not using a timestamp as the key. However, the difference is that the timestamp is not in the <span class="emphasis"><em>lead</em></span> position of the key, and the design assumption is that there are dozens or hundreds (or more) of different metric types. Thus, even with a continual stream of input data with a mix of metric types, the Puts are distributed across various points of regions in the table.
|
||||
</p></div><div class="section" title="6.3.2. Try to minimize row and column sizes"><div class="titlepage"><div><div><h3 class="title"><a name="keysize"></a>6.3.2. Try to minimize row and column sizes</h3></div><div><h4 class="subtitle">Or why are my StoreFile indices large?</h4></div></div></div><p>In HBase, values are always freighted with their coordinates; as a
|
||||
cell value passes through the system, it'll be accompanied by its
|
||||
row, column name, and timestamp - always. If your rows and column names
|
||||
are large, especially compared to the size of the cell value, then
|
||||
you may run up against some interesting scenarios. One such is
|
||||
the case described by Marc Limotte at the tail of
|
||||
HBASE-3551
|
||||
(recommended!).
|
||||
Therein, the indices that are kept on HBase storefiles (<a class="xref" href="regions.arch.html#hfile" title="9.7.5.2. StoreFile (HFile)">Section 9.7.5.2, “StoreFile (HFile)”</a>)
|
||||
to facilitate random access may end up occupying large chunks of the HBase
|
||||
allotted RAM because the cell value coordinates are large.
|
||||
Marc in the above cited comment suggests upping the block size so
|
||||
entries in the store file index happen at a larger interval or
|
||||
modify the table schema so it makes for smaller rows and column
|
||||
names.
|
||||
Compression will also make for larger indices. See
|
||||
the thread <a class="link" href="http://search-hadoop.com/m/hemBv1LiN4Q1/a+question+storefileIndexSize&subj=a+question+storefileIndexSize" target="_top">a question storefileIndexSize</a>
|
||||
up on the user mailing list.
|
||||
</p><p>Most of the time small inefficiencies don't matter all that much. Unfortunately,
|
||||
this is a case where they do. Whatever patterns are selected for ColumnFamilies, attributes, and rowkeys they could be repeated
|
||||
several billion times in your data. </p><p>See <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a> for more information on how HBase stores data internally to see why this is important.</p><div class="section" title="6.3.2.1. Column Families"><div class="titlepage"><div><div><h4 class="title"><a name="keysize.cf"></a>6.3.2.1. Column Families</h4></div></div></div><p>Try to keep the ColumnFamily names as small as possible, preferably one character (e.g. "d" for data/default).
|
||||
</p><p>See <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a> for more information on how HBase stores data internally to see why this is important.</p></div><div class="section" title="6.3.2.2. Attributes"><div class="titlepage"><div><div><h4 class="title"><a name="keysize.atttributes"></a>6.3.2.2. Attributes</h4></div></div></div><p>Although verbose attribute names (e.g., "myVeryImportantAttribute") are easier to read, prefer shorter attribute names (e.g., "via")
|
||||
to store in HBase.
|
||||
</p><p>See <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a> for more information on how HBase stores data internally to see why this is important.</p></div><div class="section" title="6.3.2.3. Rowkey Length"><div class="titlepage"><div><div><h4 class="title"><a name="keysize.row"></a>6.3.2.3. Rowkey Length</h4></div></div></div><p>Keep them as short as is reasonable such that they can still be useful for required data access (e.g., Get vs. Scan).
|
||||
A short key that is useless for data access is not better than a longer key with better get/scan properties. Expect tradeoffs
|
||||
when designing rowkeys.
|
||||
</p></div><div class="section" title="6.3.2.4. Byte Patterns"><div class="titlepage"><div><div><h4 class="title"><a name="keysize.patterns"></a>6.3.2.4. Byte Patterns</h4></div></div></div><p>A long is 8 bytes. You can store an unsigned number up to 18,446,744,073,709,551,615 in those eight bytes.
|
||||
If you stored this number as a String -- presuming a byte per character -- you need nearly 3x the bytes.
|
||||
</p><p>Not convinced? Below is some sample code that you can run on your own.
|
||||
</p><pre class="programlisting">
|
||||
// long
|
||||
//
|
||||
long l = 1234567890L;
|
||||
byte[] lb = Bytes.toBytes(l);
|
||||
System.out.println("long bytes length: " + lb.length); // returns 8
|
||||
|
||||
String s = "" + l;
|
||||
byte[] sb = Bytes.toBytes(s);
|
||||
System.out.println("long as string length: " + sb.length); // returns 10
|
||||
|
||||
// hash
|
||||
//
|
||||
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||
byte[] digest = md.digest(Bytes.toBytes(s));
|
||||
System.out.println("md5 digest bytes length: " + digest.length); // returns 16
|
||||
|
||||
String sDigest = new String(digest);
|
||||
byte[] sbDigest = Bytes.toBytes(sDigest);
|
||||
System.out.println("md5 digest as string length: " + sbDigest.length); // returns 26
|
||||
</pre><p>
|
||||
</p></div></div><div class="section" title="6.3.3. Reverse Timestamps"><div class="titlepage"><div><div><h3 class="title"><a name="reverse.timestamp"></a>6.3.3. Reverse Timestamps</h3></div></div></div><p>A common problem in database processing is quickly finding the most recent version of a value. A technique using reverse timestamps
|
||||
as a part of the key can help greatly with a special case of this problem. Also found in the HBase chapter of Tom White's book Hadoop: The Definitive Guide (O'Reilly),
|
||||
the technique involves appending (<code class="code">Long.MAX_VALUE - timestamp</code>) to the end of any key, e.g., [key][reverse_timestamp].
|
||||
</p><p>The most recent value for [key] in a table can be found by performing a Scan for [key] and obtaining the first record. Since HBase keys
|
||||
are in sorted order, this key sorts before any older row-keys for [key] and thus is first.
|
||||
</p><p>This technique would be used instead of using <a class="xref" href="schema.versions.html" title="6.4. Number of Versions">Section 6.4, “
|
||||
Number of Versions
|
||||
”</a> where the intent is to hold onto all versions
|
||||
"forever" (or a very long time) and at the same time quickly obtain access to any other version by using the same Scan technique.
|
||||
</p></div><div class="section" title="6.3.4. Rowkeys and ColumnFamilies"><div class="titlepage"><div><div><h3 class="title"><a name="rowkey.scope"></a>6.3.4. Rowkeys and ColumnFamilies</h3></div></div></div><p>Rowkeys are scoped to ColumnFamilies. Thus, the same rowkey could exist in each ColumnFamily that exists in a table without collision.
|
||||
</p></div><div class="section" title="6.3.5. Immutability of Rowkeys"><div class="titlepage"><div><div><h3 class="title"><a name="changing.rowkeys"></a>6.3.5. Immutability of Rowkeys</h3></div></div></div><p>Rowkeys cannot be changed. The only way they can be "changed" in a table is if the row is deleted and then re-inserted.
|
||||
This is a fairly common question on the HBase dist-list so it pays to get the rowkeys right the first time (and/or before you've
|
||||
inserted a lot of data).
|
||||
</p></div><div class="section" title="6.3.6. Relationship Between RowKeys and Region Splits"><div class="titlepage"><div><div><h3 class="title"><a name="rowkey.regionsplits"></a>6.3.6. Relationship Between RowKeys and Region Splits</h3></div></div></div><p>If you pre-split your table, it is <span class="emphasis"><em>critical</em></span> to understand how your rowkey will be distributed across
|
||||
the region boundaries. As an example of why this is important, consider the example of using displayable hex characters as the
|
||||
lead position of the key (e.g., "0000000000000000" to "ffffffffffffffff"). Running those key ranges through <code class="code">Bytes.split</code>
|
||||
(which is the split strategy used when creating regions in <code class="code">HBaseAdmin.createTable(byte[] startKey, byte[] endKey, numRegions)</code>)
|
||||
for 10 regions will generate the following splits...
|
||||
</p><p>
|
||||
</p><pre class="programlisting">
|
||||
48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 // 0
|
||||
54 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 // 6
|
||||
61 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -68 // =
|
||||
68 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -124 -126 // D
|
||||
75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 72 // K
|
||||
82 18 18 18 18 18 18 18 18 18 18 18 18 18 18 14 // R
|
||||
88 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -40 -44 // X
|
||||
95 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -102 // _
|
||||
102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 // f
|
||||
</pre><p>
|
||||
... (note: the lead byte is listed to the right as a comment.) Given that the first split is a '0' and the last split is an 'f',
|
||||
everything is great, right? Not so fast.
|
||||
</p><p>The problem is that all the data is going to pile up in the first 2 regions and the last region thus creating a "lumpy" (and
|
||||
possibly "hot") region problem. To understand why, refer to an <a class="link" href="http://www.asciitable.com" target="_top">ASCII Table</a>.
|
||||
'0' is byte 48, and 'f' is byte 102, but there is a huge gap in byte values (bytes 58 to 96) that will <span class="emphasis"><em>never appear in this
|
||||
keyspace</em></span> because the only values are [0-9] and [a-f]. Thus, the middle regions will
|
||||
never be used. To make pre-splitting work with this example keyspace, a custom definition of splits (i.e., and not relying on the
|
||||
built-in split method) is required.
|
||||
</p><p>Lesson #1: Pre-splitting tables is generally a best practice, but you need to pre-split them in such a way that all the
|
||||
regions are accessible in the keyspace. While this example demonstrated the problem with a hex-key keyspace, the same problem can happen
|
||||
with <span class="emphasis"><em>any</em></span> keyspace. Know your data.
|
||||
</p><p>Lesson #2: While generally not advisable, using hex-keys (and more generally, displayable data) can still work with pre-split
|
||||
tables as long as all the created regions are accessible in the keyspace.
|
||||
</p><p>To conclude this example, the following is an example of how appropriate splits can be pre-created for hex-keys:
|
||||
</p><pre class="programlisting">public static boolean createTable(HBaseAdmin admin, HTableDescriptor table, byte[][] splits)
|
||||
throws IOException {
|
||||
try {
|
||||
admin.createTable( table, splits );
|
||||
return true;
|
||||
} catch (TableExistsException e) {
|
||||
logger.info("table " + table.getNameAsString() + " already exists");
|
||||
// the table already exists...
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static byte[][] getHexSplits(String startKey, String endKey, int numRegions) {
|
||||
byte[][] splits = new byte[numRegions-1][];
|
||||
BigInteger lowestKey = new BigInteger(startKey, 16);
|
||||
BigInteger highestKey = new BigInteger(endKey, 16);
|
||||
BigInteger range = highestKey.subtract(lowestKey);
|
||||
BigInteger regionIncrement = range.divide(BigInteger.valueOf(numRegions));
|
||||
lowestKey = lowestKey.add(regionIncrement);
|
||||
for(int i=0; i < numRegions-1;i++) {
|
||||
BigInteger key = lowestKey.add(regionIncrement.multiply(BigInteger.valueOf(i)));
|
||||
byte[] b = String.format("%016x", key).getBytes();
|
||||
splits[i] = b;
|
||||
}
|
||||
return splits;
|
||||
}</pre></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'rowkey.design';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="number.of.cfs.html">Prev</a> </td><td width="20%" align="center"><a accesskey="u" href="schema.html">Up</a></td><td width="40%" align="right"> <a accesskey="n" href="schema.versions.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">6.2.
|
||||
On the number of column families
|
||||
</td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 6.4.
|
||||
Number of Versions
|
||||
</td></tr></table></div></body></html>
|
|
@ -1,71 +0,0 @@
|
|||
<html><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Chapter 6. HBase and Schema Design</title><link rel="stylesheet" type="text/css" href="../css/freebsd_docbook.css"><meta name="generator" content="DocBook XSL-NS Stylesheets V1.76.1"><link rel="home" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="up" href="book.html" title="The Apache HBase™ Reference Guide"><link rel="prev" href="joins.html" title="5.11. Joins"><link rel="next" href="number.of.cfs.html" title="6.2. On the number of column families"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="navheader"><table width="100%" summary="Navigation header"><tr><th colspan="3" align="center">Chapter 6. HBase and Schema Design</th></tr><tr><td width="20%" align="left"><a accesskey="p" href="joins.html">Prev</a> </td><th width="60%" align="center"> </th><td width="20%" align="right"> <a accesskey="n" href="number.of.cfs.html">Next</a></td></tr></table><hr></div><div class="chapter" title="Chapter 6. HBase and Schema Design"><div class="titlepage"><div><div><h2 class="title"><a name="schema"></a>Chapter 6. HBase and Schema Design</h2></div></div></div><div class="toc"><p><b>Table of Contents</b></p><dl><dt><span class="section"><a href="schema.html#schema.creation">6.1.
|
||||
Schema Creation
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="schema.html#schema.updates">6.1.1. Schema Updates</a></span></dt></dl></dd><dt><span class="section"><a href="number.of.cfs.html">6.2.
|
||||
On the number of column families
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="number.of.cfs.html#number.of.cfs.card">6.2.1. Cardinality of ColumnFamilies</a></span></dt></dl></dd><dt><span class="section"><a href="rowkey.design.html">6.3. Rowkey Design</a></span></dt><dd><dl><dt><span class="section"><a href="rowkey.design.html#timeseries">6.3.1.
|
||||
Monotonically Increasing Row Keys/Timeseries Data
|
||||
</a></span></dt><dt><span class="section"><a href="rowkey.design.html#keysize">6.3.2. Try to minimize row and column sizes</a></span></dt><dt><span class="section"><a href="rowkey.design.html#reverse.timestamp">6.3.3. Reverse Timestamps</a></span></dt><dt><span class="section"><a href="rowkey.design.html#rowkey.scope">6.3.4. Rowkeys and ColumnFamilies</a></span></dt><dt><span class="section"><a href="rowkey.design.html#changing.rowkeys">6.3.5. Immutability of Rowkeys</a></span></dt><dt><span class="section"><a href="rowkey.design.html#rowkey.regionsplits">6.3.6. Relationship Between RowKeys and Region Splits</a></span></dt></dl></dd><dt><span class="section"><a href="schema.versions.html">6.4.
|
||||
Number of Versions
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="schema.versions.html#schema.versions.max">6.4.1. Maximum Number of Versions</a></span></dt><dt><span class="section"><a href="schema.versions.html#schema.minversions">6.4.2.
|
||||
Minimum Number of Versions
|
||||
</a></span></dt></dl></dd><dt><span class="section"><a href="supported.datatypes.html">6.5.
|
||||
Supported Datatypes
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="supported.datatypes.html#counters">6.5.1. Counters</a></span></dt></dl></dd><dt><span class="section"><a href="schema.joins.html">6.6. Joins</a></span></dt><dt><span class="section"><a href="ttl.html">6.7. Time To Live (TTL)</a></span></dt><dt><span class="section"><a href="cf.keep.deleted.html">6.8.
|
||||
Keeping Deleted Cells
|
||||
</a></span></dt><dt><span class="section"><a href="secondary.indexes.html">6.9.
|
||||
Secondary Indexes and Alternate Query Paths
|
||||
</a></span></dt><dd><dl><dt><span class="section"><a href="secondary.indexes.html#secondary.indexes.filter">6.9.1.
|
||||
Filter Query
|
||||
</a></span></dt><dt><span class="section"><a href="secondary.indexes.html#secondary.indexes.periodic">6.9.2.
|
||||
Periodic-Update Secondary Index
|
||||
</a></span></dt><dt><span class="section"><a href="secondary.indexes.html#secondary.indexes.dualwrite">6.9.3.
|
||||
Dual-Write Secondary Index
|
||||
</a></span></dt><dt><span class="section"><a href="secondary.indexes.html#secondary.indexes.summary">6.9.4.
|
||||
Summary Tables
|
||||
</a></span></dt><dt><span class="section"><a href="secondary.indexes.html#secondary.indexes.coproc">6.9.5.
|
||||
Coprocessor Secondary Index
|
||||
</a></span></dt></dl></dd><dt><span class="section"><a href="schema.smackdown.html">6.10. Schema Design Smackdown</a></span></dt><dd><dl><dt><span class="section"><a href="schema.smackdown.html#schema.smackdown.rowsversions">6.10.1. Rows vs. Versions</a></span></dt><dt><span class="section"><a href="schema.smackdown.html#schema.smackdown.rowscols">6.10.2. Rows vs. Columns</a></span></dt><dt><span class="section"><a href="schema.smackdown.html#schema.smackdown.rowsascols">6.10.3. Rows as Columns</a></span></dt></dl></dd><dt><span class="section"><a href="schema.ops.html">6.11. Operational and Performance Configuration Options</a></span></dt><dt><span class="section"><a href="constraints.html">6.12. Constraints</a></span></dt></dl></div><p>A good general introduction on the strength and weaknesses modelling on
|
||||
the various non-rdbms datastores is Ian Varley's Master's thesis,
|
||||
<a class="link" href="http://ianvarley.com/UT/MR/Varley_MastersReport_Full_2009-08-07.pdf" target="_top">No Relation: The Mixed Blessings of Non-Relational Databases</a>.
|
||||
Recommended. Also, read <a class="xref" href="regions.arch.html#keyvalue" title="9.7.5.4. KeyValue">Section 9.7.5.4, “KeyValue”</a> for how HBase stores data internally.
|
||||
</p><div class="section" title="6.1. Schema Creation"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a name="schema.creation"></a>6.1.
|
||||
Schema Creation
|
||||
</h2></div></div></div><p>HBase schemas can be created or updated with <a class="xref" href="shell.html" title="Chapter 4. The Apache HBase Shell">Chapter 4, <i>The Apache HBase Shell</i></a>
|
||||
or by using <a class="link" href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html" target="_top">HBaseAdmin</a> in the Java API.
|
||||
</p><p>Tables must be disabled when making ColumnFamily modifications, for example:
|
||||
</p><pre class="programlisting">
|
||||
Configuration config = HBaseConfiguration.create();
|
||||
HBaseAdmin admin = new HBaseAdmin(config);
|
||||
String table = "myTable";
|
||||
|
||||
admin.disableTable(table);
|
||||
|
||||
HColumnDescriptor cf1 = ...;
|
||||
admin.addColumn(table, cf1); // adding new ColumnFamily
|
||||
HColumnDescriptor cf2 = ...;
|
||||
admin.modifyColumn(table, cf2); // modifying existing ColumnFamily
|
||||
|
||||
admin.enableTable(table);
|
||||
</pre><p>
|
||||
</p>See <a class="xref" href="config.files.html#client_dependencies" title="2.3.4. Client configuration and dependencies connecting to an HBase cluster">Section 2.3.4, “Client configuration and dependencies connecting to an HBase cluster”</a> for more information about configuring client connections.
|
||||
<p>Note: online schema changes are supported in the 0.92.x codebase, but the 0.90.x codebase requires the table
|
||||
to be disabled.
|
||||
</p><div class="section" title="6.1.1. Schema Updates"><div class="titlepage"><div><div><h3 class="title"><a name="schema.updates"></a>6.1.1. Schema Updates</h3></div></div></div><p>When changes are made to either Tables or ColumnFamilies (e.g., region size, block size), these changes
|
||||
take effect the next time there is a major compaction and the StoreFiles get re-written.
|
||||
</p><p>See <a class="xref" href="regions.arch.html#store" title="9.7.5. Store">Section 9.7.5, “Store”</a> for more information on StoreFiles.
|
||||
</p></div></div></div><div id="disqus_thread"></div><script type="text/javascript">
|
||||
var disqus_shortname = 'hbase'; // required: replace example with your forum shortname
|
||||
var disqus_url = 'http://hbase.apache.org/book';
|
||||
var disqus_identifier = 'schema';
|
||||
|
||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = 'http://' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script><noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript><a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a><div class="navfooter"><hr><table width="100%" summary="Navigation footer"><tr><td width="40%" align="left"><a accesskey="p" href="joins.html">Prev</a> </td><td width="20%" align="center"> </td><td width="40%" align="right"> <a accesskey="n" href="number.of.cfs.html">Next</a></td></tr><tr><td width="40%" align="left" valign="top">5.11. Joins </td><td width="20%" align="center"><a accesskey="h" href="book.html">Home</a></td><td width="40%" align="right" valign="top"> 6.2.
|
||||
On the number of column families
|
||||
</td></tr></table></div></body></html>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue