diff --git a/src/site/resources/images/architecture.gif b/hbase-site/src/site/resources/images/architecture.gif similarity index 100% rename from src/site/resources/images/architecture.gif rename to hbase-site/src/site/resources/images/architecture.gif diff --git a/src/site/resources/images/favicon.ico b/hbase-site/src/site/resources/images/favicon.ico similarity index 100% rename from src/site/resources/images/favicon.ico rename to hbase-site/src/site/resources/images/favicon.ico diff --git a/src/site/resources/images/hadoop-logo.jpg b/hbase-site/src/site/resources/images/hadoop-logo.jpg similarity index 100% rename from src/site/resources/images/hadoop-logo.jpg rename to hbase-site/src/site/resources/images/hadoop-logo.jpg diff --git a/src/site/resources/images/hbase_logo.png b/hbase-site/src/site/resources/images/hbase_logo.png similarity index 100% rename from src/site/resources/images/hbase_logo.png rename to hbase-site/src/site/resources/images/hbase_logo.png diff --git a/src/site/resources/images/hfile.png b/hbase-site/src/site/resources/images/hfile.png similarity index 100% rename from src/site/resources/images/hfile.png rename to hbase-site/src/site/resources/images/hfile.png diff --git a/src/site/resources/images/hfilev2.png b/hbase-site/src/site/resources/images/hfilev2.png similarity index 100% rename from src/site/resources/images/hfilev2.png rename to hbase-site/src/site/resources/images/hfilev2.png diff --git a/src/site/resources/images/replication_overview.png b/hbase-site/src/site/resources/images/replication_overview.png similarity index 100% rename from src/site/resources/images/replication_overview.png rename to hbase-site/src/site/resources/images/replication_overview.png diff --git a/src/main/resources/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties b/src/main/resources/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties deleted file mode 100644 index 661e56d01e1..00000000000 --- a/src/main/resources/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# ResourceBundle properties file for RowCounter MR job - -CounterGroupName= RowCounter - -ROWS.name= Rows diff --git a/src/main/resources/org/apache/hadoop/hbase/mapreduce/RowCounter_Counters.properties b/src/main/resources/org/apache/hadoop/hbase/mapreduce/RowCounter_Counters.properties deleted file mode 100644 index 661e56d01e1..00000000000 --- a/src/main/resources/org/apache/hadoop/hbase/mapreduce/RowCounter_Counters.properties +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# ResourceBundle properties file for RowCounter MR job - -CounterGroupName= RowCounter - -ROWS.name= Rows diff --git a/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift b/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift deleted file mode 100644 index 5bb0f51cbd3..00000000000 --- a/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// NOTE: The "required" and "optional" keywords for the service methods are purely for documentation - -namespace java org.apache.hadoop.hbase.thrift2.generated -namespace cpp apache.hadoop.hbase.thrift2 -namespace rb Apache.Hadoop.Hbase.Thrift2 -namespace py hbase -namespace perl Hbase - -struct TTimeRange { - 1: required i64 minStamp, - 2: required i64 maxStamp -} - -/** - * Addresses a single cell or multiple cells - * in a HBase table by column family and optionally - * a column qualifier and timestamp - */ -struct TColumn { - 1: required binary family, - 2: optional binary qualifier, - 3: optional i64 timestamp -} - -/** - * Represents a single cell and its value. - */ -struct TColumnValue { - 1: required binary family, - 2: required binary qualifier, - 3: required binary value, - 4: optional i64 timestamp -} - -/** - * Represents a single cell and the amount to increment it by - */ -struct TColumnIncrement { - 1: required binary family, - 2: required binary qualifier, - 3: optional i64 amount = 1 -} - -/** - * if no Result is found, row and columnValues will not be set. - */ -struct TResult { - 1: optional binary row, - 2: required list columnValues -} - -/** - * Specify type of delete: - * - DELETE_COLUMN means exactly one version will be removed, - * - DELETE_COLUMNS means previous versions will also be removed. - */ -enum TDeleteType { - DELETE_COLUMN = 0, - DELETE_COLUMNS = 1 -} - -/** - * Used to perform Get operations on a single row. - * - * The scope can be further narrowed down by specifying a list of - * columns or column families. - * - * To get everything for a row, instantiate a Get object with just the row to get. 
- * To further define the scope of what to get you can add a timestamp or time range - * with an optional maximum number of versions to return. - * - * If you specify a time range and a timestamp the range is ignored. - * Timestamps on TColumns are ignored. - * - * TODO: Filter, Locks - */ -struct TGet { - 1: required binary row, - 2: optional list columns, - - 3: optional i64 timestamp, - 4: optional TTimeRange timeRange, - - 5: optional i32 maxVersions, -} - -/** - * Used to perform Put operations for a single row. - * - * Add column values to this object and they'll be added. - * You can provide a default timestamp if the column values - * don't have one. If you don't provide a default timestamp - * the current time is inserted. - * - * You can also specify if this Put should be written - * to the write-ahead Log (WAL) or not. It defaults to true. - */ -struct TPut { - 1: required binary row, - 2: required list columnValues - 3: optional i64 timestamp, - 4: optional bool writeToWal = 1 -} - -/** - * Used to perform Delete operations on a single row. - * - * The scope can be further narrowed down by specifying a list of - * columns or column families as TColumns. - * - * Specifying only a family in a TColumn will delete the whole family. - * If a timestamp is specified all versions with a timestamp less than - * or equal to this will be deleted. If no timestamp is specified the - * current time will be used. - * - * Specifying a family and a column qualifier in a TColumn will delete only - * this qualifier. If a timestamp is specified only versions equal - * to this timestamp will be deleted. If no timestamp is specified the - * most recent version will be deleted. To delete all previous versions, - * specify the DELETE_COLUMNS TDeleteType. - * - * The top level timestamp is only used if a complete row should be deleted - * (i.e. no columns are passed) and if it is specified it works the same way - * as if you had added a TColumn for every column family and this timestamp - * (i.e. all versions older than or equal in all column families will be deleted) - * - */ -struct TDelete { - 1: required binary row, - 2: optional list columns, - 3: optional i64 timestamp, - 4: optional TDeleteType deleteType = 1, - 5: optional bool writeToWal = 1 -} - -/** - * Used to perform Increment operations for a single row. - * - * You can specify if this Increment should be written - * to the write-ahead Log (WAL) or not. It defaults to true. - */ -struct TIncrement { - 1: required binary row, - 2: required list columns, - 3: optional bool writeToWal = 1 -} - -/** - * Any timestamps in the columns are ignored, use timeRange to select by timestamp. - * Max versions defaults to 1. - */ -struct TScan { - 1: optional binary startRow, - 2: optional binary stopRow, - 3: optional list columns - 4: optional i32 caching, - 5: optional i32 maxVersions=1, - 6: optional TTimeRange timeRange, -} - -// -// Exceptions -// - -/** - * A TIOError exception signals that an error occurred communicating - * to the HBase master or a HBase region server. Also used to return - * more general HBase error conditions. - */ -exception TIOError { - 1: optional string message -} - -/** - * A TIllegalArgument exception indicates an illegal or invalid - * argument was passed into a procedure. - */ -exception TIllegalArgument { - 1: optional string message -} - -service THBaseService { - - /** - * Test for the existence of columns in the table, as specified in the TGet. 
- * - * @return true if the specified TGet matches one or more keys, false if not - */ - bool exists( - /** the table to check on */ - 1: required binary table, - - /** the TGet to check for */ - 2: required TGet get - ) throws (1:TIOError io) - - /** - * Method for getting data from a row. - * - * If the row cannot be found an empty Result is returned. - * This can be checked by the empty field of the TResult - * - * @return the result - */ - TResult get( - /** the table to get from */ - 1: required binary table, - - /** the TGet to fetch */ - 2: required TGet get - ) throws (1: TIOError io) - - /** - * Method for getting multiple rows. - * - * If a row cannot be found there will be a null - * value in the result list for that TGet at the - * same position. - * - * So the Results are in the same order as the TGets. - */ - list getMultiple( - /** the table to get from */ - 1: required binary table, - - /** a list of TGets to fetch, the Result list - will have the Results at corresponding positions - or null if there was an error */ - 2: required list gets - ) throws (1: TIOError io) - - /** - * Commit a TPut to a table. - */ - void put( - /** the table to put data in */ - 1: required binary table, - - /** the TPut to put */ - 2: required TPut put - ) throws (1: TIOError io) - - /** - * Atomically checks if a row/family/qualifier value matches the expected - * value. If it does, it adds the TPut. - * - * @return true if the new put was executed, false otherwise - */ - bool checkAndPut( - /** to check in and put to */ - 1: required binary table, - - /** row to check */ - 2: required binary row, - - /** column family to check */ - 3: required binary family, - - /** column qualifier to check */ - 4: required binary qualifier, - - /** the expected value, if not provided the - check is for the non-existence of the - column in question */ - 5: binary value, - - /** the TPut to put if the check succeeds */ - 6: required TPut put - ) throws (1: TIOError io) - - /** - * Commit a List of Puts to the table. - */ - void putMultiple( - /** the table to put data in */ - 1: required binary table, - - /** a list of TPuts to commit */ - 2: required list puts - ) throws (1: TIOError io) - - /** - * Deletes as specified by the TDelete. - * - * Note: "delete" is a reserved keyword and cannot be used in Thrift - * thus the inconsistent naming scheme from the other functions. - */ - void deleteSingle( - /** the table to delete from */ - 1: required binary table, - - /** the TDelete to delete */ - 2: required TDelete deleteSingle - ) throws (1: TIOError io) - - /** - * Bulk commit a List of TDeletes to the table. - * - * This returns a list of TDeletes that were not - * executed. So if everything succeeds you'll - * receive an empty list. - */ - list deleteMultiple( - /** the table to delete from */ - 1: required binary table, - - /** list of TDeletes to delete */ - 2: required list deletes - ) throws (1: TIOError io) - - /** - * Atomically checks if a row/family/qualifier value matches the expected - * value. If it does, it adds the delete. 
- * - * @return true if the new delete was executed, false otherwise - */ - bool checkAndDelete( - /** to check in and delete from */ - 1: required binary table, - - /** row to check */ - 2: required binary row, - - /** column family to check */ - 3: required binary family, - - /** column qualifier to check */ - 4: required binary qualifier, - - /** the expected value, if not provided the - check is for the non-existence of the - column in question */ - 5: binary value, - - /** the TDelete to execute if the check succeeds */ - 6: required TDelete deleteSingle - ) throws (1: TIOError io) - - TResult increment( - /** the table to increment the value on */ - 1: required binary table, - - /** the TIncrement to increment */ - 2: required TIncrement increment - ) throws (1: TIOError io) - - /** - * Get a Scanner for the provided TScan object. - * - * @return Scanner Id to be used with other scanner procedures - */ - i32 openScanner( - /** the table to get the Scanner for */ - 1: required binary table, - - /** the scan object to get a Scanner for */ - 2: required TScan scan, - ) throws (1: TIOError io) - - /** - * Grabs multiple rows from a Scanner. - * - * @return Between zero and numRows TResults - */ - list getScannerRows( - /** the Id of the Scanner to return rows from. This is an Id returned from the openScanner function. */ - 1: required i32 scannerId, - - /** number of rows to return */ - 2: i32 numRows = 1 - ) throws ( - 1: TIOError io, - - /** if the scannerId is invalid */ - 2: TIllegalArgument ia - ) - - /** - * Closes the scanner. Should be called if you need to close - * the Scanner before all results are read. - * - * Exhausted scanners are closed automatically. - */ - void closeScanner( - /** the Id of the Scanner to close **/ - 1: required i32 scannerId - ) throws ( - 1: TIOError io, - - /** if the scannerId is invalid */ - 2: TIllegalArgument ia - ) - -} diff --git a/src/main/xslt/configuration_to_docbook_section.xsl b/src/main/xslt/configuration_to_docbook_section.xsl deleted file mode 100644 index 95f7fd5d8c3..00000000000 --- a/src/main/xslt/configuration_to_docbook_section.xsl +++ /dev/null @@ -1,68 +0,0 @@ - - - - - -
-HBase Default Configuration - - - - -HBase Default Configuration - -The documentation below is generated using the default hbase configuration file, -hbase-default.xml, as source. - - - - - - - - - - - - - - Default: - - - - - - - -
-
-
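As a rough, hedged sketch of how the Thrift2 service defined in the hbase.thrift IDL removed above is typically driven from Java: it assumes a thrift2 ThriftServer listening on localhost:9090 with an unframed binary protocol, the generated org.apache.hadoop.hbase.thrift2.generated classes on the classpath, and a hypothetical table 'test' with a 'data' family; none of these names come from this change.

```java
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.thrift2.generated.TColumnValue;
import org.apache.hadoop.hbase.thrift2.generated.TGet;
import org.apache.hadoop.hbase.thrift2.generated.THBaseService;
import org.apache.hadoop.hbase.thrift2.generated.TPut;
import org.apache.hadoop.hbase.thrift2.generated.TResult;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;

public class Thrift2ClientSketch {
  public static void main(String[] args) throws Exception {
    // Assumes a thrift2 server started with defaults (unframed transport, binary protocol).
    TTransport transport = new TSocket("localhost", 9090);
    transport.open();
    THBaseService.Client client = new THBaseService.Client(new TBinaryProtocol(transport));

    ByteBuffer table = ByteBuffer.wrap("test".getBytes("UTF-8"));   // hypothetical table

    // Write one cell with TPut (columnValues is the required list from the IDL).
    TPut put = new TPut();
    put.setRow("row1".getBytes("UTF-8"));
    TColumnValue cv = new TColumnValue();
    cv.setFamily("data".getBytes("UTF-8"));
    cv.setQualifier("1".getBytes("UTF-8"));
    cv.setValue("value1".getBytes("UTF-8"));
    put.addToColumnValues(cv);
    client.put(table, put);

    // Read it back with TGet; an empty TResult means the row was not found.
    TGet get = new TGet();
    get.setRow("row1".getBytes("UTF-8"));
    TResult result = client.get(table, get);
    System.out.println("cells returned: " + result.getColumnValuesSize());

    transport.close();
  }
}
```

Every THBaseService method follows the same shape as put and get here: the table name is the first argument and TIOError is thrown for server-side problems, as documented in the IDL.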
diff --git a/src/site/resources/css/freebsd_docbook.css b/src/site/resources/css/freebsd_docbook.css deleted file mode 100644 index 3d40fa7010c..00000000000 --- a/src/site/resources/css/freebsd_docbook.css +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2001, 2003, 2010 The FreeBSD Documentation Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: doc/share/misc/docbook.css,v 1.15 2010/03/20 04:15:01 hrs Exp $ - */ - -BODY ADDRESS { - line-height: 1.3; - margin: .6em 0; -} - -BODY BLOCKQUOTE { - margin-top: .75em; - line-height: 1.5; - margin-bottom: .75em; -} - -HTML BODY { - margin: 1em 8% 1em 10%; - line-height: 1.2; -} - -.LEGALNOTICE { - font-size: small; - font-variant: small-caps; -} - -BODY DIV { - margin: 0; -} - -DL { - margin: .8em 0; - line-height: 1.2; -} - -BODY FORM { - margin: .6em 0; -} - -H1, H2, H3, H4, H5, H6, -DIV.EXAMPLE P B, -.QUESTION, -DIV.TABLE P B, -DIV.PROCEDURE P B { - color: #990000; -} - -BODY H1, BODY H2, BODY H3, BODY H4, BODY H5, BODY H6 { - line-height: 1.3; - margin-left: 0; -} - -BODY H1, BODY H2 { - margin: .8em 0 0 -4%; -} - -BODY H3, BODY H4 { - margin: .8em 0 0 -3%; -} - -BODY H5 { - margin: .8em 0 0 -2%; -} - -BODY H6 { - margin: .8em 0 0 -1%; -} - -BODY HR { - margin: .6em; - border-width: 0 0 1px 0; - border-style: solid; - border-color: #cecece; -} - -BODY IMG.NAVHEADER { - margin: 0 0 0 -4%; -} - -OL { - margin: 0 0 0 5%; - line-height: 1.2; -} - -BODY PRE { - margin: .75em 0; - line-height: 1.0; - font-family: monospace; -} - -BODY TD, BODY TH { - line-height: 1.2; -} - -UL, BODY DIR, BODY MENU { - margin: 0 0 0 5%; - line-height: 1.2; -} - -HTML { - margin: 0; - padding: 0; -} - -BODY P B.APPLICATION { - color: #000000; -} - -.FILENAME { - color: #007a00; -} - -.GUIMENU, .GUIMENUITEM, .GUISUBMENU, -.GUILABEL, .INTERFACE, -.SHORTCUT, .SHORTCUT .KEYCAP { - font-weight: bold; -} - -.GUIBUTTON { - background-color: #CFCFCF; - padding: 2px; -} - -.ACCEL { - background-color: #F0F0F0; - text-decoration: underline; -} - -.SCREEN { - padding: 1ex; -} - -.PROGRAMLISTING { - padding: 1ex; - background-color: #eee; - border: 1px solid #ccc; -} - -@media screen { /* hide from IE3 */ - a[href]:hover { background: #ffa } -} - -BLOCKQUOTE.NOTE { - color: #222; - background: #eee; 
- border: 1px solid #ccc; - padding: 0.4em 0.4em; - width: 85%; -} - -BLOCKQUOTE.TIP { - color: #004F00; - background: #d8ecd6; - border: 1px solid green; - padding: 0.2em 2em; - width: 85%; -} - -BLOCKQUOTE.IMPORTANT { - font-style:italic; - border: 1px solid #a00; - border-left: 12px solid #c00; - padding: 0.1em 1em; -} - -BLOCKQUOTE.WARNING { - color: #9F1313; - background: #f8e8e8; - border: 1px solid #e59595; - padding: 0.2em 2em; - width: 85%; -} - -.EXAMPLE { - background: #fefde6; - border: 1px solid #f1bb16; - margin: 1em 0; - padding: 0.2em 2em; - width: 90%; -} - -.INFORMALTABLE TABLE.CALSTABLE TR TD { - padding-left: 1em; - padding-right: 1em; -} diff --git a/src/site/resources/css/site.css b/src/site/resources/css/site.css deleted file mode 100644 index f26d03c39b5..00000000000 --- a/src/site/resources/css/site.css +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -a.externalLink, a.externalLink:link, a.externalLink:visited, a.externalLink:active, a.externalLink:hover { - background: none; - padding-right: 0; -} - -/* -body ul { - list-style-type: square; -} -*/ - -#downloadbox { - float: right; - margin: 0 10px 20px 20px; - padding: 5px; - border: 1px solid #999; - background-color: #eee; -} - -#downloadbox h5 { - color: #000; - margin: 0; - border-bottom: 1px solid #aaaaaa; - font-size: smaller; - padding: 0; -} - -#downloadbox p { - margin-top: 1em; - margin-bottom: 0; -} - -#downloadbox ul { - margin-top: 0; - margin-bottom: 1em; - list-style-type: disc; -} - -#downloadbox li { - font-size: smaller; -} - -/* -h4 { - padding: 0; - border: none; - color: #000; - margin: 0; - font-size: larger; - font-weight: bold; -} -*/ - -#banner { - background: none; -} - -#banner img { - padding: 10px; - margin: auto; - display: block; - background: none; - float: center; - height:; -} - -#breadcrumbs { - background-image: url(); -} - -#footer { - border-top: 0px; -} - -.frontpagebox { - float: left; - text-align: center; - width: 15em; - margin-left: 0.5em; - margin-right: 0.5em; - margin-top: 2em; -} - -.headline { - font-size: 120%; - font-weight: bold; - padding-top: 1px; - padding-bottom: 5px; - background-image: url(../images/breadcrumbs.jpg); - background-repeat: repeat-x; -} - -.section { - padding-bottom: 0; - padding-top: 0; -} - -/* -#leftColumn { - display: none !important -} - -#bodyColumn { - margin-left: 1.5em; -} -*/ - - diff --git a/src/site/resources/doap_Hbase.rdf b/src/site/resources/doap_Hbase.rdf deleted file mode 100644 index 08e9bc07cf1..00000000000 --- a/src/site/resources/doap_Hbase.rdf +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - 2012-04-14 - - Apache HBase - - - Apache HBase software is the Hadoop database. Think of it as a distributed, scalable, big data store. 
- Use Apache HBase software when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. HBase is an open-source, distributed, versioned, column-oriented store modeled after Google's Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, HBase provides Bigtable-like capabilities on top of Hadoop and HDFS. - - - - Java - - - - Apache hbase 0.92.1 - 2012-03-19 - 0.92.1 - - - - - - - - - - - Apache HBase PMC - - - - - diff --git a/src/site/resources/images/hbase_logo.svg b/src/site/resources/images/hbase_logo.svg deleted file mode 100644 index c4b3343ecc2..00000000000 --- a/src/site/resources/images/hbase_logo.svg +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - diff --git a/src/site/site.vm b/src/site/site.vm deleted file mode 100644 index 0a478e4b4f9..00000000000 --- a/src/site/site.vm +++ /dev/null @@ -1,544 +0,0 @@ - -#* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*# - -#macro ( link $href $name $target $img $position $alt $border $width $height ) - #set ( $linkTitle = ' title="' + $name + '"' ) - #if( $target ) - #set ( $linkTarget = ' target="' + $target + '"' ) - #else - #set ( $linkTarget = "" ) - #end - #if ( ( $href.toLowerCase().startsWith("http") || $href.toLowerCase().startsWith("https") ) ) - #set ( $linkClass = ' class="externalLink"' ) - #else - #set ( $linkClass = "" ) - #end - #if ( $img ) - #if ( $position == "left" ) - #image($img $alt $border $width $height)$name - #else - $name #image($img $alt $border $width $height) - #end - #else - $name - #end -#end -## -#macro ( image $img $alt $border $width $height ) - #if( $img ) - #if ( ! 
( $img.toLowerCase().startsWith("http") || $img.toLowerCase().startsWith("https") ) ) - #set ( $imgSrc = $PathTool.calculateLink( $img, $relativePath ) ) - #set ( $imgSrc = $imgSrc.replaceAll( "\\", "/" ) ) - #set ( $imgSrc = ' src="' + $imgSrc + '"' ) - #else - #set ( $imgSrc = ' src="' + $img + '"' ) - #end - #if( $alt ) - #set ( $imgAlt = ' alt="' + $alt + '"' ) - #else - #set ( $imgAlt = ' alt=""' ) - #end - #if( $border ) - #set ( $imgBorder = ' border="' + $border + '"' ) - #else - #set ( $imgBorder = "" ) - #end - #if( $width ) - #set ( $imgWidth = ' width="' + $width + '"' ) - #else - #set ( $imgWidth = "" ) - #end - #if( $height ) - #set ( $imgHeight = ' height="' + $height + '"' ) - #else - #set ( $imgHeight = "" ) - #end - - #end -#end -#macro ( banner $banner $id ) - #if ( $banner ) - #if( $banner.href ) - - #else - - #end - #end -#end -## -#macro ( links $links ) - #set ( $counter = 0 ) - #foreach( $item in $links ) - #set ( $counter = $counter + 1 ) - #set ( $currentItemHref = $PathTool.calculateLink( $item.href, $relativePath ) ) - #set ( $currentItemHref = $currentItemHref.replaceAll( "\\", "/" ) ) - #link( $currentItemHref $item.name $item.target $item.img $item.position $item.alt $item.border $item.width $item.height ) - #if ( $links.size() > $counter ) - | - #end - #end -#end -## -#macro ( breadcrumbs $breadcrumbs ) - #set ( $counter = 0 ) - #foreach( $item in $breadcrumbs ) - #set ( $counter = $counter + 1 ) - #set ( $currentItemHref = $PathTool.calculateLink( $item.href, $relativePath ) ) - #set ( $currentItemHref = $currentItemHref.replaceAll( "\\", "/" ) ) -## - #if ( $currentItemHref == $alignedFileName || $currentItemHref == "" ) - $item.name - #else - #link( $currentItemHref $item.name $item.target $item.img $item.position $item.alt $item.border $item.width $item.height ) - #end - #if ( $breadcrumbs.size() > $counter ) - > - #end - #end -#end -## -#macro ( displayTree $display $item ) - #if ( $item && $item.items && $item.items.size() > 0 ) - #foreach( $subitem in $item.items ) - #set ( $subitemHref = $PathTool.calculateLink( $subitem.href, $relativePath ) ) - #set ( $subitemHref = $subitemHref.replaceAll( "\\", "/" ) ) - #if ( $alignedFileName == $subitemHref ) - #set ( $display = true ) - #end -## - #displayTree( $display $subitem ) - #end - #end -#end -## -#macro ( menuItem $item ) - #set ( $collapse = "none" ) - #set ( $currentItemHref = $PathTool.calculateLink( $item.href, $relativePath ) ) - #set ( $currentItemHref = $currentItemHref.replaceAll( "\\", "/" ) ) -## - #if ( $item && $item.items && $item.items.size() > 0 ) - #if ( $item.collapse == false ) - #set ( $collapse = "expanded" ) - #else - ## By default collapsed - #set ( $collapse = "collapsed" ) - #end -## - #set ( $display = false ) - #displayTree( $display $item ) -## - #if ( $alignedFileName == $currentItemHref || $display ) - #set ( $collapse = "expanded" ) - #end - #end -
  • - #if ( $item.img ) - #if ( $item.position == "left" ) - #if ( $alignedFileName == $currentItemHref ) - #image($item.img $item.alt $item.border $item.width $item.height) $item.name - #else - #link($currentItemHref $item.name $item.target $item.img $item.position $item.alt $item.border $item.width $item.height) - #end - #else - #if ( $alignedFileName == $currentItemHref ) - $item.name #image($item.img $item.alt $item.border $item.width $item.height) - #else - #link($currentItemHref $item.name $item.target $item.img $item.position $item.alt $item.border $item.width $item.height) - #end - #end - #else - #if ( $alignedFileName == $currentItemHref ) - $item.name - #else - #link( $currentItemHref $item.name $item.target $item.img $item.position $item.alt $item.border $item.width $item.height ) - #end - #end - #if ( $item && $item.items && $item.items.size() > 0 ) - #if ( $collapse == "expanded" ) -
      - #foreach( $subitem in $item.items ) - #menuItem( $subitem ) - #end -
    - #end - #end -
  • -#end -## -#macro ( mainMenu $menus ) - #foreach( $menu in $menus ) - #if ( $menu.name ) - #if ( $menu.img ) - #if( $menu.position ) - #set ( $position = $menu.position ) - #else - #set ( $position = "left" ) - #end -## - #if ( ! ( $menu.img.toLowerCase().startsWith("http") || $menu.img.toLowerCase().startsWith("https") ) ) - #set ( $src = $PathTool.calculateLink( $menu.img, $relativePath ) ) - #set ( $src = $src.replaceAll( "\\", "/" ) ) - #set ( $src = ' src="' + $src + '"' ) - #else - #set ( $src = ' src="' + $menu.img + '"' ) - #end -## - #if( $menu.alt ) - #set ( $alt = ' alt="' + $menu.alt + '"' ) - #else - #set ( $alt = ' alt="' + $menu.name + '"' ) - #end -## - #if( $menu.border ) - #set ( $border = ' border="' + $menu.border + '"' ) - #else - #set ( $border = ' border="0"' ) - #end -## - #if( $menu.width ) - #set ( $width = ' width="' + $menu.width + '"' ) - #else - #set ( $width = "" ) - #end - #if( $menu.height ) - #set ( $height = ' height="' + $menu.height + '"' ) - #else - #set ( $height = "" ) - #end -## - #set ( $img = '" ) -## - #if ( $position == "left" ) -
    $img $menu.name
    - #else -
    $menu.name $img
    - #end - #else -
    $menu.name
    - #end - #end - #if ( $menu.items && $menu.items.size() > 0 ) -
      - #foreach( $item in $menu.items ) - #menuItem( $item ) - #end -
    - #end - #end -#end -## -#macro ( copyright ) - #if ( $project ) - #if ( ${project.organization} && ${project.organization.name} ) - #set ( $period = "" ) - #else - #set ( $period = "." ) - #end -## - #set ( $currentYear = ${currentDate.year} + 1900 ) -## - #if ( ${project.inceptionYear} && ( ${project.inceptionYear} != ${currentYear.toString()} ) ) - ${project.inceptionYear}-${currentYear}${period} - #else - ${currentYear}${period} - #end -## - #if ( ${project.organization} ) - #if ( ${project.organization.name} && ${project.organization.url} ) - ${project.organization.name}. - #elseif ( ${project.organization.name} ) - ${project.organization.name}. - #end - #end - #end -#end -## -#macro ( publishDate $position $publishDate $version ) - #if ( $publishDate && $publishDate.format ) - #set ( $format = $publishDate.format ) - #else - #set ( $format = "yyyy-MM-dd" ) - #end -## - $dateFormat.applyPattern( $format ) -## - #set ( $dateToday = $dateFormat.format( $currentDate ) ) -## - #if ( $publishDate && $publishDate.position ) - #set ( $datePosition = $publishDate.position ) - #else - #set ( $datePosition = "left" ) - #end -## - #if ( $version ) - #if ( $version.position ) - #set ( $versionPosition = $version.position ) - #else - #set ( $versionPosition = "left" ) - #end - #else - #set ( $version = "" ) - #set ( $versionPosition = "left" ) - #end -## - #set ( $breadcrumbs = $decoration.body.breadcrumbs ) - #set ( $links = $decoration.body.links ) - - #if ( $datePosition.equalsIgnoreCase( "right" ) && $links && $links.size() > 0 ) - #set ( $prefix = " |" ) - #else - #set ( $prefix = "" ) - #end -## - #if ( $datePosition.equalsIgnoreCase( $position ) ) - #if ( ( $datePosition.equalsIgnoreCase( "right" ) ) || ( $datePosition.equalsIgnoreCase( "bottom" ) ) ) - $prefix $i18n.getString( "site-renderer", $locale, "template.lastpublished" ): $dateToday - #if ( $versionPosition.equalsIgnoreCase( $position ) ) -  | $i18n.getString( "site-renderer", $locale, "template.version" ): ${project.version} - #end - #elseif ( ( $datePosition.equalsIgnoreCase( "navigation-bottom" ) ) || ( $datePosition.equalsIgnoreCase( "navigation-top" ) ) ) -
    - $i18n.getString( "site-renderer", $locale, "template.lastpublished" ): $dateToday - #if ( $versionPosition.equalsIgnoreCase( $position ) ) -  | $i18n.getString( "site-renderer", $locale, "template.version" ): ${project.version} - #end -
    - #elseif ( $datePosition.equalsIgnoreCase("left") ) -
    - $i18n.getString( "site-renderer", $locale, "template.lastpublished" ): $dateToday - #if ( $versionPosition.equalsIgnoreCase( $position ) ) -  | $i18n.getString( "site-renderer", $locale, "template.version" ): ${project.version} - #end - #if ( $breadcrumbs && $breadcrumbs.size() > 0 ) - | #breadcrumbs( $breadcrumbs ) - #end -
    - #end - #elseif ( $versionPosition.equalsIgnoreCase( $position ) ) - #if ( ( $versionPosition.equalsIgnoreCase( "right" ) ) || ( $versionPosition.equalsIgnoreCase( "bottom" ) ) ) - $prefix $i18n.getString( "site-renderer", $locale, "template.version" ): ${project.version} - #elseif ( ( $versionPosition.equalsIgnoreCase( "navigation-bottom" ) ) || ( $versionPosition.equalsIgnoreCase( "navigation-top" ) ) ) -
    - $i18n.getString( "site-renderer", $locale, "template.version" ): ${project.version} -
    - #elseif ( $versionPosition.equalsIgnoreCase("left") ) -
    - $i18n.getString( "site-renderer", $locale, "template.version" ): ${project.version} - #if ( $breadcrumbs && $breadcrumbs.size() > 0 ) - | #breadcrumbs( $breadcrumbs ) - #end -
    - #end - #elseif ( $position.equalsIgnoreCase( "left" ) ) - #if ( $breadcrumbs && $breadcrumbs.size() > 0 ) -
    - #breadcrumbs( $breadcrumbs ) -
    - #end - #end -#end -## -#macro ( poweredByLogo $poweredBy ) - #if( $poweredBy ) - #foreach ($item in $poweredBy) - #if( $item.href ) - #set ( $href = $PathTool.calculateLink( $item.href, $relativePath ) ) - #set ( $href = $href.replaceAll( "\\", "/" ) ) - #else - #set ( $href="http://maven.apache.org/" ) - #end -## - #if( $item.name ) - #set ( $name = $item.name ) - #else - #set ( $name = $i18n.getString( "site-renderer", $locale, "template.builtby" ) ) - #set ( $name = "${name} Maven" ) - #end -## - #if( $item.img ) - #set ( $img = $item.img ) - #else - #set ( $img = "images/logos/maven-feather.png" ) - #end -## - #if ( ! ( $img.toLowerCase().startsWith("http") || $img.toLowerCase().startsWith("https") ) ) - #set ( $img = $PathTool.calculateLink( $img, $relativePath ) ) - #set ( $img = $src.replaceAll( "\\", "/" ) ) - #end -## - #if( $item.alt ) - #set ( $alt = ' alt="' + $item.alt + '"' ) - #else - #set ( $alt = ' alt="' + $name + '"' ) - #end -## - #if( $item.border ) - #set ( $border = ' border="' + $item.border + '"' ) - #else - #set ( $border = "" ) - #end -## - #if( $item.width ) - #set ( $width = ' width="' + $item.width + '"' ) - #else - #set ( $width = "" ) - #end - #if( $item.height ) - #set ( $height = ' height="' + $item.height + '"' ) - #else - #set ( $height = "" ) - #end -## - - - - #end - #if( $poweredBy.isEmpty() ) - - $i18n.getString( - - #end - #else - - $i18n.getString( - - #end -#end -## - - - - $title - - - -#foreach( $author in $authors ) - -#end -#if ( $dateCreation ) - -#end -#if ( $dateRevision ) - -#end -#if ( $locale ) - -#end - #if ( $decoration.body.head ) - #foreach( $item in $decoration.body.head.getChildren() ) - ## Workaround for DOXIA-150 due to a non-desired behaviour in p-u - ## @see org.codehaus.plexus.util.xml.Xpp3Dom#toString() - ## @see org.codehaus.plexus.util.xml.Xpp3Dom#toUnescapedString() - #set ( $documentHeader = "" ) - #set ( $documentHeader = $documentHeader.replaceAll( "\\", "" ) ) - #if ( $item.name == "script" ) - $StringUtils.replace( $item.toUnescapedString(), $documentHeader, "" ) - #else - $StringUtils.replace( $item.toString(), $documentHeader, "" ) - #end - #end - #end - ## $headContent - - - - - - -
    - -
    -
    -
    - $bodyContent -
    -
    -
    -
    -
    - - - diff --git a/src/site/site.xml b/src/site/site.xml deleted file mode 100644 index a7a88aed914..00000000000 --- a/src/site/site.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - - - - HBase - images/hbase_logo.png - http://hbase.apache.org/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - org.apache.maven.skins - maven-stylus-skin - - diff --git a/src/site/xdoc/acid-semantics.xml b/src/site/xdoc/acid-semantics.xml deleted file mode 100644 index 9f162d82472..00000000000 --- a/src/site/xdoc/acid-semantics.xml +++ /dev/null @@ -1,232 +0,0 @@ - - - - - - - - - HBase ACID Properties - - - - -
    -

HBase is not an ACID-compliant database. However, it does guarantee certain specific - properties.

    -

    This specification enumerates the ACID properties of HBase.

    -
    -
    -

    For the sake of common vocabulary, we define the following terms:

    -
    -
    Atomicity
    -
    an operation is atomic if it either completes entirely or not at all
    - -
    Consistency
    -
- all actions cause the table to transition from one valid state directly to another - (e.g. a row will not disappear during an update) -
    - -
    Isolation
    -
    - an operation is isolated if it appears to complete independently of any other concurrent transaction -
    - -
    Durability
    -
    any update that reports "successful" to the client will not be lost
    - -
    Visibility
    -
    an update is considered visible if any subsequent read will see the update as having been committed
    -
    -

    - The terms must and may are used as specified by RFC 2119. - In short, the word "must" implies that, if some case exists where the statement - is not true, it is a bug. The word "may" implies that, even if the guarantee - is provided in a current release, users should not rely on it. -

    -
    -
    -
      -
    • Read APIs -
        -
      • get
      • -
      • scan
      • -
      -
    • -
    • Write APIs
    • -
        -
      • put
      • -
      • batch put
      • -
      • delete
      • -
      -
    • Combination (read-modify-write) APIs
    • -
        -
      • incrementColumnValue
      • -
      • checkAndPut
      • -
      -
    -
    - -
    - -
    - -
      -
1. All mutations are atomic within a row. Any put will either wholly succeed or wholly fail.[3]
    2. -
        -
      1. An operation that returns a "success" code has completely succeeded.
      2. -
      3. An operation that returns a "failure" code has completely failed.
      4. -
      5. An operation that times out may have succeeded and may have failed. However, - it will not have partially succeeded or failed.
      6. -
      -
    3. This is true even if the mutation crosses multiple column families within a row.
    4. -
    5. APIs that mutate several rows will _not_ be atomic across the multiple rows. - For example, a multiput that operates on rows 'a','b', and 'c' may return having - mutated some but not all of the rows. In such cases, these APIs will return a list - of success codes, each of which may be succeeded, failed, or timed out as described above.
    6. -
7. The checkAndPut API happens atomically like the typical compareAndSet (CAS) operation - found in many hardware architectures (a client-level sketch follows this list).
    8. -
    9. The order of mutations is seen to happen in a well-defined order for each row, with no - interleaving. For example, if one writer issues the mutation "a=1,b=1,c=1" and - another writer issues the mutation "a=2,b=2,c=2", the row must either - be "a=1,b=1,c=1" or "a=2,b=2,c=2" and must not be something - like "a=1,b=2,c=1".
    10. -
        -
      1. Please note that this is not true _across rows_ for multirow batch mutations.
      2. -
      -
    -
    -
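A minimal sketch of the checkAndPut guarantee described in the Atomicity list above, assuming the 0.92-era Java client API (HTable, Put); the table 'test', family 'data', qualifier '1' and the values are hypothetical, and error handling is omitted.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class CheckAndPutSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "test");   // hypothetical table

    // The new cell value we want to write.
    Put put = new Put(Bytes.toBytes("row1"));
    put.add(Bytes.toBytes("data"), Bytes.toBytes("1"), Bytes.toBytes("new-value"));

    // Atomically apply the Put only if data:1 currently holds "old-value";
    // passing null as the expected value means "only if the cell does not exist".
    boolean applied = table.checkAndPut(
        Bytes.toBytes("row1"),        // row to check
        Bytes.toBytes("data"),        // column family to check
        Bytes.toBytes("1"),           // column qualifier to check
        Bytes.toBytes("old-value"),   // expected value
        put);

    System.out.println(applied ? "put applied" : "value did not match, nothing written");
    table.close();
  }
}
```

Because the comparison and the write are applied server-side within a single row, no write from another client can interleave between the check and the put.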
    -
      -
    1. All rows returned via any access API will consist of a complete row that existed at - some point in the table's history.
    2. -
3. This is true across column families - i.e. a get of a full row that occurs concurrently - with some mutations 1,2,3,4,5 will return a complete row that existed at some point in time - between mutation i and i+1 for some i between 1 and 5.
    4. -
    5. The state of a row will only move forward through the history of edits to it.
    6. -
    - -
    -

    - A scan is not a consistent view of a table. Scans do - not exhibit snapshot isolation. -

    -

    - Rather, scans have the following properties: -

    - -
      -
    1. - Any row returned by the scan will be a consistent view (i.e. that version - of the complete row existed at some point in time) [1] -
    2. -
    3. - A scan will always reflect a view of the data at least as new as - the beginning of the scan. This satisfies the visibility guarantees - enumerated below.
    4. -
        -
      1. For example, if client A writes data X and then communicates via a side - channel to client B, any scans started by client B will contain data at least - as new as X.
      2. -
      3. A scan _must_ reflect all mutations committed prior to the construction - of the scanner, and _may_ reflect some mutations committed subsequent to the - construction of the scanner.
      4. -
      5. Scans must include all data written prior to the scan (except in - the case where data is subsequently mutated, in which case it _may_ reflect - the mutation)
      6. -
      -
    -

    - Those familiar with relational databases will recognize this isolation level as "read committed". -

    -

    - Please note that the guarantees listed above regarding scanner consistency - are referring to "transaction commit time", not the "timestamp" - field of each cell. That is to say, a scanner started at time t may see edits - with a timestamp value greater than t, if those edits were committed with a - "forward dated" timestamp before the scanner was constructed. -

    -
    -
    -
    -
      -
    1. When a client receives a "success" response for any mutation, that - mutation is immediately visible to both that client and any client with whom it - later communicates through side channels. [3]
    2. -
    3. A row must never exhibit so-called "time-travel" properties. That - is to say, if a series of mutations moves a row sequentially through a series of - states, any sequence of concurrent reads will return a subsequence of those states.
    4. -
        -
1. For example, if a row's cells are mutated using the "incrementColumnValue" - API, a client must never see the value of any cell decrease (a client-level sketch follows this list).
      2. -
      3. This is true regardless of which read API is used to read back the mutation.
      4. -
      -
    5. Any version of a cell that has been returned to a read operation is guaranteed to - be durably stored.
    6. -
    - -
    -
    -
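A short sketch of the incrementColumnValue visibility guarantee from the list above, assuming the 0.92-era Java client API; the table and column names are hypothetical.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class IncrementVisibilitySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "test");   // hypothetical table

    // Atomically add 1 to a counter cell; the new value is returned.
    long afterIncrement = table.incrementColumnValue(
        Bytes.toBytes("row1"), Bytes.toBytes("data"), Bytes.toBytes("counter"), 1L);

    // Per the visibility guarantee, any subsequent read by this client (or by a
    // client it talks to afterwards) sees a counter of at least afterIncrement;
    // the observed value never decreases.
    Result r = table.get(new Get(Bytes.toBytes("row1")));
    long observed = Bytes.toLong(r.getValue(Bytes.toBytes("data"), Bytes.toBytes("counter")));
    System.out.println("incremented to " + afterIncrement + ", read back " + observed);

    table.close();
  }
}
```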
      -
    1. All visible data is also durable data. That is to say, a read will never return - data that has not been made durable on disk[2]
    2. -
3. Any operation that returns a "success" code (e.g. does not throw an exception) - will be made durable.[3]
    4. -
    5. Any operation that returns a "failure" code will not be made durable - (subject to the Atomicity guarantees above)
    6. -
    7. All reasonable failure scenarios will not affect any of the guarantees of this document.
    8. - -
    -
    -
    -

All of the above guarantees must be possible within HBase. For users who would like to trade - off some guarantees for performance, HBase may offer several tuning options. For example (a client-level sketch follows this list):

    -
      -
    • Visibility may be tuned on a per-read basis to allow stale reads or time travel.
    • -
    • Durability may be tuned to only flush data to disk on a periodic basis
    • -
    -
    -
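A hedged illustration of the durability trade-offs listed above, assuming the 0.92-era Java client API: writeToWAL can be disabled per Put, and the client-side write buffer (see footnote [3] below) delays sending Puts until it fills or is flushed. Table and column names are hypothetical.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class DurabilityTuningSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "test");   // hypothetical table

    // Trade durability for speed: this edit skips the write-ahead log, so it is
    // lost if the region server dies before the memstore is flushed to disk.
    Put fast = new Put(Bytes.toBytes("row1"));
    fast.add(Bytes.toBytes("data"), Bytes.toBytes("1"), Bytes.toBytes("value1"));
    fast.setWriteToWAL(false);
    table.put(fast);

    // The client-side write buffer (footnote [3]): with auto-flush off, Puts are
    // only sent to the RegionServer when the buffer fills or flushCommits() runs.
    table.setAutoFlush(false);
    Put buffered = new Put(Bytes.toBytes("row2"));
    buffered.add(Bytes.toBytes("data"), Bytes.toBytes("2"), Bytes.toBytes("value2"));
    table.put(buffered);
    table.flushCommits();

    table.close();
  }
}
```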
    -
    -

    - For more information, see the client architecture or data model sections in the HBase Reference Guide. -

    -
    - -
    -

[1] A consistent view is not guaranteed during intra-row scanning -- i.e. fetching a portion of - a row in one RPC then going back to fetch another portion of the row in a subsequent RPC. - Intra-row scanning happens when you set a limit on how many values to return per Scan#next - (see Scan#setBatch(int)). -

    - -

    [2] In the context of HBase, "durably on disk" implies an hflush() call on the transaction - log. This does not actually imply an fsync() to magnetic media, but rather just that the data has been - written to the OS cache on all replicas of the log. In the case of a full datacenter power loss, it is - possible that the edits are not truly durable.

    -

[3] Puts will either wholly succeed or wholly fail, provided that they are actually sent - to the RegionServer. If the write buffer is used, Puts will not be sent until the write buffer is filled - or it is explicitly flushed.

    - -
    - - -
    diff --git a/src/site/xdoc/bulk-loads.xml b/src/site/xdoc/bulk-loads.xml deleted file mode 100644 index 450a98d15db..00000000000 --- a/src/site/xdoc/bulk-loads.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - Bulk Loads in HBase - - - -

    This page has been retired. The contents have been moved to the - Bulk Loading section - in the Reference Guide. -

    - -
    diff --git a/src/site/xdoc/cygwin.xml b/src/site/xdoc/cygwin.xml deleted file mode 100644 index 01dd0bceb6c..00000000000 --- a/src/site/xdoc/cygwin.xml +++ /dev/null @@ -1,242 +0,0 @@ - - - - - Installing HBase on Windows using Cygwin - - - -
    -

    HBase is a distributed, column-oriented store, modeled after Google's BigTable. HBase is built on top of Hadoop for its MapReduce and distributed file system implementation. All these projects are open-source and part of the Apache Software Foundation.

    - -

Being distributed, large-scale platforms, the Hadoop and HBase projects mainly focus on *nix environments for production installations. However, being developed in Java, both projects are fully portable across platforms and, hence, also to the Windows operating system. For ease of development the projects rely on Cygwin to provide a *nix-like environment on Windows for running the shell scripts.

    -
    -
    -

This document explains the intricacies of running HBase on Windows using Cygwin as an all-in-one single-node installation for testing and development. The HBase Overview and QuickStart guides, on the other hand, go a long way in explaining how to set up HBase in more complex deployment scenarios.

    -
    - -
    -

    For running HBase on Windows, 3 technologies are required: Java, Cygwin and SSH. The following paragraphs detail the installation of each of the aforementioned technologies.

    -
    -

HBase depends on the Java Platform, Standard Edition, 6 Release. So the target system has to be provided with at least the Java Runtime Environment (JRE); however, if the system will also be used for development, the Java Development Kit (JDK) is preferred. You can download the latest versions of both from Sun's download page. Installation is a simple GUI wizard that guides you through the process.

    -
    -
    -

Cygwin is probably the oddest technology in this solution stack. It provides a dynamic link library that emulates most of a *nix environment on Windows. On top of that, many of the most common *nix tools are supplied. Combined, the DLL and the tools form a very *nix-like environment on Windows.

    - -

    For installation, Cygwin provides the setup.exe utility that tracks the versions of all installed components on the target system and provides the mechanism for installing or updating everything from the mirror sites of Cygwin.

    - -

To support installation, the setup.exe utility uses 2 directories on the target system: the Root directory for Cygwin (defaults to C:\cygwin), which will become / within the eventual Cygwin installation; and the Local Package directory (e.g. C:\cygsetup), which is the cache where setup.exe stores the packages before they are installed. The cache must not be the same folder as the Cygwin root.

    - -

Perform the following steps to install Cygwin, which are elaborately detailed in the 2nd chapter of the Cygwin User's Guide:

    - -
      -
    1. Make sure you have Administrator privileges on the target system.
    2. -
3. Choose and create your Root and Local Package directories. A good suggestion is to use the C:\cygwin\root and C:\cygwin\setup folders.
    4. -
    5. Download the setup.exe utility and save it to the Local Package directory.
    6. -
    7. Run the setup.exe utility, -
        -
      1. Choose the Install from Internet option,
      2. -
      3. Choose your Root and Local Package folders
      4. -
      5. and select an appropriate mirror.
      6. -
      7. Don't select any additional packages yet, as we only want to install Cygwin for now.
      8. -
      9. Wait for download and install
      10. -
      11. Finish the installation
      12. -
      -
    8. -
    9. Optionally, you can now also add a shortcut to your Start menu pointing to the setup.exe utility in the Local Package folder.
    10. -
    11. Add CYGWIN_HOME system-wide environment variable that points to your Root directory.
    12. -
    13. Add %CYGWIN_HOME%\bin to the end of your PATH environment variable.
    14. -
15. Reboot the system after making changes to the environment variables, otherwise the OS will not be able to find the Cygwin utilities.
    16. -
    17. Test your installation by running your freshly created shortcuts or the Cygwin.bat command in the Root folder. You should end up in a terminal window that is running a Bash shell. Test the shell by issuing following commands: -
        -
1. cd / should take you to the Root directory in Cygwin;
      2. -
3. the ls command should list all files and folders in the current directory.
      4. -
      5. Use the exit command to end the terminal.
      6. -
      -
    18. -
    19. When needed, to uninstall Cygwin you can simply delete the Root and Local Package directory, and the shortcuts that were created during installation.
    20. -
    -
    -
    -

HBase (and Hadoop) rely on SSH for inter-process and inter-node communication and for launching remote commands. SSH will be provisioned on the target system via Cygwin, which supports running Cygwin programs as Windows services!

    - -
      -
    1. Rerun the setup.exe utility.
    2. -
    3. Leave all parameters as is, skipping through the wizard using the Next button until the Select Packages panel is shown.
    4. -
5. Maximize the window and click the View button to toggle to the list view, which is ordered alphabetically on Package, making it easier to find the packages we'll need.
    6. -
    7. Select the following packages by clicking the status word (normally Skip) so it's marked for installation. Use the Next button to download and install the packages. -
        -
      1. OpenSSH
      2. -
      3. tcp_wrappers
      4. -
      5. diffutils
      6. -
      7. zlib
      8. -
      -
    8. -
    9. Wait for the install to complete and finish the installation.
    10. -
    -
    -
    -

    Download the latest release of HBase from the website. As the HBase distributable is just a zipped archive, installation is as simple as unpacking the archive so it ends up in its final installation directory. Notice that HBase has to be installed in Cygwin and a good directory suggestion is to use /usr/local/ (or [Root directory]\usr\local in Windows slang). You should end up with a /usr/local/hbase-<version> installation in Cygwin.

    - -This finishes installation. We go on with the configuration. -
    -
    -
    -

There are 3 parts left to configure: Java, SSH and HBase itself. The following paragraphs explain each topic in detail.

    -
    -

One important thing to remember in shell scripting in general (i.e. *nix and Windows) is that managing, manipulating and assembling path names that contain spaces can be very hard, due to the need to escape and quote those characters and strings. So we try to stay away from spaces in path names. *nix environments can help us out here very easily by using symbolic links.

    - -
      -
    1. Create a link in /usr/local to the Java home directory by using the following command and substituting the name of your chosen Java environment: -
ln -s /cygdrive/c/Program\ Files/Java/<jre name> /usr/local/<jre name>
      -
    2. -
3. Test your Java installation by changing directory to your Java folder with cd /usr/local/<jre name> and issuing the command ./bin/java -version. This should output your version of the chosen JRE.
    4. -
    -
    -
    -SSH -

    Configuring SSH is quite elaborate, but primarily a question of launching it by default as a Windows service.

    - -
      -
1. On Windows Vista and above, make sure you run the Cygwin shell with elevated privileges by right-clicking on the shortcut and using Run as Administrator.
    2. -
3. First of all, we have to make sure the rights on some crucial files are correct. Use the commands underneath. You can verify all rights by using the ls -l command on the different files. Also, notice that the auto-completion feature in the shell using <TAB> is extremely handy in these situations. -
        -
      1. chmod +r /etc/passwd to make the passwords file readable for all
      2. -
      3. chmod u+w /etc/passwd to make the passwords file writable for the owner
      4. -
      5. chmod +r /etc/group to make the groups file readable for all
      6. -
      -
        -
      1. chmod u+w /etc/group to make the groups file writable for the owner
      2. -
      -
        -
      1. chmod 755 /var to make the var folder writable to owner and readable and executable to all
      2. -
      -
    4. -
    5. Edit the /etc/hosts.allow file using your favorite editor (why not VI in the shell!) and make sure the following two lines are in there before the PARANOID line: -
        -
      1. ALL : localhost 127.0.0.1/32 : allow
      2. -
      3. ALL : [::1]/128 : allow
      4. -
      -
    6. -
    7. Next we have to configure SSH by using the script ssh-host-config -
        -
      1. If this script asks to overwrite an existing /etc/ssh_config, answer yes.
      2. -
      3. If this script asks to overwrite an existing /etc/sshd_config, answer yes.
      4. -
      5. If this script asks to use privilege separation, answer yes.
      6. -
7. If this script asks to install sshd as a service, answer yes. Make sure you started your shell as Administrator!
      8. -
      9. If this script asks for the CYGWIN value, just <enter> as the default is ntsec.
      10. -
      11. If this script asks to create the sshd account, answer yes.
      12. -
      13. If this script asks to use a different user name as service account, answer no as the default will suffice.
      14. -
      15. If this script asks to create the cyg_server account, answer yes. Enter a password for the account.
      16. -
      -
    8. -
9. Start the SSH service using net start sshd or cygrunsrv --start sshd. Notice that cygrunsrv is the utility that makes the process run as a Windows service. Confirm that you see a message stating that the CYGWIN sshd service was started successfully.
    10. -
11. Harmonize the Windows and Cygwin user accounts by using the commands: -
        -
      1. mkpasswd -cl > /etc/passwd
      2. -
      3. mkgroup --local > /etc/group
      4. -
      -
    7. Test the installation of SSH (a sample session is sketched below):
      1. Open a new Cygwin terminal.
      2. Use the command whoami to verify your user ID.
      3. Issue an ssh localhost to connect to the system itself.
        1. Answer yes when presented with the server's fingerprint.
        2. Issue your password when prompted.
        3. Test a few commands in the remote session.
        4. The exit command should take you back to your first shell in Cygwin.
      4. A second exit should terminate the Cygwin shell.
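
    For illustration, such a test session might look roughly like this (the user name, host name and prompts are hypothetical):

      $ whoami
      jdoe
      $ ssh localhost
      jdoe@localhost's password:
      $ hostname
      MYPC
      $ exit
      logout
      Connection to localhost closed.
      $ exit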
    HBase

    If all previous configurations are working properly, we just need some tinkering at the HBase config files to properly resolve on Windows/Cygwin. All files and paths referenced here start from the HBase [installation directory] as working directory. A combined sketch of the edits described below follows this list.

    1. HBase uses ./conf/hbase-env.sh to configure its dependencies on the runtime environment. Copy and uncomment the following lines just underneath their originals, and change them to fit your environment. They should read something like:
      1. export JAVA_HOME=/usr/local/<jre name>
      2. export HBASE_IDENT_STRING=$HOSTNAME as this most likely does not include spaces.
    2. HBase uses the ./conf/hbase-default.xml file for configuration. Some properties do not resolve to existing directories because the JVM runs on Windows. This is the major issue to keep in mind when working with Cygwin: within the shell all paths are *nix-alike, hence relative to the root /. However, every parameter that is to be consumed within the Windows processes themselves needs to be a Windows setting, hence C:\-alike. Change the following properties in the configuration file, adjusting paths where necessary to conform with your own installation:
      1. hbase.rootdir must read e.g. file:///C:/cygwin/root/tmp/hbase/data
      2. hbase.tmp.dir must read C:/cygwin/root/tmp/hbase/tmp
      3. hbase.zookeeper.quorum must read 127.0.0.1 because for some reason localhost doesn't seem to resolve properly on Cygwin.
    3. Make sure the configured hbase.rootdir and hbase.tmp.dir directories exist and have the proper rights set up, e.g. by issuing a chmod 777 on them.
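
    As a rough sketch of these edits (the JRE folder name and the Cygwin installation path are assumptions; adjust them to your own setup), the relevant hbase-env.sh lines might read:

      export JAVA_HOME=/usr/local/<jre name>
      export HBASE_IDENT_STRING=$HOSTNAME

    and the property overrides, in the usual property notation of the configuration file, might look like:

      <property>
        <name>hbase.rootdir</name>
        <value>file:///C:/cygwin/root/tmp/hbase/data</value>
      </property>
      <property>
        <name>hbase.tmp.dir</name>
        <value>C:/cygwin/root/tmp/hbase/tmp</value>
      </property>
      <property>
        <name>hbase.zookeeper.quorum</name>
        <value>127.0.0.1</value>
      </property>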
    Testing

    This should conclude the installation and configuration of HBase on Windows using Cygwin. So it's time to test it.

    1. Start a Cygwin terminal, if you haven't already.
    2. Change directory to the HBase installation using cd /usr/local/hbase-<version>, preferably using auto-completion.
    3. Start HBase using the command ./bin/start-hbase.sh
      1. When prompted to accept the SSH fingerprint, answer yes.
      2. When prompted, provide your password. Maybe multiple times.
      3. When the command completes, the HBase server should have started.
      4. However, to be absolutely certain, check the logs in the ./logs directory for any exceptions.
    4. Next we start the HBase shell using the command ./bin/hbase shell
    5. We run some simple test commands (a sample shell session is sketched after this list):
      1. Create a simple table using the command create 'test', 'data'
      2. Verify the table exists using the command list
      3. Insert data into the table using e.g.
        put 'test', 'row1', 'data:1', 'value1'
        put 'test', 'row2', 'data:2', 'value2'
        put 'test', 'row3', 'data:3', 'value3'
      4. List all rows in the table using the command scan 'test', which should list all the rows previously inserted. Notice how 3 new columns were added without changing the schema!
      5. Finally we get rid of the table by issuing disable 'test' followed by drop 'test', verified by list, which should give an empty listing.
    6. Leave the shell by typing exit.
    7. To stop the HBase server, issue the ./bin/stop-hbase.sh command. And wait for it to complete!!! Killing the process might corrupt your data on disk.
    8. In case of problems:
      1. Verify the HBase logs in the ./logs directory.
      2. Try to fix the problem.
      3. Get help on the forums or IRC (#hbase@freenode.net). People are very active and keen to help out!
      4. Stop, restart and retest the server.
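
    For illustration, the shell portion of such a test run might look roughly as follows (timings, timestamps and some output elided; exact output varies per release):

      $ ./bin/hbase shell
      hbase(main):001:0> create 'test', 'data'
      hbase(main):002:0> list
      test
      1 row(s)
      hbase(main):003:0> put 'test', 'row1', 'data:1', 'value1'
      hbase(main):004:0> put 'test', 'row2', 'data:2', 'value2'
      hbase(main):005:0> put 'test', 'row3', 'data:3', 'value3'
      hbase(main):006:0> scan 'test'
      ROW      COLUMN+CELL
      row1     column=data:1, timestamp=..., value=value1
      row2     column=data:2, timestamp=..., value=value2
      row3     column=data:3, timestamp=..., value=value3
      3 row(s)
      hbase(main):007:0> disable 'test'
      hbase(main):008:0> drop 'test'
      hbase(main):009:0> list
      0 row(s)
      hbase(main):010:0> exit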

    Now your HBase server is running, start coding and build that next killer app on this particular, but scalable datastore!
    diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml deleted file mode 100644 index f92ff233009..00000000000 --- a/src/site/xdoc/index.xml +++ /dev/null @@ -1,77 +0,0 @@ - - - - - HBase Home - - - - -
    -

    HBase is the Hadoop database. Think of it as a distributed, scalable, big data store. -

    -

    When Would I Use HBase?

    -

    Use HBase when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. HBase is an open-source, distributed, versioned, column-oriented store modeled after Google's Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, HBase provides Bigtable-like capabilities on top of Hadoop and HDFS.

    -

    Features

    -

    -

    • Linear and modular scalability.
    • Strictly consistent reads and writes.
    • Automatic and configurable sharding of tables.
    • Automatic failover support between RegionServers.
    • Convenient base classes for backing Hadoop MapReduce jobs with HBase tables.
    • Easy to use Java API for client access.
    • Block cache and Bloom Filters for real-time queries.
    • Query predicate push down via server side Filters.
    • Thrift gateway and a REST-ful Web service that supports XML, Protobuf, and binary data encoding options.
    • Extensible jruby-based (JIRB) shell.
    • Support for exporting metrics via the Hadoop metrics subsystem to files or Ganglia; or via JMX.

    -

    Where Can I Get More Information?

    -

    See the Architecture Overview, the Apache HBase Reference Guide FAQ, - and the other documentation links on the left! -

    -
    -
    -

    June 15th, 2012 Birds-of-a-feather in San Jose, day after Hadoop Summit

    -

    May 23rd, 2012 HackConAthon in Palo Alto

    -

    May 22nd, 2012 HBaseCon2012 in San Francisco

    - -

    Old News

    -
    - - -
    diff --git a/src/site/xdoc/metrics.xml b/src/site/xdoc/metrics.xml deleted file mode 100644 index ba889033314..00000000000 --- a/src/site/xdoc/metrics.xml +++ /dev/null @@ -1,147 +0,0 @@ - - - - - - HBase Metrics - - - - -
    -

    - HBase emits Hadoop metrics. -

    -
    -
    -

    First read up on Hadoop metrics. If you are using Ganglia, the GangliaMetrics wiki page is a useful read.

    -

    To have HBase emit metrics, edit $HBASE_HOME/conf/hadoop-metrics.properties and enable metric 'contexts' per plugin. As of this writing, hadoop supports file and ganglia plugins. Yes, the hbase metrics file is named hadoop-metrics rather than hbase-metrics because currently at least the hadoop metrics system has the properties filename hardcoded. Per metrics context, comment out the NullContext and enable one or more plugins instead.
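
    For example, a hadoop-metrics.properties entry that emits the hbase context to Ganglia might look roughly like the following sketch (the gmetad host and port are placeholders, and Ganglia 3.1+ needs the GangliaContext31 class instead):

      # Configuration of the "hbase" context for ganglia
      hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext
      hbase.period=10
      hbase.servers=ganglia-host.example.com:8649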

    -

    - If you enable the hbase context, on regionservers you'll see total requests since last - metric emission, count of regions and storefiles as well as a count of memstore size. - On the master, you'll see a count of the cluster's requests. -

    -

    - Enabling the rpc context is good if you are interested in seeing - metrics on each hbase rpc method invocation (counts and time taken). -

    -

    - The jvm context is - useful for long-term stats on running hbase jvms -- memory used, thread counts, etc. - As of this writing, if more than one jvm is running emitting metrics, at least - in ganglia, the stats are aggregated rather than reported per instance. -

    -
    - -
    -

    - In addition to the standard output contexts supported by the Hadoop - metrics package, you can also export HBase metrics via Java Management - Extensions (JMX). This will allow viewing HBase stats in JConsole or - any other JMX client. -

    -
    -

    To enable JMX support in HBase, first edit $HBASE_HOME/conf/hadoop-metrics.properties to support metrics refreshing. (If you're running 0.94.1 and above, or have already configured hadoop-metrics.properties for another output context, you can skip this step.)

    # Configuration of the "hbase" context for null
    hbase.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    hbase.period=60

    # Configuration of the "jvm" context for null
    jvm.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    jvm.period=60

    # Configuration of the "rpc" context for null
    rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
    rpc.period=60
    -
    -

    - For remote access, you will need to configure JMX remote passwords - and access profiles. Create the files: -

    -
    -
    $HBASE_HOME/conf/jmxremote.passwd (set permissions to 600)

      monitorRole monitorpass
      controlRole controlpass

    $HBASE_HOME/conf/jmxremote.access

      monitorRole readonly
      controlRole readwrite
    -
    -
    -
    -

    - Finally, edit the $HBASE_HOME/conf/hbase-env.sh - script to add JMX support: -

    -
    -
    $HBASE_HOME/conf/hbase-env.sh
    -
    -

    Add the lines:

    HBASE_JMX_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false"
    HBASE_JMX_OPTS="$HBASE_JMX_OPTS -Dcom.sun.management.jmxremote.password.file=$HBASE_HOME/conf/jmxremote.passwd"
    HBASE_JMX_OPTS="$HBASE_JMX_OPTS -Dcom.sun.management.jmxremote.access.file=$HBASE_HOME/conf/jmxremote.access"

    export HBASE_MASTER_OPTS="$HBASE_JMX_OPTS -Dcom.sun.management.jmxremote.port=10101"
    export HBASE_REGIONSERVER_OPTS="$HBASE_JMX_OPTS -Dcom.sun.management.jmxremote.port=10102"
    -
    -

    - After restarting the processes you want to monitor, you should now be - able to run JConsole (included with the JDK since JDK 5.0) to view - the statistics via JMX. HBase MBeans are exported under the - hadoop domain in JMX. -
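
    For example, with the ports configured above, connecting to the master from a workstation might be as simple as (the host name is a placeholder):

      jconsole master.example.com:10101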

    -
    -
    -

    - For more information on understanding HBase metrics, see the metrics section in the HBase Reference Guide. -

    -
    -
    - -
    diff --git a/src/site/xdoc/old_news.xml b/src/site/xdoc/old_news.xml deleted file mode 100644 index 3ed0173ff53..00000000000 --- a/src/site/xdoc/old_news.xml +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - Old News - - - -
    -

    March 27th, 2012 Meetup @ StumbleUpon in San Francisco

    - -

    January 19th, 2012 Meetup @ EBay

    -

    January 23rd, 2012 HBase 0.92.0 released. Download it!

    -

    December 23rd, 2011 HBase 0.90.5 released. Download it!

    -

    November 29th, 2011 Developer Pow-Wow in SF at Salesforce HQ

    -

    November 7th, 2011 HBase Meetup in NYC (6PM) at the AppNexus office

    -

    August 22nd, 2011 HBase Hackathon (11AM) and Meetup (6PM) at FB in PA

    -

    June 30th, 2011 HBase Contributor Day, the day after the Hadoop Summit hosted by Y!

    -

    June 8th, 2011 HBase Hackathon in Berlin to coincide with Berlin Buzzwords

    -

    May 19th, 2011 HBase 0.90.3 released. Download it!

    -

    April 12th, 2011 HBase 0.90.2 released. Download it!

    -

    March 21st, HBase 0.92 Hackathon at StumbleUpon, SF

    -

    February 22nd, HUG12: February HBase User Group at StumbleUpon SF

    -

    December 13th, HBase Hackathon: Coprocessor Edition

    -

    November 19th, Hadoop HUG in London is all about HBase

    -

    November 15-19th, Devoxx features HBase Training and multiple HBase presentations

    -

    October 12th, HBase-related presentations by core contributors and users at Hadoop World 2010

    -

    October 11th, HUG-NYC: HBase User Group NYC Edition (Night before Hadoop World)

    -

    June 30th, HBase Contributor Workshop (Day after Hadoop Summit)

    -

    May 10th, 2010: HBase graduates from Hadoop sub-project to Apache Top Level Project

    -

    Signup for HBase User Group Meeting, HUG10 hosted by Trend Micro, April 19th, 2010

    - -

    HBase User Group Meeting, HUG9 hosted by Mozilla, March 10th, 2010

    -

    Sign up for the HBase User Group Meeting, HUG8, January 27th, 2010 at StumbleUpon in SF

    -

    September 8th, 2010: HBase 0.20.0 is faster, stronger, slimmer, and sweeter tasting than any previous HBase release. Get it off the Releases page.

    -

    ApacheCon in Oakland: November 2-6th, 2009: - The Apache Foundation will be celebrating its 10th anniversary in beautiful Oakland by the Bay. Lots of good talks and meetups including an HBase presentation by a couple of the lads.

    -

    HBase at Hadoop World in NYC: October 2nd, 2009: A few of us will be talking on Practical HBase out east at Hadoop World: NYC.

    -

    HUG7 and HBase Hackathon: August 7th-9th, 2009 at StumbleUpon in SF: Sign up for the HBase User Group Meeting, HUG7 or for the Hackathon or for both (all are welcome!).

    -

    June, 2009 -- HBase at HadoopSummit2009 and at NOSQL: See the presentations

    -

    March 3rd, 2009 -- HUG6: HBase User Group 6

    -

    January 30th, 2009 -- LA Hbackathon:HBase January Hackathon Los Angeles at Streamy in Manhattan Beach

    -
    - -
    diff --git a/src/site/xdoc/pseudo-distributed.xml b/src/site/xdoc/pseudo-distributed.xml deleted file mode 100644 index 3fdfde9fe3d..00000000000 --- a/src/site/xdoc/pseudo-distributed.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - -Running HBase in pseudo-distributed mode - - - - -

    This page has been retired. The contents have been moved to the - Distributed Operation: Pseudo- and Fully-distributed modes section - in the Reference Guide. -

    - - - -
    - diff --git a/src/site/xdoc/replication.xml b/src/site/xdoc/replication.xml deleted file mode 100644 index 8233520eb70..00000000000 --- a/src/site/xdoc/replication.xml +++ /dev/null @@ -1,401 +0,0 @@ - - - - - - - - - HBase Replication - - - -
    -

    HBase replication is a way to copy data between HBase deployments. It can serve as a disaster recovery solution and can contribute to providing higher availability at the HBase layer. It can also serve more practical purposes; for example, as a way to easily copy edits from a web-facing cluster to a "MapReduce" cluster which will process old and new data and ship back the results automatically.

    -

    The basic architecture pattern used for HBase replication is (HBase cluster) master-push; it is much easier to keep track of what's currently being replicated since each region server has its own write-ahead-log (aka WAL or HLog), unlike other well known solutions such as MySQL master/slave replication, where there's only one bin log to keep track of. One master cluster can replicate to any number of slave clusters, and each region server will participate in replicating its own stream of edits. For more information on the different properties of master/slave replication and other types of replication, please consult How Google Serves Data From Multiple Datacenters.

    -

    - The replication is done asynchronously, meaning that the clusters can - be geographically distant, the links between them can be offline for - some time, and rows inserted on the master cluster won’t be - available at the same time on the slave clusters (eventual consistency). -

    -

    - The replication format used in this design is conceptually the same as - - MySQL’s statement-based replication . Instead of SQL statements, whole - WALEdits (consisting of multiple cell inserts coming from the clients' - Put and Delete) are replicated in order to maintain atomicity. -

    -

    - The HLogs from each region server are the basis of HBase replication, - and must be kept in HDFS as long as they are needed to replicate data - to any slave cluster. Each RS reads from the oldest log it needs to - replicate and keeps the current position inside ZooKeeper to simplify - failure recovery. That position can be different for every slave - cluster, same for the queue of HLogs to process. -

    -

    - The clusters participating in replication can be of asymmetric sizes - and the master cluster will do its “best effort” to balance the stream - of replication on the slave clusters by relying on randomization. -

    -

    - As of version 0.92 HBase supports master/master and cyclic replication as - well as replication to multiple slaves. -

    - -
    -
    -

    - The guide on enabling and using cluster replication is contained - in the API documentation shipped with your HBase distribution. -
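
    As a rough sketch only (the peer id, cluster key, table and family names are made up; consult the documentation shipped with your release for the exact procedure), enabling replication typically involves setting hbase.replication to true in hbase-site.xml on both clusters, declaring the slave cluster in the shell, and scoping the column families to replicate:

      hbase> add_peer '1', 'slave-zk1.example.com,slave-zk2.example.com:2181:/hbase'
      hbase> disable 'usertable'
      hbase> alter 'usertable', {NAME => 'cf', REPLICATION_SCOPE => '1'}
      hbase> enable 'usertable'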

    -

    - The most up-to-date documentation is - - available at this address. -

    -
    -
    -

    - The following sections describe the life of a single edit going from a - client that communicates with a master cluster all the way to a single - slave cluster. -

    -
    -

    The client uses an HBase API that sends a Put, Delete or ICV to a region server. The key values are transformed into a WALEdit by the region server, which is inspected by the replication code that, for each family that is scoped for replication, adds the scope to the edit. The edit is appended to the current WAL and is then applied to its MemStore.
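
    For reference, a minimal client write using the pre-1.0 Java API might look like the sketch below; the table, family and qualifier names are hypothetical, and the family is only picked up by replication if it is scoped GLOBAL:

      import java.io.IOException;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.hbase.HBaseConfiguration;
      import org.apache.hadoop.hbase.client.HTable;
      import org.apache.hadoop.hbase.client.Put;
      import org.apache.hadoop.hbase.util.Bytes;

      public class PutExample {
        public static void main(String[] args) throws IOException {
          Configuration conf = HBaseConfiguration.create();
          HTable table = new HTable(conf, "usertable");  // hypothetical table name
          Put put = new Put(Bytes.toBytes("row1"));
          // hypothetical family "info" and qualifier "q1"
          put.add(Bytes.toBytes("info"), Bytes.toBytes("q1"), Bytes.toBytes("v1"));
          table.put(put);  // the region server turns this into a WALEdit, appends it to the WAL, then the MemStore
          table.close();
        }
      }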

    -

    - In a separate thread, the edit is read from the log (as part of a batch) - and only the KVs that are replicable are kept (that is, that they are part - of a family scoped GLOBAL in the family's schema, non-catalog so not - .META. or -ROOT-, and did not originate in the target slave cluster - in - case of cyclic replication). -

    -

    - The edit is then tagged with the master's cluster UUID. - When the buffer is filled, or the reader hits the end of the file, - the buffer is sent to a random region server on the slave cluster. -

    -

    - Synchronously, the region server that receives the edits reads them - sequentially and separates each of them into buffers, one per table. - Once all edits are read, each buffer is flushed using the normal HBase - client (HTables managed by a HTablePool). This is done in order to - leverage parallel insertion (MultiPut). - The master's cluster UUID is retained in the edits applied at the - slave cluster in order to allow cyclic replication. -

    -

    - Back in the master cluster's region server, the offset for the current - WAL that's being replicated is registered in ZooKeeper. -

    -
    -
    -

    - The edit is inserted in the same way. -

    -

    - In the separate thread, the region server reads, filters and buffers - the log edits the same way as during normal processing. The slave - region server that's contacted doesn't answer to the RPC, so the master - region server will sleep and retry up to a configured number of times. - If the slave RS still isn't available, the master cluster RS will select a - new subset of RS to replicate to and will retry sending the buffer of - edits. -

    -

    - In the mean time, the WALs will be rolled and stored in a queue in - ZooKeeper. Logs that are archived by their region server (archiving is - basically moving a log from the region server's logs directory to a - central logs archive directory) will update their paths in the in-memory - queue of the replicating thread. -

    -

    - When the slave cluster is finally available, the buffer will be applied - the same way as during normal processing. The master cluster RS will then - replicate the backlog of logs. -

    -
    -
    -
    -

    - This section describes in depth how each of replication's internal - features operate. -

    -
    -

    When a master cluster RS initiates a replication source to a slave cluster, it first connects to the slave's ZooKeeper ensemble using the provided cluster key (that key is composed of the value of hbase.zookeeper.quorum, zookeeper.znode.parent and hbase.zookeeper.property.clientPort). It then scans the "rs" directory to discover all the available sinks (region servers that are accepting incoming streams of edits to replicate) and will randomly choose a subset of them using a configured ratio (which has a default value of 10%). For example, if a slave cluster has 150 machines, 15 will be chosen as potential recipients for edits that this master cluster RS will be sending. Since this is done by all master cluster RSs, the probability that all slave RSs are used is very high, and this method works for clusters of any size. For example, a master cluster of 10 machines replicating to a slave cluster of 5 machines with a ratio of 10% means that the master cluster RSs will choose one machine each at random, thus the chance of overlapping and full usage of the slave cluster is higher.
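
    For instance, a slave cluster whose ZooKeeper quorum runs on three hypothetical hosts on the default client port, under the default parent znode, would be identified by a cluster key such as:

      zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase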

    -
    -
    -

    - Every master cluster RS has its own znode in the replication znodes hierarchy. - It contains one znode per peer cluster (if 5 slave clusters, 5 znodes - are created), and each of these contain a queue - of HLogs to process. Each of these queues will track the HLogs created - by that RS, but they can differ in size. For example, if one slave - cluster becomes unavailable for some time then the HLogs should not be deleted, - thus they need to stay in the queue (while the others are processed). - See the section named "Region server failover" for an example. -

    -

    When a source is instantiated, it contains the current HLog that the region server is writing to. During log rolling, the new file is added to the queue of each slave cluster's znode just before it's made available. This ensures that all the sources are aware that a new log exists before the HLog is able to append edits into it, but this operation is now more expensive. The queue items are discarded when the replication thread cannot read more entries from a file (because it reached the end of the last block) and there are other files in the queue. This means that if a source is up-to-date and replicates from the log that the region server writes to, reading up to the "end" of the current file won't delete the item in the queue.

    -

    When a log is archived (because it's not used anymore or because there are too many of them per hbase.regionserver.maxlogs, typically because the insertion rate is faster than region flushing), it will notify the source threads that the path for that log changed. If a particular source was already done with it, it will just ignore the message. If it's in the queue, the path will be updated in memory. If the log is currently being replicated, the change will be done atomically so that the reader doesn't try to open the file when it's already moved. Also, moving a file is a NameNode operation so, if the reader is currently reading the log, it won't generate any exception.

    -
    -
    -

    By default, a source will try to read from a log file and ship log entries as fast as possible to a sink. This is first limited by the filtering of log entries; only KeyValues that are scoped GLOBAL and that don't belong to catalog tables will be retained. A second limit is imposed on the total size of the list of edits to replicate per slave, which by default is 64MB. This means that a master cluster RS with 3 slaves will use at most 192MB to store data to replicate. This doesn't account for the filtered data that hasn't yet been garbage collected.

    -

    Once the maximum size of edits has been buffered or the reader hits the end of the log file, the source thread will stop reading and will choose at random a sink to replicate to (from the list that was generated by keeping only a subset of slave RSs). It will directly issue an RPC to the chosen machine and will wait for the method to return. If the RPC is successful, the source will determine whether the current file has been emptied or whether it should continue to read from it. If the former, it will delete the znode in the queue. If the latter, it will register the new offset in the log's znode. If the RPC threw an exception, the source will retry up to 10 times before trying to find a different sink.

    -
    -
    -

    If replication isn't enabled, the master's log-cleaning thread will delete old logs using a configured TTL. This doesn't work well with replication since archived logs past their TTL may still be in a queue. Thus, the default behavior is augmented so that if a log is past its TTL, the cleaning thread will look up every queue until it finds the log (while caching the ones it finds). If it's not found, the log will be deleted. The next time it has to look for a log, it will first use its cache.

    -
    -
    -

    As long as region servers don't fail, keeping track of the logs in ZK doesn't add any value. Unfortunately, they do fail, and since ZooKeeper is highly available we can count on it and its semantics to help us manage the transfer of the queues.

    -

    - All the master cluster RSs keep a watcher on every other one of them to be - notified when one dies (just like the master does). When it happens, - they all race to create a znode called "lock" inside the dead RS' znode - that contains its queues. The one that creates it successfully will - proceed by transferring all the queues to its own znode (one by one - since ZK doesn't support the rename operation) and will delete all the - old ones when it's done. The recovered queues' znodes will be named - with the id of the slave cluster appended with the name of the dead - server. -

    -

    - Once that is done, the master cluster RS will create one new source thread per - copied queue, and each of them will follow the read/filter/ship pattern. - The main difference is that those queues will never have new data since - they don't belong to their new region server, which means that when - the reader hits the end of the last log, the queue's znode will be - deleted and the master cluster RS will close that replication source. -

    -

    - For example, consider a master cluster with 3 region servers that's - replicating to a single slave with id '2'. The following hierarchy - represents what the znodes layout could be at some point in time. We - can see the RSs' znodes all contain a "peers" znode that contains a - single queue. The znode names in the queues represent the actual file - names on HDFS in the form "address,port.timestamp". -

    -
    -/hbase/replication/rs/
    -                      1.1.1.1,60020,123456780/
    -                          2/
    -                              1.1.1.1,60020.1234  (Contains a position)
    -                              1.1.1.1,60020.1265
    -                      1.1.1.2,60020,123456790/
    -                          2/
    -                              1.1.1.2,60020.1214  (Contains a position)
    -                              1.1.1.2,60020.1248
    -                              1.1.1.2,60020.1312
    -                      1.1.1.3,60020,    123456630/
    -                          2/
    -                              1.1.1.3,60020.1280  (Contains a position)
    -        
    -

    - Now let's say that 1.1.1.2 loses its ZK session. The survivors will race - to create a lock, and for some reasons 1.1.1.3 wins. It will then start - transferring all the queues to its local peers znode by appending the - name of the dead server. Right before 1.1.1.3 is able to clean up the - old znodes, the layout will look like the following: -

    -
    -/hbase/replication/rs/
    -                      1.1.1.1,60020,123456780/
    -                          2/
    -                              1.1.1.1,60020.1234  (Contains a position)
    -                              1.1.1.1,60020.1265
    -                      1.1.1.2,60020,123456790/
    -                          lock
    -                          2/
    -                              1.1.1.2,60020.1214  (Contains a position)
    -                              1.1.1.2,60020.1248
    -                              1.1.1.2,60020.1312
    -                      1.1.1.3,60020,123456630/
    -                          2/
    -                              1.1.1.3,60020.1280  (Contains a position)
    -
    -                          2-1.1.1.2,60020,123456790/
    -                              1.1.1.2,60020.1214  (Contains a position)
    -                              1.1.1.2,60020.1248
    -                              1.1.1.2,60020.1312
    -        
    -

    - Some time later, but before 1.1.1.3 is able to finish replicating the - last HLog from 1.1.1.2, let's say that it dies too (also some new logs - were created in the normal queues). The last RS will then try to lock - 1.1.1.3's znode and will begin transferring all the queues. The new - layout will be: -

    -
    -/hbase/replication/rs/
    -                      1.1.1.1,60020,123456780/
    -                          2/
    -                              1.1.1.1,60020.1378  (Contains a position)
    -
    -                          2-1.1.1.3,60020,123456630/
    -                              1.1.1.3,60020.1325  (Contains a position)
    -                              1.1.1.3,60020.1401
    -
    -                          2-1.1.1.2,60020,123456790-1.1.1.3,60020,123456630/
    -                              1.1.1.2,60020.1312  (Contains a position)
    -                      1.1.1.3,60020,123456630/
    -                          lock
    -                          2/
    -                              1.1.1.3,60020.1325  (Contains a position)
    -                              1.1.1.3,60020.1401
    -
    -                          2-1.1.1.2,60020,123456790/
    -                              1.1.1.2,60020.1312  (Contains a position)
    -        
    -
    -
    -
    -
    -

    - Yes, this is for much later. -

    -
    -
    -

    You can use the HBase-provided utility called CopyTable from the package org.apache.hadoop.hbase.mapreduce in order to have a distcp-like tool to bulk copy data.
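
    A typical invocation might look like the following sketch (the peer address and table name are placeholders; run the class without arguments to see the options supported by your release):

      $ ./bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable \
          --peer.adr=slave-zk1.example.com:2181:/hbase usertable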

    -
    -
    -

    - Yes, this behavior would help a lot but it's not currently available - in HBase (BatchUpdate had that, but it was lost in the new API). -

    -
    -
    -
    -

    - Here's a list of all the jiras that relate to major issues or missing - features in the replication implementation. -

    -
      -
    1. HBASE-2611: basically, if a region server dies while recovering the queues of another dead RS, we will miss the data from the queues that weren't copied.
    - -
    diff --git a/src/site/xdoc/sponsors.xml b/src/site/xdoc/sponsors.xml deleted file mode 100644 index e39730bbbd2..00000000000 --- a/src/site/xdoc/sponsors.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - Installing HBase on Windows using Cygwin - - - -
    -

    The below companies have been gracious enough to provide their commercial tool offerings free of charge to the Apache HBase project.

    -

    -
    - -