From c9da74ebc75827be5a9c141f3af23c2063d85f4d Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Fri, 29 Oct 2010 23:53:09 +0000 Subject: [PATCH] HBASE-2406 Define semantics of cell timestamps/versions git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1028949 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + pom.xml | 4 + src/docbkx/book.xml | 418 ++++++++++++++++++++++++++++++++-- src/docbkx/sample_article.xml | 57 ----- src/site/site.xml | 1 - 5 files changed, 403 insertions(+), 78 deletions(-) delete mode 100644 src/docbkx/sample_article.xml diff --git a/CHANGES.txt b/CHANGES.txt index c37848e1f6b..5958a775fe6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -626,6 +626,7 @@ Release 0.21.0 - Unreleased (Nicolas Spiegelberg via Stack) HBASE-3172 Reverse order of AssignmentManager and MetaNodeTracker in ZooKeeperWatcher + HBASE-2406 Define semantics of cell timestamps/versions IMPROVEMENTS diff --git a/pom.xml b/pom.xml index 5645ce33324..a606f1c752e 100644 --- a/pom.xml +++ b/pom.xml @@ -255,6 +255,10 @@ true true true + book- + 100 + true + true ${basedir}/target/site/ diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml index 82bc358a9a9..b7dbc261568 100644 --- a/src/docbkx/book.xml +++ b/src/docbkx/book.xml @@ -23,17 +23,373 @@ + + Introduction + + This book aims to be the official guide for the HBase version it ships with. + This document describes HBase version . + Herein you will find either the definitive documentation on an HBase topic + as of its standing when the referenced HBase version shipped, or failing + that, this book will point to the location in javadoc, + JIRA + or wiki + where the pertinent information can be found. + + This book is a work in progress. It is lacking in many areas but we + hope to fill in the holes with time. Feel free to add to this book should + you feel so inclined by adding a patch to an issue up in the HBase JIRA. + + Getting Started -
- Requirements +
+ Quick Start - First... + + Here is a quick guide to starting up a standalone HBase + instance, inserting rows into a table via the HBase Shell, and then cleaning up and shutting + down your instance. + + + Download and unpack the latest stable release. + + Choose a download source from Apache + Download Mirrors. Click on it. This will take you to a + mirror of the HBase Releases page. Click on + the folder named stable and then download the + file .tar.gz. + + Decompress and untar your download. Then change into the + unpacked directory and start HBase + + $ tar xfz .tar.gz +$ cd + + You now have a running HBase instance. HBase logs can be + found in the logs subdirectory. Check them + out. + + + + Connect to your running HBase via the HBase Shell + + $ ./bin/hbase shell +HBase Shell; enter 'help<RETURN>' for list of supported commands. +Type "exit<RETURN>" to leave the HBase Shell +Version: 0.89.20100924, r1001068, Fri Sep 24 13:55:42 PDT 2010 + +hbase(main):001:0> + + Type help to see a listing of shell + commands and options. Browse at least the paragraphs at the end of + the help emission for the gist of how variables are entered in the + HBase shell; in particular note how table names, rows, and + columns, etc., must be quoted. + + + + Create a table named test with a single + column family named cf. + + hbase(main):003:0> create 'test', 'cf' +0 row(s) in 1.2200 seconds + + + + Insert some values into the table + test. + + Below we insert 3 values. The first insert is at + row1, column cf:a -- columns + have a column family prefix delimited by the colon character -- + with a value of value1.
+ + hbase(main):004:0> put 'test', 'row1', 'cf:a', 'value1' +0 row(s) in 0.0560 seconds +hbase(main):005:0> put 'test', 'row2', 'cf:b', 'value2' +0 row(s) in 0.0370 seconds +hbase(main):006:0> put 'test', 'row3', 'cf:c', 'value3' +0 row(s) in 0.0450 seconds + + + + Verify the table content + + Run a scan of the table by doing the following + + hbase(main):007:0> scan 'test' +ROW COLUMN+CELL +row1 column=cf:a, timestamp=1288380727188, value=value1 +row2 column=cf:b, timestamp=1288380738440, value=value2 +row3 column=cf:c, timestamp=1288380747365, value=value3 +3 row(s) in 0.0590 seconds + + Get a single row as follows + + hbase(main):008:0> get 'test', 'row1' +COLUMN CELL +cf:a timestamp=1288380727188, value=value1 +1 row(s) in 0.0400 seconds + + + + Now, disable and drop your table. This will clean up all + done above. + + hbase(main):012:0> disable 'test' +0 row(s) in 1.0930 seconds +hbase(main):013:0> drop 'test' +0 row(s) in 0.0770 seconds + + + + Exit the shell by typing exit. + + hbase(main):014:0> exit +$ + + + + Stop your hbase instance by running the stop script. + + $ ./bin/stop-hbase.sh +stopping hbase............... + + +
+ +
+ Not-so-quick Start + + The HBase API overview document contains a detailed Getting + Started with a list of requirements and description of the + different HBase run modes: standalone, what is described above in Quick Start, pseudo-distributed where all + daemons run on a single server, and distributed.
- + + Data Model + +
+ Table + + +
+ +
+ Row + + +
+ +
+ Column Family + + +
+ +
+ Versions + + A {row, column, version} tuple exactly + specifies a cell in HBase. It's possible to have an + unbounded number of cells where the row and column are the same but the + cell address differs only in its version dimension. + + While row and column keys are expressed as bytes, the version is + specified using a long integer. Typically this long contains time + instances such as those returned by + java.util.Date.getTime() or + System.currentTimeMillis(), that is: the difference, + measured in milliseconds, between the current time and midnight, January + 1, 1970 UTC. + + The HBase version dimension is stored in decreasing order, so that + when reading from a store file, the most recent values are found + first. + + There is a lot of confusion over the semantics of + cell versions in HBase. In particular, a couple of + questions that often come up are: + + If multiple writes to a cell have the same version, are all + versions maintained or just the last? + Currently, only the last written is fetchable. + + + + + Is it OK to write cells in a non-increasing version + order? + Yes + + + + + Below we describe how the version dimension in HBase currently + works + See HBASE-2406 + for discussion of HBase versions. Bending time + in HBase makes for a good read on the version, or time, + dimension in HBase. It has more detail on versioning than is + provided here. As of this writing, the limitation + Overwriting values at existing timestamps + mentioned in the article no longer holds in HBase. This section is + basically a synopsis of this article by Bruno Dumon. + . +
+ Versions and HBase Operations + + In this section we look at the behavior of the version dimension + for each of the core HBase operations. + +
+ Get/Scan + + Gets are implemented on top of Scans. The below discussion of + Get applies equally to Scans. + + By default, i.e. if you specify no explicit version, when + doing a get, the cell whose version has the + largest value is returned (which may or may not be the latest one + written, see later). The default behavior can be modified in the + following ways: + + + + to return more than one version, see Get.setMaxVersions() + + + + to return versions other than the latest, see Get.setTimeRange() + + To retrieve the latest version that is less than or equal + to a given value, thus giving the 'latest' state of the record + at a certain point in time, just use a range from 0 to the + desired version and set the max versions to 1. + + +
+ +
+ Put + + Doing a put always creates a new version of a + cell, at a certain timestamp. By default the + system uses the server's currentTimeMillis, but + you can specify the version (= the long integer) yourself, on a + per-column level. This means you could assign a time in the past or + the future, or use the long value for non-time purposes. + + To overwrite an existing value, do a put at exactly the same + row, column, and version as that of the cell you would + overshadow. +
+ +
+ Delete + + When performing a delete operation in HBase, there are two + ways to specify the versions to be deleted + + + + Delete all versions older than a certain timestamp + + + + Delete the version at a specific timestamp + + + + A delete can apply to a complete row, a complete column + family, or to just one column. It is only in the last case that you + can delete explicit versions. For the deletion of a row or all the + columns within a family, it always works by deleting all cells older + than a certain version. + + Deletes work by creating tombstone + markers. For example, let's suppose we want to delete a row. For + this you can specify a version, or else by default the + currentTimeMillis is used. What this means is + delete all cells where the version is less than or equal to + this version. HBase never modifies data in place, so for + example a delete will not immediately delete (or mark as deleted) + the entries in the storage file that correspond to the delete + condition. Rather, a so-called tombstone is + written, which will mask the deleted values + When HBase does a major compaction, the tombstones are + processed to actually remove the dead values, together with the + tombstones themselves. + . If the version you specified when deleting a row is + larger than the version of any value in the row, then you can + consider the complete row to be deleted. +
+
+ +
+ Current Limitations + + There are still some bugs (or at least 'undecided behavior') + with the version dimension that will be addressed by later HBase + releases. + +
+ Deletes mask Puts + + Deletes mask puts, even puts that happened after the delete + was entered + HBASE-2256 + . Remember that a delete writes a tombstone, which only + disappears after the next major compaction has run. Suppose you do + a delete of everything <= T. After this you do a new put with a + timestamp <= T. This put, even if it happened after the delete, + will be masked by the delete tombstone. Performing the put will not + fail, but when you do a get you will notice the put had no + effect. It will start working again after the major compaction has + run. These issues should not be a problem if you use + always-increasing versions for new puts to a row. But they can occur + even if you do not care about time: just do delete and put + immediately after each other, and there is some chance they happen + within the same millisecond.
+ +
+ Major compactions change query results + + ...create three cell versions at t1, t2 and t3, with a + maximum-versions setting of 2. So when getting all versions, only + the values at t2 and t3 will be returned. But if you delete the + version at t2 or t3, the one at t1 will appear again. Obviously, + once a major compaction has run, such behavior will not be the case + anymore... + See Garbage Collection in Bending + time in HBase + +
+
+
+
+ + The HBase Shell @@ -63,11 +419,14 @@
- + Regions This chapter is all about Regions. + + Does this belong in the data model chapter? +
Region Size @@ -114,10 +473,11 @@
Region Transitions - - TODO: Review all of the below to ensure it matches what was - committed -- St.Ack 20100901 - + + + TODO: Review all of the below to ensure it matches what was + committed -- St.Ack 20100901 + Regions only transition in a limited set of circumstances. @@ -674,20 +1034,21 @@
+
- Region Splits - Splits run unaided on the RegionServer; i.e. the Master does not - participate. The RegionServer splits - a region, offlines the split region and then adds the daughter regions - to META, opens daughters on the parent's hosting RegionServer and then - reports the split to the master. - + Region Splits + + Splits run unaided on the RegionServer; i.e. the Master does not + participate. The RegionServer splits a region, offlines the split + region and then adds the daughter regions to META, opens daughters on + the parent's hosting RegionServer and then reports the split to the + master.
- The WAL + The WAL HBase's Write-Ahead @@ -767,7 +1128,7 @@ - Bloom Filters + Bloom Filters Bloom filters were developed over in HBase-1200 @@ -796,7 +1157,8 @@ Configurations Blooms are enabled by specifying options on a column family in the - HBase shell or in java code as specification on org.apache.hadoop.hbase.HColumnDescriptor. + HBase shell or in java code as specification on + org.apache.hadoop.hbase.HColumnDescriptor.
<code>HColumnDescriptor</code> option @@ -885,9 +1247,25 @@ - Tools + Tools Here we list HBase tools for administration, analysis, fixup, and debugging. + + + HBase Glossary + + + column family + + cf + + cf + + + Define a column family + + + diff --git a/src/docbkx/sample_article.xml b/src/docbkx/sample_article.xml deleted file mode 100644 index 00ba855e809..00000000000 --- a/src/docbkx/sample_article.xml +++ /dev/null @@ -1,57 +0,0 @@ - -
- - Wah-wah -<?eval ${project.version}?> - - - - - -
- Wah-Wah changed my life - - I was born very young... - - This is a sample docbook article. - - - - -
- Then - - -
- -
- And - - -
- -
- Later - - -
-
- -
- Good books - - -
- -
- Rainy days - - Today it was raining -
-
diff --git a/src/site/site.xml b/src/site/site.xml index 863edbe5af2..11598d9dd0f 100644 --- a/src/site/site.xml +++ b/src/site/site.xml @@ -38,7 +38,6 @@ -