HBASE-1447 Take last version of the hbase-1249 design doc. and make documentation out of it
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@785081 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
78af4ede60
commit
363d81c247
|
@ -193,6 +193,8 @@ Release 0.20.0 - Unreleased
|
|||
HBASE-1529 familyMap not invalidated when a Result is (re)read as a
|
||||
Writable
|
||||
HBASE-1528 Ensure scanners work across memcache snapshot
|
||||
HBASE-1447 Take last version of the hbase-1249 design doc. and make
|
||||
documentation out of it
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
||||
|
|
|
@ -35,95 +35,93 @@ Provides HBase Client
|
|||
|
||||
<div style="background-color: #cccccc; padding: 2px">
|
||||
<blockquote><pre>
|
||||
REPLACE!!!!!!!!
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Scanner;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
public class MyClient {
|
||||
|
||||
public static void main(String args[]) throws IOException {
|
||||
// Class that has nothing but a main.
|
||||
// Does a Put, Get and a Scan against an hbase table.
|
||||
public class MyLittleHBaseClient {
|
||||
public static void main(String[] args) throws IOException {
|
||||
// You need a configuration object to tell the client where to connect.
|
||||
// But don't worry, the defaults are pulled from the local config file.
|
||||
// When you create a HBaseConfiguration, it reads in whatever you've set
|
||||
// into your hbase-site.xml and in hbase-default.xml, as long as these can
|
||||
// be found on the CLASSPATH
|
||||
HBaseConfiguration config = new HBaseConfiguration();
|
||||
|
||||
// This instantiates an HTable object that connects you to the "myTable"
|
||||
// table.
|
||||
HTable table = new HTable(config, "myTable");
|
||||
// This instantiates an HTable object that connects you to
|
||||
// the "myLittleHBaseTable" table.
|
||||
HTable table = new HTable(config, "myLittleHBaseTable");
|
||||
|
||||
// To do any sort of update on a row, you use an instance of the BatchUpdate
|
||||
// class. A BatchUpdate takes a row and optionally a timestamp which your
|
||||
// updates will affect. If no timestamp, the server applies current time
|
||||
// to the edits.
|
||||
BatchUpdate batchUpdate = new BatchUpdate("myRow");
|
||||
// To add to a row, use Put. A Put constructor takes the name of the row
|
||||
// you want to insert into as a byte array. In HBase, the Bytes class has
|
||||
// utility for converting all kinds of java types to byte arrays. In the
|
||||
// below, we are converting the String "myLittleRow" into a byte array to
|
||||
// use as a row key for our update. Once you have a Put instance, you can
|
||||
// adorn it by setting the names of columns you want to update on the row,
|
||||
// the timestamp to use in your update, etc. If no timestamp, the server
|
||||
// applies current time to the edits.
|
||||
Put p = new Put(Bytes.toBytes("myLittleRow"));
|
||||
|
||||
// The BatchUpdate#put method takes a byte [] (or String) that designates
|
||||
// what cell you want to put a value into, and a byte array that is the
|
||||
// value you want to store. Note that if you want to store Strings, you
|
||||
// have to getBytes() from the String for HBase to store it since HBase is
|
||||
// all about byte arrays. The same goes for primitives like ints and longs
|
||||
// and user-defined classes - you must find a way to reduce it to bytes.
|
||||
// The Bytes class from the hbase util package has utility for going from
|
||||
// String to utf-8 bytes and back again and help for other base types.
|
||||
batchUpdate.put("myColumnFamily:columnQualifier1",
|
||||
Bytes.toBytes("columnQualifier1 value!"));
|
||||
// To set the value you'd like to update in the row 'myRow', specify the
|
||||
// column family, column qualifier, and value of the table cell you'd like
|
||||
// to update. The column family must already exist in your table schema.
|
||||
// The qualifier can be anything. All must be specified as byte arrays as
|
||||
// hbase is all about byte arrays. Let's pretend the table
|
||||
// 'myLittleHBaseTable' was created with a family 'myLittleFamily'.
|
||||
p.add(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"),
|
||||
Bytes.toBytes("Some Value"));
|
||||
|
||||
// Deletes are batch operations in HBase as well.
|
||||
batchUpdate.delete("myColumnFamily:cellIWantDeleted");
|
||||
|
||||
// Once you've done all the puts you want, you need to commit the results.
|
||||
// The HTable#commit method takes the BatchUpdate instance you've been
|
||||
// building and pushes the batch of changes you made into HBase.
|
||||
table.commit(batchUpdate);
|
||||
// Once you've adorned your Put instance with all the updates you want to
|
||||
// make, to commit it do the following (The HTable#put method takes the
|
||||
// Put instance you've been building and pushes the changes you made into
|
||||
// hbase)
|
||||
table.put(p);
|
||||
|
||||
// Now, to retrieve the data we just wrote. The values that come back are
|
||||
// Cell instances. A Cell is a combination of the value as a byte array and
|
||||
// the timestamp the value was stored with. If you happen to know that the
|
||||
// value contained is a string and want an actual string, then you must
|
||||
// convert it yourself.
|
||||
Cell cell = table.get("myRow", "myColumnFamily:columnQualifier1");
|
||||
// This could throw a NullPointerException if there was no value at the cell
|
||||
// location.
|
||||
String valueStr = Bytes.toString(cell.getValue());
|
||||
|
||||
// Result instances. Generally, a Result is an object that will package up
|
||||
// the hbase return into the form you find most palatable.
|
||||
Get g = new Get(Bytes.toBytes("myLittleRow"));
|
||||
Result r = table.get(g);
|
||||
byte [] value = r.getValue(Bytes.toBytes("myLittleFamily"),
|
||||
Bytes.toBytes("someQualifier"));
|
||||
// If we convert the value bytes, we should get back 'Some Value', the
|
||||
// value we inserted at this location.
|
||||
String valueStr = Bytes.toString(value);
|
||||
System.out.println("GET: " + valueStr);
|
||||
|
||||
// Sometimes, you won't know the row you're looking for. In this case, you
|
||||
// use a Scanner. This will give you a cursor-like interface to the contents
|
||||
// of the table.
|
||||
Scanner scanner =
|
||||
// we want to get back only "myColumnFamily:columnQualifier1" when we iterate
|
||||
table.getScanner(new String[]{"myColumnFamily:columnQualifier1"});
|
||||
|
||||
|
||||
// Scanners return RowResult instances. A RowResult is like the
|
||||
// row key and the columns all wrapped up in a single Object.
|
||||
// RowResult#getRow gives you the row key. RowResult also implements
|
||||
// Map, so you can get to your column results easily.
|
||||
|
||||
// Now, for the actual iteration. One way is to use a while loop like so:
|
||||
RowResult rowResult = scanner.next();
|
||||
|
||||
while (rowResult != null) {
|
||||
// print out the row we found and the columns we were looking for
|
||||
System.out.println("Found row: " + Bytes.toString(rowResult.getRow()) +
|
||||
" with value: " + rowResult.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
|
||||
rowResult = scanner.next();
|
||||
// of the table. To set up a Scanner, do like you did above making a Put
|
||||
// and a Get, create a Scan. Adorn it with column names, etc.
|
||||
Scan s = new Scan();
|
||||
s.addColumn(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"));
|
||||
ResultScanner scanner = table.getScanner(s);
|
||||
try {
|
||||
// Scanners return Result instances.
|
||||
// Now, for the actual iteration. One way is to use a while loop like so:
|
||||
for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
|
||||
// print out the row we found and the columns we were looking for
|
||||
System.out.println("Found row: " + rr);
|
||||
}
|
||||
|
||||
// The other approach is to use a foreach loop. Scanners are iterable!
|
||||
// for (Result rr : scanner) {
|
||||
// System.out.println("Found row: " + rr);
|
||||
// }
|
||||
} finally {
|
||||
// Make sure you close your scanners when you are done!
|
||||
// That's why we have it inside a try/finally clause
|
||||
scanner.close();
|
||||
}
|
||||
|
||||
// The other approach is to use a foreach loop. Scanners are iterable!
|
||||
for (RowResult result : scanner) {
|
||||
// print out the row we found and the columns we were looking for
|
||||
System.out.println("Found row: " + Bytes.toString(rowResult.getRow()) +
|
||||
" with value: " + rowResult.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
|
||||
}
|
||||
|
||||
// Make sure you close your scanners when you are done!
|
||||
// Its probably best to put the iteration into a try/finally with the below
|
||||
// inside the finally clause.
|
||||
scanner.close();
|
||||
}
|
||||
}
|
||||
</pre></blockquote>
|
||||
|
|
|
@ -142,7 +142,7 @@ implements InputFormat<ImmutableBytesWritable, Result> {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param inputColumns the columns to be placed in {@link RowResult}.
|
||||
* @param inputColumns the columns to be placed in {@link Result}.
|
||||
*/
|
||||
public void setInputColumns(final byte [][] inputColumns) {
|
||||
this.trrInputColumns = inputColumns;
|
||||
|
@ -304,7 +304,7 @@ implements InputFormat<ImmutableBytesWritable, Result> {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param inputColumns to be passed in {@link RowResult} to the map task.
|
||||
* @param inputColumns to be passed in {@link Result} to the map task.
|
||||
*/
|
||||
protected void setInputColumns(byte [][] inputColumns) {
|
||||
this.inputColumns = inputColumns;
|
||||
|
|
|
@ -47,7 +47,6 @@ public class GetDeleteTracker implements DeleteTracker {
|
|||
|
||||
/**
|
||||
* Constructor
|
||||
* @param comparator
|
||||
*/
|
||||
public GetDeleteTracker() {}
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ public class MinorCompactingStoreScanner implements KeyValueScanner, InternalSca
|
|||
/**
|
||||
* High performance merge scan.
|
||||
* @param writer
|
||||
* @return
|
||||
* @return True if more.
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean next(HFile.Writer writer) throws IOException {
|
||||
|
|
|
@ -35,6 +35,14 @@
|
|||
ssh must be installed and sshd must be running to use Hadoop's
|
||||
scripts to manage remote Hadoop daemons.
|
||||
</li>
|
||||
<li>HBase depends on <a href="http://hadoop.apache.org/zookeeper/">ZooKeeper</a> as of release 0.20.0.
|
||||
Clients and Servers now must know their ZooKeeper Quorum locations before
|
||||
they can do anything else.
|
||||
In basic standalone and pseudo-distributed modes, HBase manages a ZooKeeper instance
|
||||
for you but it is required that you run a ZooKeeper Quorum when running HBase
|
||||
fully distributed (More on this below). The Zookeeper addition changes
|
||||
how some core HBase configuration is done.
|
||||
</li>
|
||||
<li>HBase currently is a file handle hog. The usual default of
|
||||
1024 on *nix systems is insufficient if you are loading any significant
|
||||
amount of data into regionservers. See the
|
||||
|
@ -46,11 +54,6 @@
|
|||
<li>The clocks on cluster members should be in basic alignment. Some skew is tolerable but
|
||||
wild skew can generate odd behaviors. Run <a href="http://en.wikipedia.org/wiki/Network_Time_Protocol">NTP</a>
|
||||
on your cluster, or an equivalent.</li>
|
||||
<li>HBase depends on <a href="http://hadoop.apache.org/zookeeper/">ZooKeeper</a> as of release 0.20.0.
|
||||
In basic standalone and pseudo-distributed modes, HBase manages a ZooKeeper instance
|
||||
for you but it is required that you run a ZooKeeper Quorum when running HBase
|
||||
fully distributed (More on this below).
|
||||
</li>
|
||||
<li>This is a list of patches we recommend you apply to your running Hadoop cluster:
|
||||
<ul>
|
||||
<li><a href="https://issues.apache.org/jira/browse/HADOOP-4681">HADOOP-4681 <i>"DFSClient block read failures cause open DFSInputStream to become unusable"</i></a>. This patch will help with the ever-popular, "No live nodes contain current block".
|
||||
|
@ -86,6 +89,9 @@ What follows presumes you have obtained a copy of HBase,
|
|||
see <a href="http://hadoop.apache.org/hbase/releases.html">Releases</a>, and are installing
|
||||
for the first time. If upgrading your
|
||||
HBase instance, see <a href="#upgrading">Upgrading</a>.
|
||||
If you have used HBase in the past,
|
||||
please read carefully. Some core configuration has changed in 0.20.x HBase.
|
||||
</p>
|
||||
<p>Three modes are described: standalone, pseudo-distributed (where all servers are run on
|
||||
a single host), and distributed. If new to hbase, start by following the standalone instructions.
|
||||
</p>
|
||||
|
|
|
@ -38,9 +38,6 @@ import org.apache.hadoop.hbase.client.Get;
|
|||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.filter.StopRowFilter;
|
||||
import org.apache.hadoop.hbase.filter.WhileMatchRowFilter;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.io.hfile.Compression;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
|
|
Loading…
Reference in New Issue