HBASE-40 [hbase] Add a method of getting multiple (but not all) cells for a row at once

- new override of getRow added through the whole stack (client, RPC interface, region server, region, store)
- test for the new override added to TestGet2

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@636415 13f79535-47bb-0310-9956-ffa450edef68
Bryan Duxbury 2008-03-12 16:58:10 +00:00
parent 082e0237fd
commit 6fb7767fc6
9 changed files with 206 additions and 53 deletions
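For context, this is what the new client-side call looks like in use. The following is a minimal, hypothetical sketch, not part of this commit: it assumes the 0.1-era HTable(HBaseConfiguration, Text) constructor, and the table and column names are made up.

import java.util.Map;
import java.util.SortedMap;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTable;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.io.Text;

public class GetRowExample {
  public static void main(String[] args) throws Exception {
    // Open a hypothetical table named "mytable".
    HTable table = new HTable(new HBaseConfiguration(), new Text("mytable"));

    // Ask for just two named cells of the row instead of the whole row;
    // the two-argument override defaults to HConstants.LATEST_TIMESTAMP.
    Text row = new Text("some_row");
    Text[] columns = { new Text("colfamily1:a"), new Text("colfamily1:b") };
    SortedMap<Text, Cell> cells = table.getRow(row, columns);

    // The map is empty if the row does not exist; otherwise print column -> value.
    for (Map.Entry<Text, Cell> e : cells.entrySet()) {
      System.out.println(e.getKey() + " => " + new String(e.getValue().getValue()));
    }
  }
}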


@@ -5,9 +5,11 @@ Hbase Change Log
0.17.0-dev.2008-02-07_12-01-58. (Tom White via Stack)
NEW FEATURES
HBASE-40 Add a method of getting multiple (but not all) cells for a row at once
OPTIMIZATIONS
HBASE-430 Performance: Scanners and getRow return maps with duplicate data
BUG FIXES
HBASE-19 CountingBloomFilter can overflow its storage
(Stu Hood and Bryan Duxbury via Stack)


@@ -361,6 +361,50 @@ public class HTable implements HConstants {
return results;
}
/**
* Get selected columns for the specified row at the latest timestamp
*
* @param row row key
* @param columns Array of column names you want to retrieve.
* @return Map of columns to values. Map is empty if row does not exist.
* @throws IOException
*/
public SortedMap<Text, Cell> getRow(final Text row, final Text[] columns)
throws IOException {
return getRow(row, columns, HConstants.LATEST_TIMESTAMP);
}
/**
* Get selected columns for the specified row at a specified timestamp
*
* @param row row key
* @param columns Array of column names you want to retrieve.
* @param ts timestamp
* @return Map of columns to values. Map is empty if row does not exist.
* @throws IOException
*/
public SortedMap<Text, Cell> getRow(final Text row, final Text[] columns,
final long ts)
throws IOException {
HbaseMapWritable value = null;
value = getRegionServerWithRetries(new ServerCallable<HbaseMapWritable>(row) {
public HbaseMapWritable call() throws IOException {
return server.getRow(location.getRegionInfo().getRegionName(), row,
columns, ts);
}
});
SortedMap<Text, Cell> results = new TreeMap<Text, Cell>();
if (value != null && value.size() != 0) {
for (Map.Entry<Writable, Writable> e: value.entrySet()) {
HStoreKey key = (HStoreKey) e.getKey();
results.put(key.getColumn(), (Cell)e.getValue());
}
}
return results;
}
/**
* Get a scanner on the current table starting at the specified row.
* Return the specified columns.


@@ -92,17 +92,6 @@ public interface HRegionInterface extends VersionedProtocol {
final Text column, final long timestamp, final int numVersions)
throws IOException;
/**
* Get all the data for the specified row
*
* @param regionName region name
* @param row row key
* @return map of values
* @throws IOException
*/
public HbaseMapWritable getRow(final Text regionName, final Text row)
throws IOException;
/**
* Get all the data for the specified row at a given timestamp
*
@@ -140,6 +129,30 @@ public interface HRegionInterface extends VersionedProtocol {
final Text row, final long ts)
throws IOException;
/**
* Get selected columns for the specified row at a given timestamp.
*
* @param regionName region name
* @param row row key
* @return map of values
* @throws IOException
*/
public HbaseMapWritable getRow(final Text regionName, final Text row,
final Text[] columns, final long ts)
throws IOException;
/**
* Get selected columns for the specified row at the latest timestamp.
*
* @param regionName region name
* @param row row key
* @return map of values
* @throws IOException
*/
public HbaseMapWritable getRow(final Text regionName, final Text row,
final Text[] columns)
throws IOException;
/**
* Applies a batch of updates via one RPC
*


@@ -24,6 +24,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;
@@ -1067,24 +1068,6 @@ public class HRegion implements HConstants {
return targetStore.get(key, numVersions);
}
/**
* Fetch all the columns for the indicated row.
* Returns a TreeMap that maps column names to values.
*
* We should eventually use Bloom filters here, to reduce running time. If
* the database has many column families and is very sparse, then we could be
* checking many files needlessly. A small Bloom for each row would help us
* determine which column groups are useful for that row. That would let us
* avoid a bunch of disk activity.
*
* @param row
* @return Map<columnName, byte[]> values
* @throws IOException
*/
public Map<Text, Cell> getFull(Text row) throws IOException {
return getFull(row, HConstants.LATEST_TIMESTAMP);
}
/**
* Fetch all the columns for the indicated row at a specified timestamp.
* Returns a TreeMap that maps column names to values.
@@ -1096,18 +1079,21 @@ public class HRegion implements HConstants {
* avoid a bunch of disk activity.
*
* @param row
* @param columns Array of columns you'd like to retrieve. When null, get all.
* @param ts
* @return Map<columnName, byte[]> values
* @return Map<columnName, Cell> values
* @throws IOException
*/
public Map<Text, Cell> getFull(Text row, long ts) throws IOException {
public Map<Text, Cell> getFull(final Text row, final Set<Text> columns,
final long ts)
throws IOException {
HStoreKey key = new HStoreKey(row, ts);
obtainRowLock(row);
try {
TreeMap<Text, Cell> result = new TreeMap<Text, Cell>();
for (Text colFamily: stores.keySet()) {
HStore targetStore = stores.get(colFamily);
targetStore.getFull(key, result);
targetStore.getFull(key, columns, result);
}
return result;
} finally {
@@ -1162,7 +1148,7 @@ public class HRegion implements HConstants {
TreeMap<Text, Cell> result = new TreeMap<Text, Cell>();
for (Text colFamily: stores.keySet()) {
HStore targetStore = stores.get(colFamily);
targetStore.getFull(key, result);
targetStore.getFull(key, null, result);
}
return result;


@@ -24,6 +24,7 @@ import java.lang.Thread.UncaughtExceptionHandler;
import java.lang.reflect.Constructor;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -948,20 +949,34 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
}
/** {@inheritDoc} */
public HbaseMapWritable getRow(final Text regionName, final Text row)
public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts)
throws IOException {
return getRow(regionName, row, HConstants.LATEST_TIMESTAMP);
return getRow(regionName, row, null, ts);
}
/** {@inheritDoc} */
public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts)
public HbaseMapWritable getRow(final Text regionName, final Text row,
final Text[] columns)
throws IOException {
return getRow(regionName, row, columns, HConstants.LATEST_TIMESTAMP);
}
/** {@inheritDoc} */
public HbaseMapWritable getRow(final Text regionName, final Text row,
final Text[] columns, final long ts)
throws IOException {
checkOpen();
requestCount.incrementAndGet();
try {
// convert the columns array into a set so it's easy to check later.
Set<Text> columnSet = new HashSet<Text>();
if (columns != null) {
columnSet.addAll(Arrays.asList(columns));
}
HRegion region = getRegion(regionName);
Map<Text, Cell> map = region.getFull(row, columnSet, ts);
HbaseMapWritable result = new HbaseMapWritable();
Map<Text, Cell> map = region.getFull(row, ts);
for (Map.Entry<Text, Cell> es: map.entrySet()) {
result.put(new HStoreKey(row, es.getKey()), es.getValue());
}


@@ -27,6 +27,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Map.Entry;
@@ -184,14 +185,14 @@ public class HStore implements HConstants {
* @param key
* @param results
*/
void getFull(HStoreKey key, SortedMap<Text, Cell> results) {
void getFull(HStoreKey key, Set<Text> columns, SortedMap<Text, Cell> results) {
this.lock.readLock().lock();
try {
synchronized (memcache) {
internalGetFull(memcache, key, results);
internalGetFull(memcache, key, columns, results);
}
synchronized (snapshot) {
internalGetFull(snapshot, key, results);
internalGetFull(snapshot, key, columns, results);
}
} finally {
@@ -200,7 +201,7 @@ }
}
private void internalGetFull(SortedMap<HStoreKey, byte[]> map, HStoreKey key,
SortedMap<Text, Cell> results) {
Set<Text> columns, SortedMap<Text, Cell> results) {
if (map.isEmpty() || key == null) {
return;
@@ -214,7 +215,9 @@
byte [] val = tailMap.get(itKey);
if (!HLogEdit.isDeleted(val)) {
results.put(itCol, new Cell(val, itKey.getTimestamp()));
if (columns == null || columns.contains(itKey.getColumn())) {
results.put(itCol, new Cell(val, itKey.getTimestamp()));
}
}
} else if (key.getRow().compareTo(itKey.getRow()) < 0) {
@@ -1601,24 +1604,36 @@
* Return all the available columns for the given key. The key indicates a
* row and timestamp, but not a column name.
*
* The returned object should map column names to byte arrays (byte[]).
* The returned object should map column names to Cells.
*/
void getFull(HStoreKey key, TreeMap<Text, Cell> results)
throws IOException {
void getFull(HStoreKey key, final Set<Text> columns, TreeMap<Text, Cell> results)
throws IOException {
Map<Text, List<Long>> deletes = new HashMap<Text, List<Long>>();
// if the key is null, we're not even looking for anything. return.
if (key == null) {
return;
}
this.lock.readLock().lock();
memcache.getFull(key, results);
// get from the memcache first.
memcache.getFull(key, columns, results);
try {
MapFile.Reader[] maparray = getReaders();
// examine each mapfile
for (int i = maparray.length - 1; i >= 0; i--) {
MapFile.Reader map = maparray[i];
// synchronize on the map so that no one else iterates it at the same
// time
synchronized(map) {
// seek back to the beginning
map.reset();
// seek to the closest key that should match the row we're looking for
ImmutableBytesWritable readval = new ImmutableBytesWritable();
HStoreKey readkey = (HStoreKey)map.getClosest(key, readval);
if (readkey == null) {
@@ -1631,7 +1646,9 @@
if(isDeleted(readkey, readval.get(), true, deletes)) {
break;
}
results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp()));
if (columns == null || columns.contains(readkey.getColumn())) {
results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp()));
}
readval = new ImmutableBytesWritable();
} else if(key.getRow().compareTo(readkey.getRow()) < 0) {
break;


@@ -496,7 +496,7 @@ public abstract class HBaseTestCase extends TestCase {
* @throws IOException
*/
public Map<Text, Cell> getFull(Text row) throws IOException {
return region.getFull(row);
return region.getFull(row, null, HConstants.LATEST_TIMESTAMP);
}
/** {@inheritDoc} */
public void flushcache() throws IOException {
@@ -567,7 +567,7 @@
protected void assertCellEquals(final HRegion region, final Text row,
final Text column, final long timestamp, final String value)
throws IOException {
Map<Text, Cell> result = region.getFull(row, timestamp);
Map<Text, Cell> result = region.getFull(row, null, timestamp);
Cell cell_value = result.get(column);
if(value == null){
assertEquals(column.toString() + " at timestamp " + timestamp, null, cell_value);


@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.Map;
import java.util.HashSet;
import java.util.TreeMap;
import org.apache.hadoop.dfs.MiniDFSCluster;
@@ -33,6 +34,7 @@ import org.apache.hadoop.hbase.HScannerInterface;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.BatchUpdate;
/**
* {@link TestGet} is a medley of tests of get all done up as a single test.
@@ -224,10 +226,84 @@ public class TestGet2 extends HBaseTestCase {
}
}
}
/**
* For HBASE-40
*/
public void testGetFullWithSpecifiedColumns() throws IOException {
HRegion region = null;
HRegionIncommon region_incommon = null;
try {
HTableDescriptor htd = createTableDescriptor(getName());
region = createNewHRegion(htd, null, null);
region_incommon = new HRegionIncommon(region);
// write a row with a bunch of columns
Text row = new Text("some_row");
BatchUpdate bu = new BatchUpdate(row);
bu.put(COLUMNS[0], "column 0".getBytes());
bu.put(COLUMNS[1], "column 1".getBytes());
bu.put(COLUMNS[2], "column 2".getBytes());
region.batchUpdate(bu);
assertSpecifiedColumns(region, row);
// try it again with a cache flush to involve the store, not just the
// memcache.
region_incommon.flushcache();
assertSpecifiedColumns(region, row);
} finally {
if (region != null) {
try {
region.close();
} catch (Exception e) {
e.printStackTrace();
}
region.getLog().closeAndDelete();
}
}
}
private void assertSpecifiedColumns(final HRegion region, final Text row)
throws IOException {
HashSet<Text> all = new HashSet<Text>();
HashSet<Text> one = new HashSet<Text>();
HashSet<Text> none = new HashSet<Text>();
all.add(COLUMNS[0]);
all.add(COLUMNS[1]);
all.add(COLUMNS[2]);
one.add(COLUMNS[0]);
// make sure we get all of them with standard getFull
Map<Text, Cell> result = region.getFull(row, null,
HConstants.LATEST_TIMESTAMP);
assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1");
assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2");
// try to get just one
result = region.getFull(row, one, HConstants.LATEST_TIMESTAMP);
assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
assertNull(result.get(COLUMNS[1]));
assertNull(result.get(COLUMNS[2]));
// try to get all of them (specified)
result = region.getFull(row, all, HConstants.LATEST_TIMESTAMP);
assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1");
assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2");
// try to get none with empty column set
result = region.getFull(row, none, HConstants.LATEST_TIMESTAMP);
assertNull(result.get(COLUMNS[0]));
assertNull(result.get(COLUMNS[1]));
assertNull(result.get(COLUMNS[2]));
}
private void assertColumnsPresent(final HRegion r, final Text row)
throws IOException {
Map<Text, Cell> result = r.getFull(row);
Map<Text, Cell> result = r.getFull(row, null, HConstants.LATEST_TIMESTAMP);
int columnCount = 0;
for (Map.Entry<Text, Cell> e: result.entrySet()) {
columnCount++;


@@ -136,7 +136,7 @@ public class TestHMemcache extends TestCase {
for (int i = 0; i < ROW_COUNT; i++) {
HStoreKey hsk = new HStoreKey(getRowName(i));
TreeMap<Text, Cell> all = new TreeMap<Text, Cell>();
this.hmemcache.getFull(hsk, all);
this.hmemcache.getFull(hsk, null, all);
isExpectedRow(i, all);
}
}