HBASE-40 [hbase] Add a method of getting multiple (but not all) cells for a row at once

- new override of getRow added to the whole stack
- test for new override added to TestGet2

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@636415 13f79535-47bb-0310-9956-ffa450edef68
Bryan Duxbury 2008-03-12 16:58:10 +00:00
parent 082e0237fd
commit 6fb7767fc6
9 changed files with 206 additions and 53 deletions
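For readers skimming the diffs below: the HTable change adds a client-side override that fetches only a named subset of a row's columns in one call, instead of the whole row. A minimal usage sketch follows; it assumes the 0.1-era HTable(HBaseConfiguration, Text) constructor, and the table and column names are hypothetical, not part of this commit:

import java.io.IOException;
import java.util.Map;
import java.util.SortedMap;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTable;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.io.Text;

public class PartialRowGetExample {
  public static void main(String[] args) throws IOException {
    // Hypothetical table; assumes the table and its column families already exist.
    HTable table = new HTable(new HBaseConfiguration(), new Text("mytable"));

    // Ask for just two columns of the row rather than every column.
    Text row = new Text("some_row");
    Text[] wanted = { new Text("colfamily1:a"), new Text("colfamily2:b") };

    // New override added by this commit: getRow(row, columns) at the latest
    // timestamp; a getRow(row, columns, ts) variant is also added.
    SortedMap<Text, Cell> result = table.getRow(row, wanted);
    for (Map.Entry<Text, Cell> e : result.entrySet()) {
      System.out.println(e.getKey() + " => " + new String(e.getValue().getValue()));
    }
  }
}

Further down the stack a null column set still means "all columns" (see the getFull(row, null, ...) calls in HRegion, HBaseTestCase and TestGet2), so the existing full-row paths keep their old behaviour.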

CHANGES.txt

@@ -5,9 +5,11 @@ Hbase Change Log
    0.17.0-dev.2008-02-07_12-01-58. (Tom White via Stack)
 NEW FEATURES
+   HBASE-40   Add a method of getting multiple (but not all) cells for a row at once
 OPTIMIZATIONS
+   HBASE-430  Performance: Scanners and getRow return maps with duplicate data
 BUG FIXES
    HBASE-19   CountingBloomFilter can overflow its storage
               (Stu Hood and Bryan Duxbury via Stack)

HTable.java

@@ -361,6 +361,50 @@ public class HTable implements HConstants {
     return results;
   }
 
+  /**
+   * Get selected columns for the specified row at the latest timestamp
+   *
+   * @param row row key
+   * @param columns Array of column names you want to retrieve.
+   * @return Map of columns to values.  Map is empty if row does not exist.
+   * @throws IOException
+   */
+  public SortedMap<Text, Cell> getRow(final Text row, final Text[] columns)
+  throws IOException {
+    return getRow(row, columns, HConstants.LATEST_TIMESTAMP);
+  }
+
+  /**
+   * Get selected columns for the specified row at a specified timestamp
+   *
+   * @param row row key
+   * @param columns Array of column names you want to retrieve.
+   * @param ts timestamp
+   * @return Map of columns to values.  Map is empty if row does not exist.
+   * @throws IOException
+   */
+  public SortedMap<Text, Cell> getRow(final Text row, final Text[] columns,
+    final long ts)
+  throws IOException {
+    HbaseMapWritable value = null;
+
+    value = getRegionServerWithRetries(new ServerCallable<HbaseMapWritable>(row) {
+      public HbaseMapWritable call() throws IOException {
+        return server.getRow(location.getRegionInfo().getRegionName(), row,
+          columns, ts);
+      }
+    });
+
+    SortedMap<Text, Cell> results = new TreeMap<Text, Cell>();
+    if (value != null && value.size() != 0) {
+      for (Map.Entry<Writable, Writable> e: value.entrySet()) {
+        HStoreKey key = (HStoreKey) e.getKey();
+        results.put(key.getColumn(), (Cell)e.getValue());
+      }
+    }
+    return results;
+  }
+
   /**
    * Get a scanner on the current table starting at the specified row.
    * Return the specified columns.

HRegionInterface.java

@@ -92,17 +92,6 @@ public interface HRegionInterface extends VersionedProtocol {
     final Text column, final long timestamp, final int numVersions)
   throws IOException;
 
-  /**
-   * Get all the data for the specified row
-   *
-   * @param regionName region name
-   * @param row row key
-   * @return map of values
-   * @throws IOException
-   */
-  public HbaseMapWritable getRow(final Text regionName, final Text row)
-  throws IOException;
-
   /**
    * Get all the data for the specified row at a given timestamp
    *
@@ -140,6 +129,30 @@ public interface HRegionInterface extends VersionedProtocol {
     final Text row, final long ts)
   throws IOException;
 
+  /**
+   * Get selected columns for the specified row at a given timestamp.
+   *
+   * @param regionName region name
+   * @param row row key
+   * @return map of values
+   * @throws IOException
+   */
+  public HbaseMapWritable getRow(final Text regionName, final Text row,
+    final Text[] columns, final long ts)
+  throws IOException;
+
+  /**
+   * Get selected columns for the specified row at the latest timestamp.
+   *
+   * @param regionName region name
+   * @param row row key
+   * @return map of values
+   * @throws IOException
+   */
+  public HbaseMapWritable getRow(final Text regionName, final Text row,
+    final Text[] columns)
+  throws IOException;
+
   /**
    * Applies a batch of updates via one RPC
    *

HRegion.java

@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Random;
 import java.util.SortedMap;
 import java.util.TreeMap;
@@ -1067,24 +1068,6 @@ public class HRegion implements HConstants {
     return targetStore.get(key, numVersions);
   }
 
-  /**
-   * Fetch all the columns for the indicated row.
-   * Returns a TreeMap that maps column names to values.
-   *
-   * We should eventually use Bloom filters here, to reduce running time.  If
-   * the database has many column families and is very sparse, then we could be
-   * checking many files needlessly.  A small Bloom for each row would help us
-   * determine which column groups are useful for that row.  That would let us
-   * avoid a bunch of disk activity.
-   *
-   * @param row
-   * @return Map<columnName, byte[]> values
-   * @throws IOException
-   */
-  public Map<Text, Cell> getFull(Text row) throws IOException {
-    return getFull(row, HConstants.LATEST_TIMESTAMP);
-  }
-
   /**
    * Fetch all the columns for the indicated row at a specified timestamp.
    * Returns a TreeMap that maps column names to values.
@@ -1096,18 +1079,21 @@ public class HRegion implements HConstants {
    * avoid a bunch of disk activity.
    *
    * @param row
+   * @param columns Array of columns you'd like to retrieve. When null, get all.
    * @param ts
-   * @return Map<columnName, byte[]> values
+   * @return Map<columnName, Cell> values
    * @throws IOException
    */
-  public Map<Text, Cell> getFull(Text row, long ts) throws IOException {
+  public Map<Text, Cell> getFull(final Text row, final Set<Text> columns,
+    final long ts)
+  throws IOException {
     HStoreKey key = new HStoreKey(row, ts);
     obtainRowLock(row);
     try {
       TreeMap<Text, Cell> result = new TreeMap<Text, Cell>();
       for (Text colFamily: stores.keySet()) {
         HStore targetStore = stores.get(colFamily);
-        targetStore.getFull(key, result);
+        targetStore.getFull(key, columns, result);
       }
       return result;
     } finally {
@@ -1162,7 +1148,7 @@ public class HRegion implements HConstants {
       TreeMap<Text, Cell> result = new TreeMap<Text, Cell>();
       for (Text colFamily: stores.keySet()) {
         HStore targetStore = stores.get(colFamily);
-        targetStore.getFull(key, result);
+        targetStore.getFull(key, null, result);
       }
       return result;

HRegionServer.java

@@ -24,6 +24,7 @@ import java.lang.Thread.UncaughtExceptionHandler;
 import java.lang.reflect.Constructor;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
+import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -948,20 +949,34 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
   }
 
   /** {@inheritDoc} */
-  public HbaseMapWritable getRow(final Text regionName, final Text row)
+  public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts)
   throws IOException {
-    return getRow(regionName, row, HConstants.LATEST_TIMESTAMP);
+    return getRow(regionName, row, null, ts);
   }
 
   /** {@inheritDoc} */
-  public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts)
+  public HbaseMapWritable getRow(final Text regionName, final Text row,
+    final Text[] columns)
+  throws IOException {
+    return getRow(regionName, row, columns, HConstants.LATEST_TIMESTAMP);
+  }
+
+  /** {@inheritDoc} */
+  public HbaseMapWritable getRow(final Text regionName, final Text row,
+    final Text[] columns, final long ts)
   throws IOException {
     checkOpen();
     requestCount.incrementAndGet();
     try {
+      // convert the columns array into a set so it's easy to check later.
+      Set<Text> columnSet = new HashSet<Text>();
+      if (columns != null) {
+        columnSet.addAll(Arrays.asList(columns));
+      }
+
       HRegion region = getRegion(regionName);
+      Map<Text, Cell> map = region.getFull(row, columnSet, ts);
       HbaseMapWritable result = new HbaseMapWritable();
-      Map<Text, Cell> map = region.getFull(row, ts);
       for (Map.Entry<Text, Cell> es: map.entrySet()) {
         result.put(new HStoreKey(row, es.getKey()), es.getValue());
       }

HStore.java

@@ -27,6 +27,7 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.Map.Entry;
@@ -184,14 +185,14 @@ public class HStore implements HConstants {
      * @param key
      * @param results
      */
-    void getFull(HStoreKey key, SortedMap<Text, Cell> results) {
+    void getFull(HStoreKey key, Set<Text> columns, SortedMap<Text, Cell> results) {
       this.lock.readLock().lock();
       try {
         synchronized (memcache) {
-          internalGetFull(memcache, key, results);
+          internalGetFull(memcache, key, columns, results);
         }
         synchronized (snapshot) {
-          internalGetFull(snapshot, key, results);
+          internalGetFull(snapshot, key, columns, results);
         }
       } finally {
@@ -200,7 +201,7 @@ public class HStore implements HConstants {
     }
 
     private void internalGetFull(SortedMap<HStoreKey, byte[]> map, HStoreKey key,
-      SortedMap<Text, Cell> results) {
+      Set<Text> columns, SortedMap<Text, Cell> results) {
       if (map.isEmpty() || key == null) {
         return;
@@ -214,7 +215,9 @@ public class HStore implements HConstants {
           byte [] val = tailMap.get(itKey);
 
           if (!HLogEdit.isDeleted(val)) {
-            results.put(itCol, new Cell(val, itKey.getTimestamp()));
+            if (columns == null || columns.contains(itKey.getColumn())) {
+              results.put(itCol, new Cell(val, itKey.getTimestamp()));
+            }
           }
         } else if (key.getRow().compareTo(itKey.getRow()) < 0) {
@@ -1601,24 +1604,36 @@ public class HStore implements HConstants {
   * Return all the available columns for the given key.  The key indicates a
   * row and timestamp, but not a column name.
   *
-  * The returned object should map column names to byte arrays (byte[]).
+  * The returned object should map column names to Cells.
   */
-  void getFull(HStoreKey key, TreeMap<Text, Cell> results)
+  void getFull(HStoreKey key, final Set<Text> columns, TreeMap<Text, Cell> results)
  throws IOException {
    Map<Text, List<Long>> deletes = new HashMap<Text, List<Long>>();
 
+    // if the key is null, we're not even looking for anything. return.
    if (key == null) {
      return;
    }
 
    this.lock.readLock().lock();
-    memcache.getFull(key, results);
+
+    // get from the memcache first.
+    memcache.getFull(key, columns, results);
+
    try {
      MapFile.Reader[] maparray = getReaders();
+
+      // examine each mapfile
      for (int i = maparray.length - 1; i >= 0; i--) {
        MapFile.Reader map = maparray[i];
+
+        // synchronize on the map so that no one else iterates it at the same
+        // time
        synchronized(map) {
+          // seek back to the beginning
          map.reset();
+
+          // seek to the closest key that should match the row we're looking for
          ImmutableBytesWritable readval = new ImmutableBytesWritable();
          HStoreKey readkey = (HStoreKey)map.getClosest(key, readval);
          if (readkey == null) {
@@ -1631,7 +1646,9 @@ public class HStore implements HConstants {
            if(isDeleted(readkey, readval.get(), true, deletes)) {
              break;
            }
-            results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp()));
+            if (columns == null || columns.contains(readkey.getColumn())) {
+              results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp()));
+            }
            readval = new ImmutableBytesWritable();
          } else if(key.getRow().compareTo(readkey.getRow()) < 0) {
            break;
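The filtering in both the Memcache path (internalGetFull) and the mapfile path (getFull) above comes down to one predicate: a null column set means no filtering, while a non-null set admits only its members, so an empty set admits nothing (exactly what the new TestGet2 assertions check). A tiny standalone sketch of that check; the class and helper names here are illustrative, not HStore internals:

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.io.Text;

public class ColumnFilterSketch {
  // Mirrors the column check used above: null selects everything,
  // a non-null set selects only its members, an empty set selects nothing.
  static boolean wanted(Set<Text> columns, Text column) {
    return columns == null || columns.contains(column);
  }

  public static void main(String[] args) {
    Set<Text> one = new HashSet<Text>();
    one.add(new Text("colfamily1:a"));  // hypothetical column name

    System.out.println(wanted(null, new Text("colfamily1:a")));                // true: all columns
    System.out.println(wanted(one, new Text("colfamily1:a")));                 // true: requested
    System.out.println(wanted(one, new Text("colfamily2:b")));                 // false: not requested
    System.out.println(wanted(new HashSet<Text>(), new Text("colfamily1:a"))); // false: empty set
  }
}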

HBaseTestCase.java

@@ -496,7 +496,7 @@ public abstract class HBaseTestCase extends TestCase {
      * @throws IOException
      */
     public Map<Text, Cell> getFull(Text row) throws IOException {
-      return region.getFull(row);
+      return region.getFull(row, null, HConstants.LATEST_TIMESTAMP);
     }
 
     /** {@inheritDoc} */
     public void flushcache() throws IOException {
@@ -567,7 +567,7 @@ public abstract class HBaseTestCase extends TestCase {
   protected void assertCellEquals(final HRegion region, final Text row,
     final Text column, final long timestamp, final String value)
   throws IOException {
-    Map<Text, Cell> result = region.getFull(row, timestamp);
+    Map<Text, Cell> result = region.getFull(row, null, timestamp);
     Cell cell_value = result.get(column);
     if(value == null){
       assertEquals(column.toString() + " at timestamp " + timestamp, null, cell_value);

TestGet2.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.regionserver;
 import java.io.IOException;
 import java.util.Map;
+import java.util.HashSet;
 import java.util.TreeMap;
 import org.apache.hadoop.dfs.MiniDFSCluster;
@@ -33,6 +34,7 @@ import org.apache.hadoop.hbase.HScannerInterface;
 import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.io.Cell;
+import org.apache.hadoop.hbase.io.BatchUpdate;
 
 /**
  * {@link TestGet} is a medley of tests of get all done up as a single test.
@@ -224,10 +226,84 @@ public class TestGet2 extends HBaseTestCase {
       }
     }
   }
 
+  /**
+   * For HBASE-40
+   */
+  public void testGetFullWithSpecifiedColumns() throws IOException {
+    HRegion region = null;
+    HRegionIncommon region_incommon = null;
+    try {
+      HTableDescriptor htd = createTableDescriptor(getName());
+      region = createNewHRegion(htd, null, null);
+      region_incommon = new HRegionIncommon(region);
+
+      // write a row with a bunch of columns
+      Text row = new Text("some_row");
+      BatchUpdate bu = new BatchUpdate(row);
+      bu.put(COLUMNS[0], "column 0".getBytes());
+      bu.put(COLUMNS[1], "column 1".getBytes());
+      bu.put(COLUMNS[2], "column 2".getBytes());
+      region.batchUpdate(bu);
+
+      assertSpecifiedColumns(region, row);
+
+      // try it again with a cache flush to involve the store, not just the
+      // memcache.
+      region_incommon.flushcache();
+      assertSpecifiedColumns(region, row);
+
+    } finally {
+      if (region != null) {
+        try {
+          region.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        region.getLog().closeAndDelete();
+      }
+    }
+  }
+
+  private void assertSpecifiedColumns(final HRegion region, final Text row)
+  throws IOException {
+    HashSet<Text> all = new HashSet<Text>();
+    HashSet<Text> one = new HashSet<Text>();
+    HashSet<Text> none = new HashSet<Text>();
+
+    all.add(COLUMNS[0]);
+    all.add(COLUMNS[1]);
+    all.add(COLUMNS[2]);
+    one.add(COLUMNS[0]);
+
+    // make sure we get all of them with standard getFull
+    Map<Text, Cell> result = region.getFull(row, null,
+      HConstants.LATEST_TIMESTAMP);
+    assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
+    assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1");
+    assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2");
+
+    // try to get just one
+    result = region.getFull(row, one, HConstants.LATEST_TIMESTAMP);
+    assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
+    assertNull(result.get(COLUMNS[1]));
+    assertNull(result.get(COLUMNS[2]));
+
+    // try to get all of them (specified)
+    result = region.getFull(row, all, HConstants.LATEST_TIMESTAMP);
+    assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
+    assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1");
+    assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2");
+
+    // try to get none with empty column set
+    result = region.getFull(row, none, HConstants.LATEST_TIMESTAMP);
+    assertNull(result.get(COLUMNS[0]));
+    assertNull(result.get(COLUMNS[1]));
+    assertNull(result.get(COLUMNS[2]));
+  }
+
   private void assertColumnsPresent(final HRegion r, final Text row)
   throws IOException {
-    Map<Text, Cell> result = r.getFull(row);
+    Map<Text, Cell> result = r.getFull(row, null, HConstants.LATEST_TIMESTAMP);
     int columnCount = 0;
     for (Map.Entry<Text, Cell> e: result.entrySet()) {
       columnCount++;

TestHMemcache.java

@@ -136,7 +136,7 @@ public class TestHMemcache extends TestCase {
     for (int i = 0; i < ROW_COUNT; i++) {
       HStoreKey hsk = new HStoreKey(getRowName(i));
       TreeMap<Text, Cell> all = new TreeMap<Text, Cell>();
-      this.hmemcache.getFull(hsk, all);
+      this.hmemcache.getFull(hsk, null, all);
       isExpectedRow(i, all);
     }
   }