HBASE-1537 Intra-row scanning; apply limit over multiple families

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@951682 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Kyle Purtell 2010-06-05 08:54:32 +00:00
parent 650b6cea53
commit d380a628bf
3 changed files with 183 additions and 29 deletions

View File

@ -2065,7 +2065,7 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
} else { } else {
byte [] nextRow; byte [] nextRow;
do { do {
this.storeHeap.next(results, limit); this.storeHeap.next(results, limit - results.size());
if (limit > 0 && results.size() == limit) { if (limit > 0 && results.size() == limit) {
if (this.filter != null && filter.hasFilterRow()) throw new IncompatibleFilterException( if (this.filter != null && filter.hasFilterRow()) throw new IncompatibleFilterException(
"Filter with filterRow(List<KeyValue>) incompatible with scan with limit!"); "Filter with filterRow(List<KeyValue>) incompatible with scan with limit!");

View File

@ -0,0 +1,152 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  XML Schema for the "ModelSchema" target namespace.

  Declares the global elements Version, TableList, TableInfo, TableSchema,
  CellSet, Row, Cell, Scanner, StorageClusterVersion and StorageClusterStatus,
  together with the complex types they reference. Binary values (keys, column
  names, cell values, region names) are typed as base64Binary.
-->
<schema targetNamespace="ModelSchema" elementFormDefault="qualified" xmlns="http://www.w3.org/2001/XMLSchema" xmlns:tns="ModelSchema">

    <!-- Version: one optional string attribute per component (REST, JVM, OS, Server, Jersey). -->
    <element name="Version" type="tns:Version"></element>
    <complexType name="Version">
        <attribute name="REST" type="string"></attribute>
        <attribute name="JVM" type="string"></attribute>
        <attribute name="OS" type="string"></attribute>
        <attribute name="Server" type="string"></attribute>
        <attribute name="Jersey" type="string"></attribute>
    </complexType>

    <!-- TableList: one or more "table" children, each carrying a single "name" element. -->
    <element name="TableList" type="tns:TableList"></element>
    <complexType name="TableList">
        <sequence>
            <element name="table" type="tns:Table" maxOccurs="unbounded" minOccurs="1"></element>
        </sequence>
    </complexType>
    <complexType name="Table">
        <sequence>
            <element name="name" type="string"></element>
        </sequence>
    </complexType>

    <!-- TableInfo: a named table with one or more "region" children. -->
    <element name="TableInfo" type="tns:TableInfo"></element>
    <complexType name="TableInfo">
        <sequence>
            <element name="region" type="tns:TableRegion" maxOccurs="unbounded" minOccurs="1"></element>
        </sequence>
        <attribute name="name" type="string"></attribute>
    </complexType>
    <!-- TableRegion: attribute-only description of a region; start and end keys are base64Binary. -->
    <complexType name="TableRegion">
        <attribute name="name" type="string"></attribute>
        <attribute name="id" type="int"></attribute>
        <attribute name="startKey" type="base64Binary"></attribute>
        <attribute name="endKey" type="base64Binary"></attribute>
        <attribute name="location" type="string"></attribute>
    </complexType>

    <!--
      TableSchema: a named table with one or more "column" children.
      anyAttribute permits additional attributes beyond "name" on both the
      table and its columns.
    -->
    <element name="TableSchema" type="tns:TableSchema"></element>
    <complexType name="TableSchema">
        <sequence>
            <element name="column" type="tns:ColumnSchema" maxOccurs="unbounded" minOccurs="1"></element>
        </sequence>
        <attribute name="name" type="string"></attribute>
        <anyAttribute></anyAttribute>
    </complexType>
    <complexType name="ColumnSchema">
        <attribute name="name" type="string"></attribute>
        <anyAttribute></anyAttribute>
    </complexType>

    <!-- CellSet: one or more rows. -->
    <element name="CellSet" type="tns:CellSet"></element>
    <complexType name="CellSet">
        <sequence>
            <element name="row" type="tns:Row" maxOccurs="unbounded" minOccurs="1"></element>
        </sequence>
    </complexType>

    <!-- Row: a base64Binary key followed by one or more cells. -->
    <element name="Row" type="tns:Row"></element>
    <complexType name="Row">
        <sequence>
            <element name="key" type="base64Binary"></element>
            <element name="cell" type="tns:Cell" maxOccurs="unbounded" minOccurs="1"></element>
        </sequence>
    </complexType>

    <!-- Cell: exactly one base64Binary value, plus "column" and "timestamp" attributes. -->
    <element name="Cell" type="tns:Cell"></element>
    <complexType name="Cell">
        <sequence>
            <element name="value" maxOccurs="1" minOccurs="1">
                <!-- The restriction adds no facets; it must still be closed for the document to be well-formed. -->
                <simpleType>
                    <restriction base="base64Binary"></restriction>
                </simpleType>
            </element>
        </sequence>
        <attribute name="column" type="base64Binary" />
        <!-- NOTE(review): "int" is 32-bit in XML Schema; confirm this is wide enough for the timestamps used. -->
        <attribute name="timestamp" type="int" />
    </complexType>

    <!--
      Scanner: zero or more base64Binary "column" elements followed by an
      optional "filter" string. A complexType may contain only one model
      group, so both elements live in a single sequence.
    -->
    <element name="Scanner" type="tns:Scanner"></element>
    <complexType name="Scanner">
        <sequence>
            <element name="column" type="base64Binary" minOccurs="0" maxOccurs="unbounded"></element>
            <element name="filter" type="string" minOccurs="0" maxOccurs="1"></element>
        </sequence>
        <attribute name="startRow" type="base64Binary"></attribute>
        <attribute name="endRow" type="base64Binary"></attribute>
        <attribute name="batch" type="int"></attribute>
        <!-- NOTE(review): startTime/endTime are 32-bit "int"; confirm this matches the range of the time values used. -->
        <attribute name="startTime" type="int"></attribute>
        <attribute name="endTime" type="int"></attribute>
    </complexType>

    <!-- StorageClusterVersion: a single "version" string attribute. -->
    <element name="StorageClusterVersion" type="tns:StorageClusterVersion" />
    <complexType name="StorageClusterVersion">
        <attribute name="version" type="string"></attribute>
    </complexType>

    <!--
      StorageClusterStatus: zero or more "liveNode" entries (type Node), then
      zero or more "deadNode" strings, plus the "regions", "requests" and
      "averageLoad" attributes.
    -->
    <element name="StorageClusterStatus" type="tns:StorageClusterStatus"></element>
    <complexType name="StorageClusterStatus">
        <sequence>
            <element name="liveNode" type="tns:Node" maxOccurs="unbounded" minOccurs="0"></element>
            <element name="deadNode" type="string" maxOccurs="unbounded" minOccurs="0"></element>
        </sequence>
        <attribute name="regions" type="int"></attribute>
        <attribute name="requests" type="int"></attribute>
        <attribute name="averageLoad" type="float"></attribute>
    </complexType>
    <!-- Node: zero or more "region" children plus per-node attributes. -->
    <complexType name="Node">
        <sequence>
            <element name="region" type="tns:Region" maxOccurs="unbounded" minOccurs="0"></element>
        </sequence>
        <attribute name="name" type="string"></attribute>
        <!-- NOTE(review): startCode is a 32-bit "int"; confirm the value range fits. -->
        <attribute name="startCode" type="int"></attribute>
        <attribute name="requests" type="int"></attribute>
        <attribute name="heapSizeMB" type="int"></attribute>
        <attribute name="maxHeapSizeMB" type="int"></attribute>
    </complexType>
    <!-- Region: attribute-only; the name is base64Binary and the remaining attributes are int. -->
    <complexType name="Region">
        <attribute name="name" type="base64Binary"></attribute>
        <attribute name="stores" type="int"></attribute>
        <attribute name="storefiles" type="int"></attribute>
        <attribute name="storefileSizeMB" type="int"></attribute>
        <attribute name="memstoreSizeMB" type="int"></attribute>
        <attribute name="storefileIndexSizeMB" type="int"></attribute>
    </complexType>
</schema>

View File

@ -23,6 +23,7 @@ package org.apache.hadoop.hbase.regionserver;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Random;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -41,25 +42,39 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
public class TestWideScanner extends HBaseTestCase { public class TestWideScanner extends HBaseTestCase {
private final Log LOG = LogFactory.getLog(this.getClass()); private final Log LOG = LogFactory.getLog(this.getClass());
static final int BATCH = 1000; static final byte[] A = Bytes.toBytes("A");
static final byte[] B = Bytes.toBytes("B");
private MiniDFSCluster cluster = null; static final byte[] C = Bytes.toBytes("C");
private HRegion r; static byte[][] COLUMNS = { A, B, C };
static final Random rng = new Random();
static final HTableDescriptor TESTTABLEDESC = static final HTableDescriptor TESTTABLEDESC =
new HTableDescriptor("testwidescan"); new HTableDescriptor("testwidescan");
static { static {
TESTTABLEDESC.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY, TESTTABLEDESC.addFamily(new HColumnDescriptor(A,
10, // Ten is arbitrary number. Keep versions to help debuggging.
Compression.Algorithm.NONE.getName(), false, true, 8 * 1024,
HConstants.FOREVER, StoreFile.BloomType.NONE.toString(),
HColumnDescriptor.DEFAULT_REPLICATION_SCOPE));
TESTTABLEDESC.addFamily(new HColumnDescriptor(B,
10, // Ten is arbitrary number. Keep versions to help debuggging.
Compression.Algorithm.NONE.getName(), false, true, 8 * 1024,
HConstants.FOREVER, StoreFile.BloomType.NONE.toString(),
HColumnDescriptor.DEFAULT_REPLICATION_SCOPE));
TESTTABLEDESC.addFamily(new HColumnDescriptor(C,
10, // Ten is arbitrary number. Keep versions to help debuggging. 10, // Ten is arbitrary number. Keep versions to help debuggging.
Compression.Algorithm.NONE.getName(), false, true, 8 * 1024, Compression.Algorithm.NONE.getName(), false, true, 8 * 1024,
HConstants.FOREVER, StoreFile.BloomType.NONE.toString(), HConstants.FOREVER, StoreFile.BloomType.NONE.toString(),
HColumnDescriptor.DEFAULT_REPLICATION_SCOPE)); HColumnDescriptor.DEFAULT_REPLICATION_SCOPE));
} }
/** HRegionInfo for root region */ /** HRegionInfo for root region */
public static final HRegionInfo REGION_INFO = public static final HRegionInfo REGION_INFO =
new HRegionInfo(TESTTABLEDESC, HConstants.EMPTY_BYTE_ARRAY, new HRegionInfo(TESTTABLEDESC, HConstants.EMPTY_BYTE_ARRAY,
HConstants.EMPTY_BYTE_ARRAY); HConstants.EMPTY_BYTE_ARRAY);
MiniDFSCluster cluster = null;
HRegion r;
@Override @Override
public void setUp() throws Exception { public void setUp() throws Exception {
cluster = new MiniDFSCluster(conf, 2, true, (String[])null); cluster = new MiniDFSCluster(conf, 2, true, (String[])null);
@ -69,30 +84,15 @@ public class TestWideScanner extends HBaseTestCase {
super.setUp(); super.setUp();
} }
private int addWideContent(HRegion region, byte[] family) private int addWideContent(HRegion region) throws IOException {
throws IOException {
int count = 0; int count = 0;
// add a few rows of 2500 columns (we'll use batch of 1000) to make things
// interesting
for (char c = 'a'; c <= 'c'; c++) { for (char c = 'a'; c <= 'c'; c++) {
byte[] row = Bytes.toBytes("ab" + c); byte[] row = Bytes.toBytes("ab" + c);
int i; int i;
for (i = 0; i < 2500; i++) { for (i = 0; i < 2500; i++) {
byte[] b = Bytes.toBytes(String.format("%10d", i)); byte[] b = Bytes.toBytes(String.format("%10d", i));
Put put = new Put(row); Put put = new Put(row);
put.add(family, b, b); put.add(COLUMNS[rng.nextInt(COLUMNS.length)], b, b);
region.put(put);
count++;
}
}
// add one row of 100,000 columns
{
byte[] row = Bytes.toBytes("abf");
int i;
for (i = 0; i < 100000; i++) {
byte[] b = Bytes.toBytes(String.format("%10d", i));
Put put = new Put(row);
put.add(family, b, b);
region.put(put); region.put(put);
count++; count++;
} }
@ -103,11 +103,13 @@ public class TestWideScanner extends HBaseTestCase {
public void testWideScanBatching() throws IOException { public void testWideScanBatching() throws IOException {
try { try {
this.r = createNewHRegion(REGION_INFO.getTableDesc(), null, null); this.r = createNewHRegion(REGION_INFO.getTableDesc(), null, null);
int inserted = addWideContent(this.r, HConstants.CATALOG_FAMILY); int inserted = addWideContent(this.r);
List<KeyValue> results = new ArrayList<KeyValue>(); List<KeyValue> results = new ArrayList<KeyValue>();
Scan scan = new Scan(); Scan scan = new Scan();
scan.addFamily(HConstants.CATALOG_FAMILY); scan.addFamily(A);
scan.setBatch(BATCH); scan.addFamily(B);
scan.addFamily(C);
scan.setBatch(1000);
InternalScanner s = r.getScanner(scan); InternalScanner s = r.getScanner(scan);
int total = 0; int total = 0;
int i = 0; int i = 0;
@ -117,8 +119,8 @@ public class TestWideScanner extends HBaseTestCase {
i++; i++;
LOG.info("iteration #" + i + ", results.size=" + results.size()); LOG.info("iteration #" + i + ", results.size=" + results.size());
// assert that the result set is no larger than BATCH // assert that the result set is no larger than 1000
assertTrue(results.size() <= BATCH); assertTrue(results.size() <= 1000);
total += results.size(); total += results.size();