HBASE-3073 New APIs for Result, faster implementation for some calls

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1004866 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan Rawson 2010-10-06 00:44:21 +00:00
parent 66a4faa7b0
commit 1ef046250f
4 changed files with 242 additions and 45 deletions

View File

@ -1022,6 +1022,7 @@ Release 0.21.0 - Unreleased
HBASE-3043 'hbase-daemon.sh stop regionserver' should kill compactions HBASE-3043 'hbase-daemon.sh stop regionserver' should kill compactions
that are in progress that are in progress
(Nicolas Spiegelberg via Stack) (Nicolas Spiegelberg via Stack)
HBASE-3073 New APIs for Result, faster implementation for some calls
OPTIMIZATIONS OPTIMIZATIONS
HBASE-410 [testing] Speed up the test suite HBASE-410 [testing] Speed up the test suite

View File

@ -41,6 +41,8 @@ import java.util.TreeMap;
/** /**
* Single row result of a {@link Get} or {@link Scan} query.<p> * Single row result of a {@link Get} or {@link Scan} query.<p>
* *
* This class is NOT THREAD SAFE.<p>
*
* Convenience methods are available that return various {@link Map} * Convenience methods are available that return various {@link Map}
* structures and values directly.<p> * structures and values directly.<p>
* *
@ -109,7 +111,7 @@ public class Result implements Writable {
* Method for retrieving the row that this result is for * Method for retrieving the row that this result is for
* @return row * @return row
*/ */
public synchronized byte [] getRow() { public byte [] getRow() {
if (this.row == null) { if (this.row == null) {
if(this.kvs == null) { if(this.kvs == null) {
readFields(); readFields();
@ -120,8 +122,24 @@ public class Result implements Writable {
} }
/** /**
* Return the unsorted array of KeyValues backing this Result instance. * Return the array of KeyValues backing this Result instance.
* @return unsorted array of KeyValues *
* The array is sorted from smallest -> largest using the
* {@link KeyValue#COMPARATOR}.
*
* The array only contains what your Get or Scan specifies and no more.
* For example if you request column "A" 1 version you will have at most 1
* KeyValue in the array. If you request column "A" with 2 version you will
* have at most 2 KeyValues, with the first one being the newer timestamp and
* the second being the older timestamp (this is the sort order defined by
* {@link KeyValue#COMPARATOR}). If columns don't exist, they won't be
* present in the result. Therefore if you ask for 1 version all columns,
* it is safe to iterate over this array and expect to see 1 KeyValue for
* each column and no more.
*
* This API is faster than using getFamilyMap() and getMap()
*
* @return array of KeyValues
*/ */
public KeyValue[] raw() { public KeyValue[] raw() {
if(this.kvs == null) { if(this.kvs == null) {
@ -133,6 +151,8 @@ public class Result implements Writable {
/** /**
* Create a sorted list of the KeyValue's in this result. * Create a sorted list of the KeyValue's in this result.
* *
* Since HBase 0.20.5 this is equivalent to raw().
*
* @return The sorted list of KeyValue's. * @return The sorted list of KeyValue's.
*/ */
public List<KeyValue> list() { public List<KeyValue> list() {
@ -145,24 +165,135 @@ public class Result implements Writable {
/** /**
* Returns a sorted array of KeyValues in this Result. * Returns a sorted array of KeyValues in this Result.
* <p> * <p>
* Note: Sorting is done in place, so the backing array will be sorted * Since HBase 0.20.5 this is equivalent to {@link #raw}. Use
* after calling this method. * {@link #raw} instead.
*
* @return sorted array of KeyValues * @return sorted array of KeyValues
* @deprecated
*/ */
public KeyValue[] sorted() { public KeyValue[] sorted() {
if (isEmpty()) { // used for side effect! raw(); // side effect of loading this.kvs
return null;
}
if (!sorted) { if (!sorted) {
assert Ordering.from(KeyValue.COMPARATOR).isOrdered(Arrays.asList(kvs)); assert Ordering.from(KeyValue.COMPARATOR).isOrdered(Arrays.asList(kvs));
Arrays.sort(kvs, KeyValue.COMPARATOR); Arrays.sort(kvs, KeyValue.COMPARATOR);
sorted = true; sorted = true;
} }
return kvs;
return raw();
} }
private boolean sorted = false; private boolean sorted = false;
/**
 * Collects every KeyValue this Result holds for one column.
 *
 * The returned list follows the {@link KeyValue#COMPARATOR} order of the
 * backing array, so the first element is the most recent version. When the
 * query (Get or Scan) asked for a single version, the list holds at most one
 * entry. When the column is absent from the result set (it does not exist,
 * or it was not selected by the query) the list is empty, never null.
 *
 * See also getColumnLatest, which returns only the newest KeyValue.
 *
 * @param family the column family
 * @param qualifier the column qualifier
 * @return the KeyValues of this column, or an empty list if the column is
 * not part of the result set
 */
public List<KeyValue> getColumn(byte [] family, byte [] qualifier) {
  final List<KeyValue> matches = new ArrayList<KeyValue>();
  final KeyValue [] cells = raw();
  if (cells == null || cells.length == 0) {
    return matches;
  }

  final int start = binarySearch(cells, family, qualifier);
  if (start == -1) {
    return matches; // cant find it
  }

  // Walk forward from the first candidate while entries still belong to
  // the requested column; the first mismatch ends the run.
  int idx = start;
  while (idx < cells.length && cells[idx].matchingColumn(family, qualifier)) {
    matches.add(cells[idx]);
    idx++;
  }
  return matches;
}
/**
 * Finds the index in {@code kvs} at which the entries for the given column
 * would begin.
 *
 * A search key is built with {@link KeyValue#createFirstOnRow} from the row
 * of the first array element plus the requested family and qualifier, and
 * the array is binary-searched with {@link KeyValue#COMPARATOR}. The array
 * must therefore already be sorted by that comparator.
 *
 * The returned index is only a candidate: callers still have to verify that
 * the KeyValue at that index actually belongs to the requested column.
 *
 * @param kvs sorted array of KeyValues to search
 * @param family the column family
 * @param qualifier the column qualifier
 * @return an index in the range {@code [0, kvs.length)}, or -1 if
 * {@code kvs} is null or empty or the column would sort after every entry
 */
protected int binarySearch(final KeyValue [] kvs,
                           final byte [] family,
                           final byte [] qualifier) {
  if (kvs == null || kvs.length == 0) {
    return -1; // null/empty result.
  }

  KeyValue searchTerm =
      KeyValue.createFirstOnRow(kvs[0].getRow(),
          family, qualifier);

  // Arrays.binarySearch returns ( -(insertion point) - 1 ) when the key is
  // not found; the original code notes that an exact match is not expected
  // for this search key.
  int pos = Arrays.binarySearch(kvs, searchTerm, KeyValue.COMPARATOR);
  if (pos < 0) {
    pos = -(pos + 1);
    // pos is now insertion point
  }

  // The insertion point can equal kvs.length when the column sorts after
  // every entry. This must be tested AFTER the conversion above, because
  // Arrays.binarySearch itself never returns kvs.length.
  if (pos == kvs.length) {
    return -1; // column is past the end of the result.
  }
  return pos;
}
/**
 * Returns the most recent KeyValue for a given column.
 *
 * If the column is not in the result set, either because it was not
 * selected by the query (Get/Scan) or because it does not exist in the row,
 * the return value is null.
 *
 * @param family the column family
 * @param qualifier the column qualifier
 * @return the newest KeyValue for the column, or null if there is none
 */
public KeyValue getColumnLatest(byte [] family, byte [] qualifier) {
  KeyValue [] kvs = raw(); // side effect possibly.
  if (kvs == null || kvs.length == 0) {
    return null;
  }
  int pos = binarySearch(kvs, family, qualifier);
  // Reject any index outside the array, not just the -1 sentinel: the
  // search yields an insertion point, which equals kvs.length when the
  // column sorts after every KeyValue, and indexing there would throw
  // ArrayIndexOutOfBoundsException.
  if (pos < 0 || pos >= kvs.length) {
    return null;
  }
  KeyValue kv = kvs[pos];
  // The index is only where the column would start; confirm it is really
  // the requested column before returning it.
  if (kv.matchingColumn(family, qualifier)) {
    return kv;
  }
  return null;
}
/**
 * Fetches the value stored in the newest version of a column.
 * Delegates the lookup to getColumnLatest.
 * @param family family name
 * @param qualifier column qualifier
 * @return value of the latest version of the column, null if none found
 */
public byte[] getValue(byte [] family, byte [] qualifier) {
  final KeyValue latest = getColumnLatest(family, qualifier);
  return (latest != null) ? latest.getValue() : null;
}
/**
 * Tells whether this Result holds at least one KeyValue for a column.
 * The column is considered present when getColumnLatest finds an entry.
 * @param family family name
 * @param qualifier column qualifier
 * @return true if at least one value exists in the result, false if not
 */
public boolean containsColumn(byte [] family, byte [] qualifier) {
  return getColumnLatest(family, qualifier) != null;
}
/** /**
* Map of families to all versions of its qualifiers and values. * Map of families to all versions of its qualifiers and values.
* <p> * <p>
@ -271,17 +402,6 @@ public class Result implements Writable {
return returnMap; return returnMap;
} }
/**
 * Fetches the value of the newest version of a column, using the
 * timestamp/value entry returned by getKeyValue.
 * @param family family name
 * @param qualifier column qualifier
 * @return value of the latest version of the column, null if none found
 */
public byte [] getValue(byte [] family, byte [] qualifier) {
  final Map.Entry<Long,byte[]> latest = getKeyValue(family, qualifier);
  if (latest == null) {
    return null;
  }
  return latest.getValue();
}
private Map.Entry<Long,byte[]> getKeyValue(byte[] family, byte[] qualifier) { private Map.Entry<Long,byte[]> getKeyValue(byte[] family, byte[] qualifier) {
if(this.familyMap == null) { if(this.familyMap == null) {
getMap(); getMap();
@ -308,28 +428,6 @@ public class Result implements Writable {
qualifierMap.get(qualifier): qualifierMap.get(new byte[0]); qualifierMap.get(qualifier): qualifierMap.get(new byte[0]);
} }
/**
 * Tells whether the family map of this Result has an entry for a column.
 * The family map is built through getMap() first if it is not yet present.
 * @param family family name
 * @param qualifier column qualifier
 * @return true if at least one value exists in the result, false if not
 */
public boolean containsColumn(byte [] family, byte [] qualifier) {
  if (this.familyMap == null) {
    getMap();
  }
  if (isEmpty()) {
    return false;
  }
  final NavigableMap<byte [], NavigableMap<Long, byte[]>> qualifiers =
      familyMap.get(family);
  // Present only if the family is known and the qualifier has a version map.
  return qualifiers != null
      && getVersionMap(qualifiers, qualifier) != null;
}
/** /**
* Returns the value of the first column in the Result. * Returns the value of the first column in the Result.
* @return value of the first column * @return value of the first column

View File

@ -669,7 +669,7 @@ public abstract class HBaseTestCase extends TestCase {
} }
} }
public void assertByteEquals(byte[] expected, public static void assertByteEquals(byte[] expected,
byte[] actual) { byte[] actual) {
if (Bytes.compareTo(expected, actual) != 0) { if (Bytes.compareTo(expected, actual) != 0) {
throw new AssertionFailedError("expected:<" + throw new AssertionFailedError("expected:<" +

View File

@ -0,0 +1,98 @@
/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import junit.framework.TestCase;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import static org.apache.hadoop.hbase.HBaseTestCase.assertByteEquals;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
/**
 * Unit tests for the column lookup methods of {@link Result}: getColumn,
 * getColumnLatest, getValue and containsColumn, run against Results built
 * directly from sorted KeyValue arrays.
 */
public class TestResult extends TestCase {

  static final byte [] row = Bytes.toBytes("row");
  static final byte [] family = Bytes.toBytes("family");
  static final byte [] value = Bytes.toBytes("value");

  /**
   * Builds one KeyValue per column index in [0, cols). Each KeyValue uses
   * the int column index (as bytes) for its qualifier and the given value
   * with that same index appended for its value.
   *
   * @param row row key shared by all generated KeyValues
   * @param family column family shared by all generated KeyValues
   * @param value value prefix
   * @param timestamp timestamp given to every generated KeyValue
   * @param cols number of KeyValues to generate
   * @return the generated KeyValues, in column-index order (not sorted)
   */
  static KeyValue[] genKVs(final byte[] row, final byte[] family,
                           final byte[] value,
                           final long timestamp,
                           final int cols) {
    final KeyValue [] generated = new KeyValue[cols];
    for (int col = 0; col < cols; col++) {
      final byte [] qualifier = Bytes.toBytes(col);
      final byte [] cellValue = Bytes.add(value, Bytes.toBytes(col));
      generated[col] = new KeyValue(row, family, qualifier, timestamp, cellValue);
    }
    return generated;
  }

  /** One version per column: every lookup must find exactly that KeyValue. */
  public void testBasic() throws Exception {
    final KeyValue [] cells = genKVs(row, family, value, 1, 100);
    Arrays.sort(cells, KeyValue.COMPARATOR);

    final Result result = new Result(cells);

    for (int col = 0; col < 100; ++col) {
      final byte[] qualifier = Bytes.toBytes(col);

      final List<KeyValue> versions = result.getColumn(family, qualifier);
      assertEquals(1, versions.size());
      assertByteEquals(qualifier, versions.get(0).getQualifier());

      assertEquals(versions.get(0), result.getColumnLatest(family, qualifier));
      assertByteEquals(Bytes.add(value, Bytes.toBytes(col)),
          result.getValue(family, qualifier));
      assertTrue(result.containsColumn(family, qualifier));
    }
  }

  /**
   * Two versions per column (timestamps 1 and 200): getColumn must return
   * both, with the timestamp-200 KeyValue first and reported as latest.
   */
  public void testMultiVersion() throws Exception {
    final KeyValue [] older = genKVs(row, family, value, 1, 100);
    final KeyValue [] newer = genKVs(row, family, value, 200, 100);

    // Concatenate both generations, then sort into comparator order.
    final KeyValue [] cells = new KeyValue[older.length + newer.length];
    System.arraycopy(older, 0, cells, 0, older.length);
    System.arraycopy(newer, 0, cells, older.length, newer.length);
    Arrays.sort(cells, KeyValue.COMPARATOR);

    final Result result = new Result(cells);

    for (int col = 0; col < 100; ++col) {
      final byte[] qualifier = Bytes.toBytes(col);

      final List<KeyValue> versions = result.getColumn(family, qualifier);
      assertEquals(2, versions.size());
      assertByteEquals(qualifier, versions.get(0).getQualifier());
      assertEquals(200, versions.get(0).getTimestamp());

      assertEquals(versions.get(0), result.getColumnLatest(family, qualifier));
      assertByteEquals(Bytes.add(value, Bytes.toBytes(col)),
          result.getValue(family, qualifier));
      assertTrue(result.containsColumn(family, qualifier));
    }
  }
}