From 1ef046250f2b384275e5fd5279b41f9487084fc8 Mon Sep 17 00:00:00 2001 From: Ryan Rawson Date: Wed, 6 Oct 2010 00:44:21 +0000 Subject: [PATCH] HBASE-3073 New APIs for Result, faster implementation for some calls git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1004866 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../apache/hadoop/hbase/client/Result.java | 182 ++++++++++++++---- .../apache/hadoop/hbase/HBaseTestCase.java | 6 +- .../hadoop/hbase/client/TestResult.java | 98 ++++++++++ 4 files changed, 242 insertions(+), 45 deletions(-) create mode 100644 src/test/java/org/apache/hadoop/hbase/client/TestResult.java diff --git a/CHANGES.txt b/CHANGES.txt index c9091f02e62..53f792b96d9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1022,6 +1022,7 @@ Release 0.21.0 - Unreleased HBASE-3043 'hbase-daemon.sh stop regionserver' should kill compactions that are in progress (Nicolas Spiegelberg via Stack) + HBASE-3073 New APIs for Result, faster implementation for some calls OPTIMIZATIONS HBASE-410 [testing] Speed up the test suite diff --git a/src/main/java/org/apache/hadoop/hbase/client/Result.java b/src/main/java/org/apache/hadoop/hbase/client/Result.java index 4fe19a51337..de507d252ae 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/Result.java +++ b/src/main/java/org/apache/hadoop/hbase/client/Result.java @@ -41,6 +41,8 @@ import java.util.TreeMap; /** * Single row result of a {@link Get} or {@link Scan} query.

* + * This class is NOT THREAD SAFE.

+ * * Convenience methods are available that return various {@link Map} * structures and values directly.

* @@ -109,7 +111,7 @@ public class Result implements Writable { * Method for retrieving the row that this result is for * @return row */ - public synchronized byte [] getRow() { + public byte [] getRow() { if (this.row == null) { if(this.kvs == null) { readFields(); @@ -120,8 +122,24 @@ public class Result implements Writable { } /** - * Return the unsorted array of KeyValues backing this Result instance. - * @return unsorted array of KeyValues + * Return the array of KeyValues backing this Result instance. + * + * The array is sorted from smallest -> largest using the + * {@link KeyValue#COMPARATOR}. + * + * The array only contains what your Get or Scan specifies and no more. + * For example if you request column "A" 1 version you will have at most 1 + * KeyValue in the array. If you request column "A" with 2 version you will + * have at most 2 KeyValues, with the first one being the newer timestamp and + * the second being the older timestamp (this is the sort order defined by + * {@link KeyValue#COMPARATOR}). If columns don't exist, they won't be + * present in the result. Therefore if you ask for 1 version all columns, + * it is safe to iterate over this array and expect to see 1 KeyValue for + * each column and no more. + * + * This API is faster than using getFamilyMap() and getMap() + * + * @return array of KeyValues */ public KeyValue[] raw() { if(this.kvs == null) { @@ -133,6 +151,8 @@ public class Result implements Writable { /** * Create a sorted list of the KeyValue's in this result. * + * Since HBase 0.20.5 this is equivalent to raw(). + * * @return The sorted list of KeyValue's. */ public List list() { @@ -145,24 +165,135 @@ public class Result implements Writable { /** * Returns a sorted array of KeyValues in this Result. *

- * Note: Sorting is done in place, so the backing array will be sorted - * after calling this method. + * Since HBase 0.20.5 this is equivalent to {@link #raw}. Use + * {@link #raw} instead. + * * @return sorted array of KeyValues + * @deprecated */ public KeyValue[] sorted() { - if (isEmpty()) { // used for side effect! - return null; - } + raw(); // side effect of loading this.kvs if (!sorted) { assert Ordering.from(KeyValue.COMPARATOR).isOrdered(Arrays.asList(kvs)); Arrays.sort(kvs, KeyValue.COMPARATOR); sorted = true; } - return kvs; + + return raw(); } private boolean sorted = false; + /** + * Return the KeyValues for the specific column. The KeyValues are sorted in + * the {@link KeyValue#COMPARATOR} order. That implies the first entry in + * the list is the most recent column. If the query (Scan or Get) only + * requested 1 version the list will contain at most 1 entry. If the column + * did not exist in the result set (either the column does not exist + * or the column was not selected in the query) the list will be empty. 
+   *
+   * Also see getColumnLatest which returns just a KeyValue
+   *
+   * @param family the family
+   * @param qualifier the column qualifier
+   * @return a list of KeyValues for this column or empty list if the column
+   * did not exist in the result set
+   */
+  public List<KeyValue> getColumn(byte [] family, byte [] qualifier) {
+    List<KeyValue> result = new ArrayList<KeyValue>();
+
+    KeyValue [] kvs = raw();
+
+    if (kvs == null || kvs.length == 0) {
+      return result;
+    }
+    int pos = binarySearch(kvs, family, qualifier);
+    if (pos == -1) {
+      return result; // can't find it
+    }
+
+    for (int i = pos ; i < kvs.length ; i++ ) {
+      KeyValue kv = kvs[i];
+      if (kv.matchingColumn(family,qualifier)) {
+        result.add(kv);
+      } else {
+        break;
+      }
+    }
+
+    return result;
+  }
+
+  protected int binarySearch(final KeyValue [] kvs,
+                             final byte [] family,
+                             final byte [] qualifier) {
+    KeyValue searchTerm =
+        KeyValue.createFirstOnRow(kvs[0].getRow(),
+            family, qualifier);
+
+    // Arrays.binarySearch returns ( -(insertion point) - 1) when the key is absent
+    int pos = Arrays.binarySearch(kvs, searchTerm, KeyValue.COMPARATOR);
+    // never will exact match
+    if (pos < 0) {
+      pos = (pos+1) * -1;
+      // pos is now insertion point
+    }
+    if (pos == kvs.length) {
+      return -1; // insertion point is past the last KeyValue: column is absent
+    }
+    return pos;
+  }
+
+  /**
+   * The most recent KeyValue for a given column. If the column does
+   * not exist in the result set - if it wasn't selected in the query (Get/Scan)
+   * or just does not exist in the row the return value is null.
+   *
+   * @param family the family
+   * @param qualifier the column qualifier
+   * @return KeyValue for the column or null
+   */
+  public KeyValue getColumnLatest(byte [] family, byte [] qualifier) {
+    KeyValue [] kvs = raw(); // side effect possibly.
+    if (kvs == null || kvs.length == 0) {
+      return null;
+    }
+    int pos = binarySearch(kvs, family, qualifier);
+    if (pos == -1) {
+      return null;
+    }
+    KeyValue kv = kvs[pos];
+    if (kv.matchingColumn(family, qualifier)) {
+      return kv;
+    }
+    return null;
+  }
+
+  /**
+   * Get the latest version of the specified column.
+ * @param family family name + * @param qualifier column qualifier + * @return value of latest version of column, null if none found + */ + public byte[] getValue(byte [] family, byte [] qualifier) { + KeyValue kv = getColumnLatest(family, qualifier); + if (kv == null) { + return null; + } + return kv.getValue(); + } + + /** + * Checks for existence of the specified column. + * @param family family name + * @param qualifier column qualifier + * @return true if at least one value exists in the result, false if not + */ + public boolean containsColumn(byte [] family, byte [] qualifier) { + KeyValue kv = getColumnLatest(family, qualifier); + return kv != null; + } + /** * Map of families to all versions of its qualifiers and values. *

@@ -271,17 +402,6 @@ public class Result implements Writable { return returnMap; } - /** - * Get the latest version of the specified column. - * @param family family name - * @param qualifier column qualifier - * @return value of latest version of column, null if none found - */ - public byte [] getValue(byte [] family, byte [] qualifier) { - Map.Entry entry = getKeyValue(family, qualifier); - return entry == null? null: entry.getValue(); - } - private Map.Entry getKeyValue(byte[] family, byte[] qualifier) { if(this.familyMap == null) { getMap(); @@ -308,28 +428,6 @@ public class Result implements Writable { qualifierMap.get(qualifier): qualifierMap.get(new byte[0]); } - /** - * Checks for existence of the specified column. - * @param family family name - * @param qualifier column qualifier - * @return true if at least one value exists in the result, false if not - */ - public boolean containsColumn(byte [] family, byte [] qualifier) { - if(this.familyMap == null) { - getMap(); - } - if(isEmpty()) { - return false; - } - NavigableMap> qualifierMap = - familyMap.get(family); - if(qualifierMap == null) { - return false; - } - NavigableMap versionMap = getVersionMap(qualifierMap, qualifier); - return versionMap != null; - } - /** * Returns the value of the first column in the Result. 
* @return value of the first column diff --git a/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java b/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java index d1dbb17cdb2..e2a7ffca499 100644 --- a/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java +++ b/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java @@ -194,7 +194,7 @@ public abstract class HBaseTestCase extends TestCase { HTableDescriptor htd = new HTableDescriptor(name); htd.addFamily(new HColumnDescriptor(fam1, versions, HColumnDescriptor.DEFAULT_COMPRESSION, false, false, - Integer.MAX_VALUE, HConstants.FOREVER, + Integer.MAX_VALUE, HConstants.FOREVER, HColumnDescriptor.DEFAULT_BLOOMFILTER, HConstants.REPLICATION_SCOPE_LOCAL)); htd.addFamily(new HColumnDescriptor(fam2, versions, @@ -669,8 +669,8 @@ public abstract class HBaseTestCase extends TestCase { } } - public void assertByteEquals(byte[] expected, - byte[] actual) { + public static void assertByteEquals(byte[] expected, + byte[] actual) { if (Bytes.compareTo(expected, actual) != 0) { throw new AssertionFailedError("expected:<" + Bytes.toString(expected) + "> but was:<" + diff --git a/src/test/java/org/apache/hadoop/hbase/client/TestResult.java b/src/test/java/org/apache/hadoop/hbase/client/TestResult.java new file mode 100644 index 00000000000..becabcfb469 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/client/TestResult.java @@ -0,0 +1,98 @@ +/* + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.client; + +import junit.framework.TestCase; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; + +import static org.apache.hadoop.hbase.HBaseTestCase.assertByteEquals; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; + +public class TestResult extends TestCase { + + static KeyValue[] genKVs(final byte[] row, final byte[] family, + final byte[] value, + final long timestamp, + final int cols) { + KeyValue [] kvs = new KeyValue[cols]; + + for (int i = 0; i < cols ; i++) { + kvs[i] = new KeyValue( + row, family, Bytes.toBytes(i), + timestamp, + Bytes.add(value, Bytes.toBytes(i))); + } + return kvs; + } + + static final byte [] row = Bytes.toBytes("row"); + static final byte [] family = Bytes.toBytes("family"); + static final byte [] value = Bytes.toBytes("value"); + + public void testBasic() throws Exception { + KeyValue [] kvs = genKVs(row, family, value, 1, 100); + + Arrays.sort(kvs, KeyValue.COMPARATOR); + + Result r = new Result(kvs); + + for (int i = 0; i < 100; ++i) { + final byte[] qf = Bytes.toBytes(i); + + List ks = r.getColumn(family, qf); + assertEquals(1, ks.size()); + assertByteEquals(qf, ks.get(0).getQualifier()); + + assertEquals(ks.get(0), r.getColumnLatest(family, qf)); + assertByteEquals(Bytes.add(value, Bytes.toBytes(i)), r.getValue(family, qf)); + assertTrue(r.containsColumn(family, qf)); + } + } + public void testMultiVersion() throws Exception { + KeyValue [] kvs1 = genKVs(row, family, value, 1, 100); + 
KeyValue [] kvs2 = genKVs(row, family, value, 200, 100); + + KeyValue [] kvs = new KeyValue[kvs1.length+kvs2.length]; + System.arraycopy(kvs1, 0, kvs, 0, kvs1.length); + System.arraycopy(kvs2, 0, kvs, kvs1.length, kvs2.length); + + Arrays.sort(kvs, KeyValue.COMPARATOR); + + Result r = new Result(kvs); + for (int i = 0; i < 100; ++i) { + final byte[] qf = Bytes.toBytes(i); + + List ks = r.getColumn(family, qf); + assertEquals(2, ks.size()); + assertByteEquals(qf, ks.get(0).getQualifier()); + assertEquals(200, ks.get(0).getTimestamp()); + + assertEquals(ks.get(0), r.getColumnLatest(family, qf)); + assertByteEquals(Bytes.add(value, Bytes.toBytes(i)), r.getValue(family, qf)); + assertTrue(r.containsColumn(family, qf)); + } + } +}