From e90ac5f812b1a6af8886e3afd7fa41528cfad30e Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Fri, 20 Mar 2015 17:24:04 -0700 Subject: [PATCH] HBASE-13303 Fix size calculation of results on the region server --- .../org/apache/hadoop/hbase/CellUtil.java | 15 +++ .../org/apache/hadoop/hbase/KeyValue.java | 21 +++ .../hbase/regionserver/RSRpcServices.java | 4 +- .../client/TestResultSizeEstimation.java | 127 ++++++++++++++++++ 4 files changed, 165 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestResultSizeEstimation.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java index c801654b5ba..b0eece85620 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java @@ -571,6 +571,21 @@ public final class CellUtil { return estimatedSerializedSizeOf(cell); } + /** + * This is a hack that should be removed once we don't care about matching + * up client- and server-side estimations of cell size. It needed to be + * backwards compatible with estimations done by older clients. We need to + * pretend that tags never exist and cells aren't serialized with tag + * length included. See HBASE-13262 and HBASE-13303 + */ + @Deprecated + public static long estimatedHeapSizeOfWithoutTags(final Cell cell) { + if (cell instanceof KeyValue) { + return ((KeyValue)cell).heapSizeWithoutTags(); + } + return getSumOfCellKeyElementLengths(cell) + cell.getValueLength(); + } + /********************* tags *************************************/ /** * Util method to iterate through the tags diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index 8cbd793addc..48daa83040a 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -2699,6 +2699,27 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId, return ClassSize.align(sum); } + /** + * This is a hack that should be removed once we don't care about matching + * up client- and server-side estimations of cell size. It needed to be + * backwards compatible with estimations done by older clients. We need to + * pretend that tags never exist and KeyValues aren't serialized with tag + * length included. See HBASE-13262 and HBASE-13303 + */ + @Deprecated + public long heapSizeWithoutTags() { + int sum = 0; + sum += ClassSize.OBJECT;// the KeyValue object itself + sum += ClassSize.REFERENCE;// pointer to "bytes" + sum += ClassSize.align(ClassSize.ARRAY);// "bytes" + sum += KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE; + sum += getKeyLength(); + sum += getValueLength(); + sum += 2 * Bytes.SIZEOF_INT;// offset, length + sum += Bytes.SIZEOF_LONG;// memstoreTS + return ClassSize.align(sum); + } + /** * A simple form of KeyValue that creates a keyvalue with only the key part of the byte[] * Mainly used in places where we need to compare two cells. Avoids copying of bytes diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index a48d055bdf7..ba7b70ca23c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -2121,7 +2121,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, for (Result r : results) { for (Cell cell : r.rawCells()) { totalCellSize += CellUtil.estimatedSerializedSizeOf(cell); - currentScanResultSize += CellUtil.estimatedHeapSizeOf(cell); + currentScanResultSize += CellUtil.estimatedHeapSizeOfWithoutTags(cell); } } } @@ -2184,7 +2184,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, // If the calculation can't be skipped, then do it now. if (!skipResultSizeCalculation) { - currentScanResultSize += CellUtil.estimatedHeapSizeOf(cell); + currentScanResultSize += CellUtil.estimatedHeapSizeOfWithoutTags(cell); } } // The size limit was reached. This means there are more cells remaining in diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestResultSizeEstimation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestResultSizeEstimation.java new file mode 100644 index 00000000000..1f3a95bba62 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestResultSizeEstimation.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertEquals; + +@Category(LargeTests.class) +public class TestResultSizeEstimation { + + final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + final static int TAG_DATA_SIZE = 2048; + final static int SCANNER_DATA_LIMIT = TAG_DATA_SIZE + 256; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + // Need HFileV3 + conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS); + // effectively limit max result size to one entry if it has tags + conf.setLong(HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY, SCANNER_DATA_LIMIT); + conf.setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true); + TEST_UTIL.startMiniCluster(1); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testResultSizeEstimation() throws Exception { + byte [] ROW1 = Bytes.toBytes("testRow1"); + byte [] ROW2 = Bytes.toBytes("testRow2"); + byte [] FAMILY = Bytes.toBytes("testFamily"); + byte [] QUALIFIER = Bytes.toBytes("testQualifier"); + byte [] VALUE = Bytes.toBytes("testValue"); + + TableName TABLE = TableName.valueOf("testResultSizeEstimation"); + byte[][] FAMILIES = new byte[][] { FAMILY }; + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + HTable table = TEST_UTIL.createTable(TABLE, FAMILIES, conf); + Put p = new Put(ROW1); + p.add(new KeyValue(ROW1, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE)); + table.put(p); + p = new Put(ROW2); + p.add(new KeyValue(ROW2, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE)); + table.put(p); + + Scan s = new Scan(); + s.setMaxResultSize(SCANNER_DATA_LIMIT); + ResultScanner rs = table.getScanner(s); + int count = 0; + while(rs.next() != null) { + count++; + } + assertEquals("Result size estimation did not work properly", 2, count); + rs.close(); + table.close(); + } + + @Test + public void testResultSizeEstimationWithTags() throws Exception { + byte [] ROW1 = Bytes.toBytes("testRow1"); + byte [] ROW2 = Bytes.toBytes("testRow2"); + byte [] FAMILY = Bytes.toBytes("testFamily"); + byte [] QUALIFIER = Bytes.toBytes("testQualifier"); + byte [] VALUE = Bytes.toBytes("testValue"); + + TableName TABLE = TableName.valueOf("testResultSizeEstimationWithTags"); + byte[][] FAMILIES = new byte[][] { FAMILY }; + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + HTable table = TEST_UTIL.createTable(TABLE, FAMILIES, conf); + Put p = new Put(ROW1); + p.add(new KeyValue(ROW1, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE, + new Tag[] { new Tag((byte)1, new byte[TAG_DATA_SIZE]) } )); + table.put(p); + p = new Put(ROW2); + p.add(new KeyValue(ROW2, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE, + new Tag[] { new Tag((byte)1, new byte[TAG_DATA_SIZE]) } )); + table.put(p); + + Scan s = new Scan(); + s.setMaxResultSize(SCANNER_DATA_LIMIT); + ResultScanner rs = table.getScanner(s); + int count = 0; + while(rs.next() != null) { + count++; + } + assertEquals("Result size estimation did not work properly", 2, count); + rs.close(); + table.close(); + } +} \ No newline at end of file