HBASE-13303 Fix size calculation of results on the region server

Andrew Purtell 2015-03-20 17:24:05 -07:00
parent 0915191167
commit 8dfed5dc88
4 changed files with 165 additions and 2 deletions


@@ -568,6 +568,21 @@ public final class CellUtil {
    return estimatedSerializedSizeOf(cell);
  }

  /**
   * This is a hack that should be removed once we don't care about matching
   * up client- and server-side estimations of cell size. It needed to be
   * backwards compatible with estimations done by older clients. We need to
   * pretend that tags never exist and cells aren't serialized with tag
   * length included. See HBASE-13262 and HBASE-13303
   */
  @Deprecated
  public static long estimatedHeapSizeOfWithoutTags(final Cell cell) {
    if (cell instanceof KeyValue) {
      return ((KeyValue)cell).heapSizeWithoutTags();
    }
    return getSumOfCellKeyElementLengths(cell) + cell.getValueLength();
  }

  /********************* tags *************************************/

  /**
   * Util method to iterate through the tags

@@ -2640,6 +2640,27 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
    return ClassSize.align(sum);
  }

  /**
   * This is a hack that should be removed once we don't care about matching
   * up client- and server-side estimations of cell size. It needed to be
   * backwards compatible with estimations done by older clients. We need to
   * pretend that tags never exist and KeyValues aren't serialized with tag
   * length included. See HBASE-13262 and HBASE-13303
   */
  @Deprecated
  public long heapSizeWithoutTags() {
    int sum = 0;
    sum += ClassSize.OBJECT;// the KeyValue object itself
    sum += ClassSize.REFERENCE;// pointer to "bytes"
    sum += ClassSize.align(ClassSize.ARRAY);// "bytes"
    sum += KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
    sum += getKeyLength();
    sum += getValueLength();
    sum += 2 * Bytes.SIZEOF_INT;// offset, length
    sum += Bytes.SIZEOF_LONG;// memstoreTS
    return ClassSize.align(sum);
  }

  /**
   * A simple form of KeyValue that creates a keyvalue with only the key part of the byte[]
   * Mainly used in places where we need to compare two cells. Avoids copying of bytes
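A quick way to see the "pretend that tags never exist" behavior (again a standalone sketch with made-up values, not code from the commit): two KeyValues that differ only in their tags report the same heapSizeWithoutTags(), while heapSize() counts the whole serialized cell and therefore differs.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class HeapSizeWithoutTagsSketch {
  public static void main(String[] args) {
    byte[] row = Bytes.toBytes("r");
    byte[] fam = Bytes.toBytes("f");
    byte[] qual = Bytes.toBytes("q");
    byte[] val = Bytes.toBytes("v");

    KeyValue plain = new KeyValue(row, fam, qual, 1L, val);
    KeyValue tagged = new KeyValue(row, fam, qual, 1L, val,
      new Tag[] { new Tag((byte) 1, new byte[64]) });

    // Only key and value components are summed, so the 64-byte tag is invisible here.
    System.out.println(plain.heapSizeWithoutTags() == tagged.heapSizeWithoutTags()); // true
    // heapSize() reflects the full backing array, tags included, so the tagged cell is larger.
    System.out.println(plain.heapSize() < tagged.heapSize()); // true
  }
}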


@@ -2129,7 +2129,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
          for (Result r : results) {
            for (Cell cell : r.rawCells()) {
              totalCellSize += CellUtil.estimatedSerializedSizeOf(cell);
-             currentScanResultSize += CellUtil.estimatedHeapSizeOf(cell);
+             currentScanResultSize += CellUtil.estimatedHeapSizeOfWithoutTags(cell);
            }
          }
        }
@@ -2192,7 +2192,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
            // If the calculation can't be skipped, then do it now.
            if (!skipResultSizeCalculation) {
-             currentScanResultSize += CellUtil.estimatedHeapSizeOf(cell);
+             currentScanResultSize += CellUtil.estimatedHeapSizeOfWithoutTags(cell);
            }
          }
          // The size limit was reached. This means there are more cells remaining in

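The effect of the two hunks above, in simplified form (this is not the actual RSRpcServices code; the method shape and the maxResultSize parameter are assumptions made for illustration): the server now sizes every cell the same way a tag-unaware client will, so client and server agree on when the configured maximum result size has been reached.

import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;

public class ScanSizeAccountingSketch {
  static boolean reachedLimit(List<Result> results, long maxResultSize) {
    long currentScanResultSize = 0;
    for (Result r : results) {
      for (Cell cell : r.rawCells()) {
        // Previously estimatedHeapSizeOf(cell), which also counted tag bytes
        // that older clients leave out of their own estimate.
        currentScanResultSize += CellUtil.estimatedHeapSizeOfWithoutTags(cell);
      }
    }
    return currentScanResultSize >= maxResultSize;
  }
}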

@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import static org.junit.Assert.assertEquals;

@Category(LargeTests.class)
public class TestResultSizeEstimation {

  final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  final static int TAG_DATA_SIZE = 2048;
  final static int SCANNER_DATA_LIMIT = TAG_DATA_SIZE + 256;

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // Need HFileV3
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
    // effectively limit max result size to one entry if it has tags
    conf.setLong(HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY, SCANNER_DATA_LIMIT);
    conf.setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true);
    TEST_UTIL.startMiniCluster(1);
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @Test
  public void testResultSizeEstimation() throws Exception {
    byte [] ROW1 = Bytes.toBytes("testRow1");
    byte [] ROW2 = Bytes.toBytes("testRow2");
    byte [] FAMILY = Bytes.toBytes("testFamily");
    byte [] QUALIFIER = Bytes.toBytes("testQualifier");
    byte [] VALUE = Bytes.toBytes("testValue");
    TableName TABLE = TableName.valueOf("testResultSizeEstimation");
    byte[][] FAMILIES = new byte[][] { FAMILY };
    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
    HTable table = TEST_UTIL.createTable(TABLE, FAMILIES, conf);

    Put p = new Put(ROW1);
    p.add(new KeyValue(ROW1, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE));
    table.put(p);
    p = new Put(ROW2);
    p.add(new KeyValue(ROW2, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE));
    table.put(p);

    Scan s = new Scan();
    s.setMaxResultSize(SCANNER_DATA_LIMIT);
    ResultScanner rs = table.getScanner(s);
    int count = 0;
    while(rs.next() != null) {
      count++;
    }
    assertEquals("Result size estimation did not work properly", 2, count);
    rs.close();
    table.close();
  }

  @Test
  public void testResultSizeEstimationWithTags() throws Exception {
    byte [] ROW1 = Bytes.toBytes("testRow1");
    byte [] ROW2 = Bytes.toBytes("testRow2");
    byte [] FAMILY = Bytes.toBytes("testFamily");
    byte [] QUALIFIER = Bytes.toBytes("testQualifier");
    byte [] VALUE = Bytes.toBytes("testValue");
    TableName TABLE = TableName.valueOf("testResultSizeEstimationWithTags");
    byte[][] FAMILIES = new byte[][] { FAMILY };
    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
    HTable table = TEST_UTIL.createTable(TABLE, FAMILIES, conf);

    Put p = new Put(ROW1);
    p.add(new KeyValue(ROW1, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE,
      new Tag[] { new Tag((byte)1, new byte[TAG_DATA_SIZE]) } ));
    table.put(p);
    p = new Put(ROW2);
    p.add(new KeyValue(ROW2, FAMILY, QUALIFIER, Long.MAX_VALUE, VALUE,
      new Tag[] { new Tag((byte)1, new byte[TAG_DATA_SIZE]) } ));
    table.put(p);

    Scan s = new Scan();
    s.setMaxResultSize(SCANNER_DATA_LIMIT);
    ResultScanner rs = table.getScanner(s);
    int count = 0;
    while(rs.next() != null) {
      count++;
    }
    assertEquals("Result size estimation did not work properly", 2, count);
    rs.close();
    table.close();
  }
}