From c1d32df547c581086c194a203fc3def3c240deed Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Wed, 30 Apr 2014 00:08:39 +0000
Subject: [PATCH] HBASE-10925 Do not OOME, throw RowTooBigException instead
 (Mikhail Antonov)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1591154 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/hadoop/hbase/HConstants.java   |  10 ++
 .../src/main/resources/hbase-default.xml      |   9 ++
 .../regionserver/RowTooBigException.java      |  33 +++++
 .../hbase/regionserver/StoreScanner.java      |  26 ++++
 .../hbase/regionserver/TestRowTooBig.java     | 132 ++++++++++++++++++
 5 files changed, 210 insertions(+)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowTooBigException.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowTooBig.java

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 082c5cef7f2..1b15770ffe6 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -311,6 +311,16 @@ public final class HConstants {
   /** Default maximum file size */
   public static final long DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024L;
 
+  /**
+   * Max size of a single row for Gets or Scans without the in-row scanning flag set.
+   */
+  public static final String TABLE_MAX_ROWSIZE_KEY = "hbase.table.max.rowsize";
+
+  /**
+   * Default max row size (1 GB).
+   */
+  public static final long TABLE_MAX_ROWSIZE_DEFAULT = 1024 * 1024 * 1024L;
+
   /**
    * The max number of threads used for opening and closing stores or store
    * files in parallel
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index c4289c79c9e..f0fafd18ec8 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -896,6 +896,15 @@ possible configurations would overwhelm and obscure the important.
       Table locking from master prevents concurrent schema modifications to corrupt table
       state.</description>
   </property>
+  <property>
+    <name>hbase.table.max.rowsize</name>
+    <value>1073741824</value>
+    <description>
+      Maximum size of a single row in bytes (default is 1 GB) for Gets
+      or Scans without the in-row scan flag set. If the row size exceeds
+      this limit, a RowTooBigException is thrown to the client.
+    </description>
+  </property>
   <property>
     <name>hbase.thrift.minWorkerThreads</name>
     <value>16</value>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowTooBigException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowTooBigException.java
new file mode 100644
index 00000000000..b9db0d5abbf
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowTooBigException.java
@@ -0,0 +1,33 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.RegionException;
+
+/**
+ * Gets or Scans throw this exception when they run without the in-row scan
+ * flag set and the row size appears to exceed the configured maximum
+ * (set via hbase.table.max.rowsize).
+ */
+public class RowTooBigException extends RegionException {
+
+  public RowTooBigException(String message) {
+    super(message);
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
index 90b61b8b40d..1ef3e914fbe 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@@ -31,6 +31,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
@@ -76,6 +77,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
   protected final NavigableSet<byte[]> columns;
   protected final long oldestUnexpiredTS;
   protected final int minVersions;
+  protected final long maxRowSize;
 
   /**
    * The number of KVs seen by the scanner. Includes explicitly skipped KVs, but not
@@ -123,6 +125,14 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
     oldestUnexpiredTS = EnvironmentEdgeManager.currentTimeMillis() - ttl;
     this.minVersions = minVersions;
 
+    if (store != null && ((HStore) store).getHRegion() != null
+        && ((HStore) store).getHRegion().getBaseConf() != null) {
+      this.maxRowSize = ((HStore) store).getHRegion().getBaseConf().getLong(
+        HConstants.TABLE_MAX_ROWSIZE_KEY, HConstants.TABLE_MAX_ROWSIZE_DEFAULT);
+    } else {
+      this.maxRowSize = HConstants.TABLE_MAX_ROWSIZE_DEFAULT;
+    }
+
     // We look up row-column Bloom filters for multi-column queries as part of
     // the seek operation. However, we also look the row-column Bloom filter
     // for multi-row (non-"get") scans because this is not done in
@@ -313,8 +323,17 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
       }
     } else {
       if (!isParallelSeek) {
+        long totalScannersSoughtBytes = 0;
         for (KeyValueScanner scanner : scanners) {
+          if (totalScannersSoughtBytes >= maxRowSize) {
+            throw new RowTooBigException("Max row size allowed: " + maxRowSize
+              + ", but the row is bigger than that.");
+          }
           scanner.seek(seekKey);
+          Cell c = scanner.peek();
+          if (c != null) {
+            totalScannersSoughtBytes += CellUtil.estimatedSizeOf(c);
+          }
         }
       } else {
         parallelSeek(scanners, seekKey);
@@ -461,6 +480,8 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
         store != null ? store.getComparator() : null;
 
     int count = 0;
+    long totalBytesRead = 0;
+
     LOOP: while((cell = this.heap.peek()) != null) {
       if (prevCell != cell) ++kvsScanned; // Do object compare - we set prevKV from the same heap.
       checkScanOrder(prevCell, cell, comparator);
@@ -494,6 +515,11 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
           if (this.countPerRow > storeOffset) {
             outResult.add(cell);
             count++;
+            totalBytesRead += CellUtil.estimatedSizeOf(cell);
+            if (totalBytesRead > maxRowSize) {
+              throw new RowTooBigException("Max row size allowed: " + maxRowSize
+                + ", but the row is bigger than that.");
+            }
           }
 
           if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowTooBig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowTooBig.java
new file mode 100644
index 00000000000..573a5c94719
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowTooBig.java
@@ -0,0 +1,132 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+
+/**
+ * Test case to check that the regionserver throws {@link RowTooBigException}
+ * when the row size exceeds the configured limit.
+ */
+@Category(MediumTests.class)
+public class TestRowTooBig {
+  private final static HBaseTestingUtility HTU = HBaseTestingUtility.createLocalHTU();
+  private static final HTableDescriptor TEST_HTD =
+    new HTableDescriptor(TableName.valueOf(TestRowTooBig.class.getSimpleName()));
+
+  @BeforeClass
+  public static void before() throws Exception {
+    HTU.startMiniCluster();
+    HTU.getConfiguration().setLong(HConstants.TABLE_MAX_ROWSIZE_KEY,
+      10 * 1024 * 1024L);
+  }
+
+  @AfterClass
+  public static void after() throws Exception {
+    HTU.shutdownMiniCluster();
+  }
+
+  /**
+   * Use case:
+   *  - create a row with 5 large cells (5 MB each)
+   *  - flush the memstore but don't compact the storefiles
+   *  - try to Get the whole row
+   *
+   * The OOME happened before we actually got to reading results, but
+   * during seeking, as each StoreFile gets its own scanner,
+   * and each scanner seeks after the first KV.
+   * @throws IOException
+   */
+  @Test(expected = RowTooBigException.class)
+  public void testScannersSeekOnFewLargeCells() throws IOException {
+    byte[] row1 = Bytes.toBytes("row1");
+    byte[] fam1 = Bytes.toBytes("fam1");
+
+    HTableDescriptor htd = TEST_HTD;
+    HColumnDescriptor hcd = new HColumnDescriptor(fam1);
+    htd.addFamily(hcd);
+
+    final HRegionInfo hri =
+      new HRegionInfo(htd.getTableName(), HConstants.EMPTY_END_ROW,
+        HConstants.EMPTY_END_ROW);
+    HRegion region = HTU.createLocalHRegion(hri, htd);
+
+    // Add 5 cells to memstore
+    for (int i = 0; i < 5; i++) {
+      Put put = new Put(row1);
+
+      put.add(fam1, Bytes.toBytes("col_" + i), new byte[5 * 1024 * 1024]);
+      region.put(put);
+      region.flushcache();
+    }
+
+    Get get = new Get(row1);
+    region.get(get);
+  }
+
+  /**
+   * Use case:
+   *
+   *  - create a row with 1M cells, 10 bytes in each
+   *  - flush & run major compaction
+   *  - try to Get the whole row
+   *
+   * The OOME happened in StoreScanner.next(..).
+   *
+   * @throws IOException
+   */
+  @Test(expected = RowTooBigException.class)
+  public void testScanAcrossManySmallColumns() throws IOException {
+    byte[] row1 = Bytes.toBytes("row1");
+    byte[] fam1 = Bytes.toBytes("fam1");
+
+    HTableDescriptor htd = TEST_HTD;
+    HColumnDescriptor hcd = new HColumnDescriptor(fam1);
+    htd.addFamily(hcd);
+
+    final HRegionInfo hri =
+      new HRegionInfo(htd.getTableName(), HConstants.EMPTY_END_ROW,
+        HConstants.EMPTY_END_ROW);
+    HRegion region = HTU.createLocalHRegion(hri, htd);
+
+    // Add to memstore
+    for (int i = 0; i < 10; i++) {
+      Put put = new Put(row1);
+      for (int j = 0; j < 10 * 10000; j++) {
+        put.add(fam1, Bytes.toBytes("col_" + i + "_" + j), new byte[10]);
+      }
+      region.put(put);
+      region.flushcache();
+    }
+    region.compactStores(true);
+
+    Get get = new Get(row1);
+    region.get(get);
+  }
+}
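
Editor's note, not part of the patch: for readers who want to see how the new limit behaves from the client side, here is a minimal, illustrative sketch against the 0.98-era client API. The class name, the table name "mytable", and the row key "row1" are hypothetical; the limit itself (hbase.table.max.rowsize, default 1 GB) is read on the regionserver from the cluster's hbase-site.xml, so the client cannot override it. A remote client will typically see the server-side RowTooBigException surface as an IOException from the RPC layer that names that class; batched Scans (for example via Scan#setBatch), which return a row in chunks, are presumably the "in-row scan" escape hatch the configuration description refers to.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

/** Illustrative client-side sketch only; names and table layout are hypothetical. */
public class RowTooBigClientSketch {
  public static void main(String[] args) throws IOException {
    // Picks up hbase-site.xml from the classpath; the row-size limit
    // (hbase.table.max.rowsize) is enforced on the regionserver, not here.
    Configuration conf = HBaseConfiguration.create();

    // "mytable" and "row1" are placeholders; substitute a real table and row key.
    HTable table = new HTable(conf, "mytable");
    try {
      Get get = new Get(Bytes.toBytes("row1"));
      Result result = table.get(get);
      System.out.println("Row has " + result.size() + " cells");
    } catch (IOException e) {
      // A row larger than the configured limit is rejected on the regionserver;
      // the client sees an IOException naming RowTooBigException instead of the
      // regionserver running out of memory while assembling the row.
      System.err.println("Get rejected; row exceeds hbase.table.max.rowsize: " + e);
    } finally {
      table.close();
    }
  }
}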