From 9363a4a6dfc0266d3922c4c216c4c6c5cfbf8bb5 Mon Sep 17 00:00:00 2001 From: Bing Xiao Date: Fri, 17 May 2019 13:36:50 -0700 Subject: [PATCH] HBASE-22169 Open region failed cause memory leak Signed-off-by: stack --- .../hadoop/hbase/regionserver/HRegion.java | 38 +++++++++------- .../hbase/regionserver/TestHRegion.java | 44 +++++++++++++++++++ .../TestOpenSeqNumUnexpectedIncrease.java | 9 ++++ 3 files changed, 76 insertions(+), 15 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index e02f27e4099..45dbaf48c5a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -7231,21 +7231,29 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi */ protected HRegion openHRegion(final CancelableProgressable reporter) throws IOException { - // Refuse to open the region if we are missing local compression support - checkCompressionCodecs(); - // Refuse to open the region if encryption configuration is incorrect or - // codec support is missing - checkEncryption(); - // Refuse to open the region if a required class cannot be loaded - checkClassLoading(); - this.openSeqNum = initialize(reporter); - this.mvcc.advanceTo(openSeqNum); - // The openSeqNum must be increased every time when a region is assigned, as we rely on it to - // determine whether a region has been successfully reopened. So here we always write open - // marker, even if the table is read only. - if (wal != null && getRegionServerServices() != null && - RegionReplicaUtil.isDefaultReplica(getRegionInfo())) { - writeRegionOpenMarker(wal, openSeqNum); + try { + // Refuse to open the region if we are missing local compression support + checkCompressionCodecs(); + // Refuse to open the region if encryption configuration is incorrect or + // codec support is missing + checkEncryption(); + // Refuse to open the region if a required class cannot be loaded + checkClassLoading(); + this.openSeqNum = initialize(reporter); + this.mvcc.advanceTo(openSeqNum); + // The openSeqNum must be increased every time when a region is assigned, as we rely on it to + // determine whether a region has been successfully reopened. So here we always write open + // marker, even if the table is read only. + if (wal != null && getRegionServerServices() != null && + RegionReplicaUtil.isDefaultReplica(getRegionInfo())) { + writeRegionOpenMarker(wal, openSeqNum); + } + } catch(Throwable t) { + // By coprocessor path wrong region will open failed, + // MetricsRegionWrapperImpl is already init and not close, + // add region close when open failed + this.close(); + throw t; } return this; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index b4d10fe8f18..3d49dcf8cda 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -41,6 +41,7 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.io.InterruptedIOException; +import java.lang.reflect.Field; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; import java.security.PrivilegedExceptionAction; @@ -52,11 +53,14 @@ import java.util.Map; import java.util.NavigableMap; import java.util.Objects; import java.util.TreeMap; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -162,6 +166,7 @@ import org.apache.hadoop.hbase.wal.WALKeyImpl; import org.apache.hadoop.hbase.wal.WALProvider; import org.apache.hadoop.hbase.wal.WALProvider.Writer; import org.apache.hadoop.hbase.wal.WALSplitter; +import org.apache.hadoop.metrics2.MetricsExecutor; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -6595,6 +6600,45 @@ public class TestHRegion { region.close(); } + // make sure region is success close when coprocessor wrong region open failed + @Test + public void testOpenRegionFailedMemoryLeak() throws Exception { + final ServerName serverName = ServerName.valueOf("testOpenRegionFailed", 100, 42); + final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName)); + + HTableDescriptor htd + = new HTableDescriptor(TableName.valueOf("testOpenRegionFailed")); + htd.addFamily(new HColumnDescriptor(fam1)); + htd.setValue("COPROCESSOR$1", "hdfs://test/test.jar|test||"); + + HRegionInfo hri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY); + ScheduledExecutorService executor = CompatibilitySingletonFactory.getInstance( + MetricsExecutor.class).getExecutor(); + for (int i = 0; i < 20 ; i++) { + try { + HRegion.openHRegion(hri, htd, rss.getWAL(hri), + TEST_UTIL.getConfiguration(), rss, null); + }catch(Throwable t){ + LOG.info("Expected exception, continue"); + } + } + TimeUnit.SECONDS.sleep(MetricsRegionWrapperImpl.PERIOD); + Field[] fields = ThreadPoolExecutor.class.getDeclaredFields(); + boolean found = false; + for(Field field : fields){ + if(field.getName().equals("workQueue")){ + field.setAccessible(true); + BlockingQueue workQueue = (BlockingQueue)field.get(executor); + //there are still two task not cancel, can not cause to memory lack + Assert.assertTrue("ScheduledExecutor#workQueue should equals 2 , please check region is " + + "close", 2 == workQueue.size()); + found = true; + } + } + Assert.assertTrue("can not find workQueue, test failed", found); + } + /** * The same as HRegion class, the only difference is that instantiateHStore will * create a different HStore - HStoreForTesting. [HBASE-8518] diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestOpenSeqNumUnexpectedIncrease.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestOpenSeqNumUnexpectedIncrease.java index 14d5a98b4d8..40cfa615b1e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestOpenSeqNumUnexpectedIncrease.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestOpenSeqNumUnexpectedIncrease.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hbase.regionserver; import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -74,6 +76,13 @@ public class TestOpenSeqNumUnexpectedIncrease { throw new IOException("Inject error for testing"); } } + + public Map> close() throws IOException { + //skip close + return null; + } + + } @BeforeClass