From 10cc64a7d690429174405517b3e7d75e4f0998fa Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 5 Nov 2019 01:06:38 +0530 Subject: [PATCH] HBASE-23212 : Dynamically reload configs for Region Recovery chore (#773) * HBASE-23212 : Dynamically reload configs for Region Recovery chore * remove redundant volatile --- .../org/apache/hadoop/hbase/HConstants.java | 5 + .../apache/hadoop/hbase/master/HMaster.java | 5 + .../hbase/master/RegionsRecoveryChore.java | 25 ++- .../master/RegionsRecoveryConfigManager.java | 100 ++++++++++++ .../TestRegionsRecoveryConfigManager.java | 147 ++++++++++++++++++ .../asciidoc/_chapters/configuration.adoc | 2 + 6 files changed, 277 insertions(+), 7 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index d339ca569e6..ce482ed022e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1481,6 +1481,11 @@ public final class HConstants { // default -1 indicates there is no threshold on high storeRefCount public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1; + public static final String REGIONS_RECOVERY_INTERVAL = + "hbase.master.regions.recovery.check.interval"; + + public static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min + /** * Configurations for master executor services. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index bb3c12c0b88..8bb8851083b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -423,6 +423,8 @@ public class HMaster extends HRegionServer implements MasterServices { private MasterProcedureManagerHost mpmHost; private RegionsRecoveryChore regionsRecoveryChore = null; + + private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null; // it is assigned after 'initialized' guard set to true, so should be volatile private volatile MasterQuotaManager quotaManager; private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier; @@ -1146,6 +1148,7 @@ public class HMaster extends HRegionServer implements MasterServices { configurationManager.registerObserver(this.cleanerPool); configurationManager.registerObserver(this.hfileCleaner); configurationManager.registerObserver(this.logCleaner); + configurationManager.registerObserver(this.regionsRecoveryConfigManager); // Set master as 'initialized'. setInitialized(true); @@ -1480,6 +1483,8 @@ public class HMaster extends HRegionServer implements MasterServices { HConstants.STORE_FILE_REF_COUNT_THRESHOLD); } + this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this); + replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(), replicationPeerManager); getChoreService().scheduleChore(replicationBarrierCleaner); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java index 7502eeb9462..c5ad8676756 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java @@ -52,11 +52,6 @@ public class RegionsRecoveryChore extends ScheduledChore { private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore"; - private static final String REGIONS_RECOVERY_INTERVAL = - "hbase.master.regions.recovery.check.interval"; - - private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ? - private static final String ERROR_REOPEN_REIONS_MSG = "Error reopening regions with high storeRefCount. "; @@ -76,8 +71,8 @@ public class RegionsRecoveryChore extends ScheduledChore { RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration, final HMaster hMaster) { - super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL, - DEFAULT_REGIONS_RECOVERY_INTERVAL)); + super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt( + HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL)); this.hMaster = hMaster; this.storeFileRefCountThreshold = configuration.getInt( HConstants.STORE_FILE_REF_COUNT_THRESHOLD, @@ -171,4 +166,20 @@ public class RegionsRecoveryChore extends ScheduledChore { } + // hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore + // is scheduled at a time - RegionsRecoveryConfigManager + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + return o != null && getClass() == o.getClass(); + } + + @Override + public int hashCode() { + return 31; + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java new file mode 100644 index 00000000000..b1bfdc0ecb0 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ChoreService; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.conf.ConfigurationObserver; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Config manager for RegionsRecovery Chore - Dynamically reload config and update chore + * accordingly + */ +@InterfaceAudience.Private +public class RegionsRecoveryConfigManager implements ConfigurationObserver { + + private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryConfigManager.class); + + private final HMaster hMaster; + private int prevMaxStoreFileRefCount; + private int prevRegionsRecoveryInterval; + + RegionsRecoveryConfigManager(final HMaster hMaster) { + this.hMaster = hMaster; + Configuration conf = hMaster.getConfiguration(); + this.prevMaxStoreFileRefCount = getMaxStoreFileRefCount(conf); + this.prevRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf); + } + + @Override + public void onConfigurationChange(Configuration conf) { + final int newMaxStoreFileRefCount = getMaxStoreFileRefCount(conf); + final int newRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf); + + if (prevMaxStoreFileRefCount == newMaxStoreFileRefCount + && prevRegionsRecoveryInterval == newRegionsRecoveryInterval) { + // no need to re-schedule the chore with updated config + // as there is no change in desired configs + return; + } + + LOG.info("Config Reload for RegionsRecovery Chore. prevMaxStoreFileRefCount: {}," + + " newMaxStoreFileRefCount: {}, prevRegionsRecoveryInterval: {}, " + + "newRegionsRecoveryInterval: {}", prevMaxStoreFileRefCount, newMaxStoreFileRefCount, + prevRegionsRecoveryInterval, newRegionsRecoveryInterval); + + RegionsRecoveryChore regionsRecoveryChore = new RegionsRecoveryChore(this.hMaster, + conf, this.hMaster); + ChoreService choreService = this.hMaster.getChoreService(); + + // Regions Reopen based on very high storeFileRefCount is considered enabled + // only if hbase.regions.recovery.store.file.ref.count has value > 0 + + synchronized (this) { + if (newMaxStoreFileRefCount > 0) { + // reschedule the chore + // provide mayInterruptIfRunning - false to take care of completion + // of in progress task if any + choreService.cancelChore(regionsRecoveryChore, false); + choreService.scheduleChore(regionsRecoveryChore); + } else { + choreService.cancelChore(regionsRecoveryChore, false); + } + this.prevMaxStoreFileRefCount = newMaxStoreFileRefCount; + this.prevRegionsRecoveryInterval = newRegionsRecoveryInterval; + } + } + + private int getMaxStoreFileRefCount(Configuration configuration) { + return configuration.getInt( + HConstants.STORE_FILE_REF_COUNT_THRESHOLD, + HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD); + } + + private int getRegionsRecoveryChoreInterval(Configuration configuration) { + return configuration.getInt( + HConstants.REGIONS_RECOVERY_INTERVAL, + HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL); + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java new file mode 100644 index 00000000000..22554d355b9 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.StartMiniClusterOption; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.zookeeper.KeeperException; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test for Regions Recovery Config Manager + */ +@Category({MasterTests.class, MediumTests.class}) +public class TestRegionsRecoveryConfigManager { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionsRecoveryConfigManager.class); + + private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility(); + + private MiniHBaseCluster cluster; + + private HMaster hMaster; + + private RegionsRecoveryChore regionsRecoveryChore; + + private RegionsRecoveryConfigManager regionsRecoveryConfigManager; + + private Configuration conf; + + @Before + public void setup() throws Exception { + conf = HBASE_TESTING_UTILITY.getConfiguration(); + conf.unset("hbase.regions.recovery.store.file.ref.count"); + conf.unset("hbase.master.regions.recovery.check.interval"); + StartMiniClusterOption option = StartMiniClusterOption.builder() + .masterClass(TestHMaster.class) + .numRegionServers(1) + .numDataNodes(1).build(); + HBASE_TESTING_UTILITY.startMiniCluster(option); + cluster = HBASE_TESTING_UTILITY.getMiniHBaseCluster(); + } + + @After + public void tearDown() throws Exception { + HBASE_TESTING_UTILITY.shutdownMiniCluster(); + } + + @Test + public void testChoreSchedule() throws Exception { + + this.hMaster = cluster.getMaster(); + + Stoppable stoppable = new StoppableImplementation(); + this.regionsRecoveryChore = new RegionsRecoveryChore(stoppable, conf, hMaster); + + this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this.hMaster); + // not yet scheduled + Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + + this.regionsRecoveryConfigManager.onConfigurationChange(conf); + // not yet scheduled + Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + + conf.setInt("hbase.master.regions.recovery.check.interval", 10); + this.regionsRecoveryConfigManager.onConfigurationChange(conf); + // not yet scheduled - missing config: hbase.regions.recovery.store.file.ref.count + Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + + conf.setInt("hbase.regions.recovery.store.file.ref.count", 10); + this.regionsRecoveryConfigManager.onConfigurationChange(conf); + // chore scheduled + Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + + conf.setInt("hbase.regions.recovery.store.file.ref.count", 20); + this.regionsRecoveryConfigManager.onConfigurationChange(conf); + // chore re-scheduled + Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + + conf.setInt("hbase.regions.recovery.store.file.ref.count", 20); + this.regionsRecoveryConfigManager.onConfigurationChange(conf); + // chore scheduling untouched + Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + + conf.unset("hbase.regions.recovery.store.file.ref.count"); + this.regionsRecoveryConfigManager.onConfigurationChange(conf); + // chore un-scheduled + Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore)); + } + + // Make it public so that JVMClusterUtil can access it. + public static class TestHMaster extends HMaster { + public TestHMaster(Configuration conf) throws IOException, KeeperException { + super(conf); + } + } + + /** + * Simple helper class that just keeps track of whether or not its stopped. + */ + private static class StoppableImplementation implements Stoppable { + + private boolean stop = false; + + @Override + public void stop(String why) { + this.stop = true; + } + + @Override + public boolean isStopped() { + return this.stop; + } + + } + +} \ No newline at end of file diff --git a/src/main/asciidoc/_chapters/configuration.adoc b/src/main/asciidoc/_chapters/configuration.adoc index dbb7ac16de2..88aba19b05f 100644 --- a/src/main/asciidoc/_chapters/configuration.adoc +++ b/src/main/asciidoc/_chapters/configuration.adoc @@ -1150,6 +1150,8 @@ Here are those configurations: | hbase.master.balancer.stochastic.moveCost | hbase.master.balancer.stochastic.maxMovePercent | hbase.master.balancer.stochastic.tableSkewCost +| hbase.master.regions.recovery.check.interval +| hbase.regions.recovery.store.file.ref.count |=== ifdef::backend-docbook[]