HBASE-23212 : Dynamically reload configs for Region Recovery chore (#773)

* HBASE-23212 : Dynamically reload configs for Region Recovery chore

* remove redundant volatile
This commit is contained in:
Viraj Jasani 2019-11-05 01:06:38 +05:30 committed by cang
parent 2f8860b1da
commit 10cc64a7d6
6 changed files with 277 additions and 7 deletions

View File

@ -1481,6 +1481,11 @@ public final class HConstants {
// default -1 indicates there is no threshold on high storeRefCount // default -1 indicates there is no threshold on high storeRefCount
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1; public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
public static final String REGIONS_RECOVERY_INTERVAL =
"hbase.master.regions.recovery.check.interval";
public static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min
/** /**
* Configurations for master executor services. * Configurations for master executor services.
*/ */

View File

@ -423,6 +423,8 @@ public class HMaster extends HRegionServer implements MasterServices {
private MasterProcedureManagerHost mpmHost; private MasterProcedureManagerHost mpmHost;
private RegionsRecoveryChore regionsRecoveryChore = null; private RegionsRecoveryChore regionsRecoveryChore = null;
private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
// it is assigned after 'initialized' guard set to true, so should be volatile // it is assigned after 'initialized' guard set to true, so should be volatile
private volatile MasterQuotaManager quotaManager; private volatile MasterQuotaManager quotaManager;
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier; private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
@ -1146,6 +1148,7 @@ public class HMaster extends HRegionServer implements MasterServices {
configurationManager.registerObserver(this.cleanerPool); configurationManager.registerObserver(this.cleanerPool);
configurationManager.registerObserver(this.hfileCleaner); configurationManager.registerObserver(this.hfileCleaner);
configurationManager.registerObserver(this.logCleaner); configurationManager.registerObserver(this.logCleaner);
configurationManager.registerObserver(this.regionsRecoveryConfigManager);
// Set master as 'initialized'. // Set master as 'initialized'.
setInitialized(true); setInitialized(true);
@ -1480,6 +1483,8 @@ public class HMaster extends HRegionServer implements MasterServices {
HConstants.STORE_FILE_REF_COUNT_THRESHOLD); HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
} }
this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(), replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
replicationPeerManager); replicationPeerManager);
getChoreService().scheduleChore(replicationBarrierCleaner); getChoreService().scheduleChore(replicationBarrierCleaner);

View File

@ -52,11 +52,6 @@ public class RegionsRecoveryChore extends ScheduledChore {
private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore"; private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
private static final String REGIONS_RECOVERY_INTERVAL =
"hbase.master.regions.recovery.check.interval";
private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ?
private static final String ERROR_REOPEN_REIONS_MSG = private static final String ERROR_REOPEN_REIONS_MSG =
"Error reopening regions with high storeRefCount. "; "Error reopening regions with high storeRefCount. ";
@ -76,8 +71,8 @@ public class RegionsRecoveryChore extends ScheduledChore {
RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration, RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
final HMaster hMaster) { final HMaster hMaster) {
super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL, super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
DEFAULT_REGIONS_RECOVERY_INTERVAL)); HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
this.hMaster = hMaster; this.hMaster = hMaster;
this.storeFileRefCountThreshold = configuration.getInt( this.storeFileRefCountThreshold = configuration.getInt(
HConstants.STORE_FILE_REF_COUNT_THRESHOLD, HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
@ -171,4 +166,20 @@ public class RegionsRecoveryChore extends ScheduledChore {
} }
// hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore
// is scheduled at a time - RegionsRecoveryConfigManager
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
return o != null && getClass() == o.getClass();
}
@Override
public int hashCode() {
return 31;
}
} }

View File

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Config manager for RegionsRecovery Chore - Dynamically reload config and update chore
* accordingly
*/
@InterfaceAudience.Private
public class RegionsRecoveryConfigManager implements ConfigurationObserver {
private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryConfigManager.class);
private final HMaster hMaster;
private int prevMaxStoreFileRefCount;
private int prevRegionsRecoveryInterval;
RegionsRecoveryConfigManager(final HMaster hMaster) {
this.hMaster = hMaster;
Configuration conf = hMaster.getConfiguration();
this.prevMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
this.prevRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
}
@Override
public void onConfigurationChange(Configuration conf) {
final int newMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
final int newRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
if (prevMaxStoreFileRefCount == newMaxStoreFileRefCount
&& prevRegionsRecoveryInterval == newRegionsRecoveryInterval) {
// no need to re-schedule the chore with updated config
// as there is no change in desired configs
return;
}
LOG.info("Config Reload for RegionsRecovery Chore. prevMaxStoreFileRefCount: {}," +
" newMaxStoreFileRefCount: {}, prevRegionsRecoveryInterval: {}, " +
"newRegionsRecoveryInterval: {}", prevMaxStoreFileRefCount, newMaxStoreFileRefCount,
prevRegionsRecoveryInterval, newRegionsRecoveryInterval);
RegionsRecoveryChore regionsRecoveryChore = new RegionsRecoveryChore(this.hMaster,
conf, this.hMaster);
ChoreService choreService = this.hMaster.getChoreService();
// Regions Reopen based on very high storeFileRefCount is considered enabled
// only if hbase.regions.recovery.store.file.ref.count has value > 0
synchronized (this) {
if (newMaxStoreFileRefCount > 0) {
// reschedule the chore
// provide mayInterruptIfRunning - false to take care of completion
// of in progress task if any
choreService.cancelChore(regionsRecoveryChore, false);
choreService.scheduleChore(regionsRecoveryChore);
} else {
choreService.cancelChore(regionsRecoveryChore, false);
}
this.prevMaxStoreFileRefCount = newMaxStoreFileRefCount;
this.prevRegionsRecoveryInterval = newRegionsRecoveryInterval;
}
}
private int getMaxStoreFileRefCount(Configuration configuration) {
return configuration.getInt(
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
}
private int getRegionsRecoveryChoreInterval(Configuration configuration) {
return configuration.getInt(
HConstants.REGIONS_RECOVERY_INTERVAL,
HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL);
}
}

View File

@ -0,0 +1,147 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
* Test for Regions Recovery Config Manager
*/
@Category({MasterTests.class, MediumTests.class})
public class TestRegionsRecoveryConfigManager {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestRegionsRecoveryConfigManager.class);
private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility();
private MiniHBaseCluster cluster;
private HMaster hMaster;
private RegionsRecoveryChore regionsRecoveryChore;
private RegionsRecoveryConfigManager regionsRecoveryConfigManager;
private Configuration conf;
@Before
public void setup() throws Exception {
conf = HBASE_TESTING_UTILITY.getConfiguration();
conf.unset("hbase.regions.recovery.store.file.ref.count");
conf.unset("hbase.master.regions.recovery.check.interval");
StartMiniClusterOption option = StartMiniClusterOption.builder()
.masterClass(TestHMaster.class)
.numRegionServers(1)
.numDataNodes(1).build();
HBASE_TESTING_UTILITY.startMiniCluster(option);
cluster = HBASE_TESTING_UTILITY.getMiniHBaseCluster();
}
@After
public void tearDown() throws Exception {
HBASE_TESTING_UTILITY.shutdownMiniCluster();
}
@Test
public void testChoreSchedule() throws Exception {
this.hMaster = cluster.getMaster();
Stoppable stoppable = new StoppableImplementation();
this.regionsRecoveryChore = new RegionsRecoveryChore(stoppable, conf, hMaster);
this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this.hMaster);
// not yet scheduled
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
// not yet scheduled
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
conf.setInt("hbase.master.regions.recovery.check.interval", 10);
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
// not yet scheduled - missing config: hbase.regions.recovery.store.file.ref.count
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
conf.setInt("hbase.regions.recovery.store.file.ref.count", 10);
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
// chore scheduled
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
// chore re-scheduled
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
// chore scheduling untouched
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
conf.unset("hbase.regions.recovery.store.file.ref.count");
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
// chore un-scheduled
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
}
// Make it public so that JVMClusterUtil can access it.
public static class TestHMaster extends HMaster {
public TestHMaster(Configuration conf) throws IOException, KeeperException {
super(conf);
}
}
/**
* Simple helper class that just keeps track of whether or not its stopped.
*/
private static class StoppableImplementation implements Stoppable {
private boolean stop = false;
@Override
public void stop(String why) {
this.stop = true;
}
@Override
public boolean isStopped() {
return this.stop;
}
}
}

View File

@ -1150,6 +1150,8 @@ Here are those configurations:
| hbase.master.balancer.stochastic.moveCost | hbase.master.balancer.stochastic.moveCost
| hbase.master.balancer.stochastic.maxMovePercent | hbase.master.balancer.stochastic.maxMovePercent
| hbase.master.balancer.stochastic.tableSkewCost | hbase.master.balancer.stochastic.tableSkewCost
| hbase.master.regions.recovery.check.interval
| hbase.regions.recovery.store.file.ref.count
|=== |===
ifdef::backend-docbook[] ifdef::backend-docbook[]