HBASE-23212 : Dynamically reload configs for Region Recovery chore (#773)
* HBASE-23212 : Dynamically reload configs for Region Recovery chore * remove redundant volatile
This commit is contained in:
parent
2f8860b1da
commit
10cc64a7d6
|
@ -1481,6 +1481,11 @@ public final class HConstants {
|
||||||
// default -1 indicates there is no threshold on high storeRefCount
|
// default -1 indicates there is no threshold on high storeRefCount
|
||||||
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
|
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
|
||||||
|
|
||||||
|
public static final String REGIONS_RECOVERY_INTERVAL =
|
||||||
|
"hbase.master.regions.recovery.check.interval";
|
||||||
|
|
||||||
|
public static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configurations for master executor services.
|
* Configurations for master executor services.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -423,6 +423,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
private MasterProcedureManagerHost mpmHost;
|
private MasterProcedureManagerHost mpmHost;
|
||||||
|
|
||||||
private RegionsRecoveryChore regionsRecoveryChore = null;
|
private RegionsRecoveryChore regionsRecoveryChore = null;
|
||||||
|
|
||||||
|
private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
|
||||||
// it is assigned after 'initialized' guard set to true, so should be volatile
|
// it is assigned after 'initialized' guard set to true, so should be volatile
|
||||||
private volatile MasterQuotaManager quotaManager;
|
private volatile MasterQuotaManager quotaManager;
|
||||||
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
|
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
|
||||||
|
@ -1146,6 +1148,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
configurationManager.registerObserver(this.cleanerPool);
|
configurationManager.registerObserver(this.cleanerPool);
|
||||||
configurationManager.registerObserver(this.hfileCleaner);
|
configurationManager.registerObserver(this.hfileCleaner);
|
||||||
configurationManager.registerObserver(this.logCleaner);
|
configurationManager.registerObserver(this.logCleaner);
|
||||||
|
configurationManager.registerObserver(this.regionsRecoveryConfigManager);
|
||||||
// Set master as 'initialized'.
|
// Set master as 'initialized'.
|
||||||
setInitialized(true);
|
setInitialized(true);
|
||||||
|
|
||||||
|
@ -1480,6 +1483,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
|
HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
|
||||||
|
|
||||||
replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
|
replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
|
||||||
replicationPeerManager);
|
replicationPeerManager);
|
||||||
getChoreService().scheduleChore(replicationBarrierCleaner);
|
getChoreService().scheduleChore(replicationBarrierCleaner);
|
||||||
|
|
|
@ -52,11 +52,6 @@ public class RegionsRecoveryChore extends ScheduledChore {
|
||||||
|
|
||||||
private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
|
private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
|
||||||
|
|
||||||
private static final String REGIONS_RECOVERY_INTERVAL =
|
|
||||||
"hbase.master.regions.recovery.check.interval";
|
|
||||||
|
|
||||||
private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ?
|
|
||||||
|
|
||||||
private static final String ERROR_REOPEN_REIONS_MSG =
|
private static final String ERROR_REOPEN_REIONS_MSG =
|
||||||
"Error reopening regions with high storeRefCount. ";
|
"Error reopening regions with high storeRefCount. ";
|
||||||
|
|
||||||
|
@ -76,8 +71,8 @@ public class RegionsRecoveryChore extends ScheduledChore {
|
||||||
RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
|
RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
|
||||||
final HMaster hMaster) {
|
final HMaster hMaster) {
|
||||||
|
|
||||||
super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
|
super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
|
||||||
DEFAULT_REGIONS_RECOVERY_INTERVAL));
|
HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
|
||||||
this.hMaster = hMaster;
|
this.hMaster = hMaster;
|
||||||
this.storeFileRefCountThreshold = configuration.getInt(
|
this.storeFileRefCountThreshold = configuration.getInt(
|
||||||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
|
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
|
||||||
|
@ -171,4 +166,20 @@ public class RegionsRecoveryChore extends ScheduledChore {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// equals/hashCode are overridden so that all RegionsRecoveryChore instances compare equal;
|
||||||
|
// this ensures at most one RegionsRecoveryChore is scheduled at a time (see RegionsRecoveryConfigManager)
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return o != null && getClass() == o.getClass();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 31;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.master;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.ChoreService;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Config manager for RegionsRecovery Chore - Dynamically reload config and update chore
|
||||||
|
* accordingly
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class RegionsRecoveryConfigManager implements ConfigurationObserver {
|
||||||
|
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryConfigManager.class);
|
||||||
|
|
||||||
|
private final HMaster hMaster;
|
||||||
|
private int prevMaxStoreFileRefCount;
|
||||||
|
private int prevRegionsRecoveryInterval;
|
||||||
|
|
||||||
|
RegionsRecoveryConfigManager(final HMaster hMaster) {
|
||||||
|
this.hMaster = hMaster;
|
||||||
|
Configuration conf = hMaster.getConfiguration();
|
||||||
|
this.prevMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
|
||||||
|
this.prevRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onConfigurationChange(Configuration conf) {
|
||||||
|
final int newMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
|
||||||
|
final int newRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
|
||||||
|
|
||||||
|
if (prevMaxStoreFileRefCount == newMaxStoreFileRefCount
|
||||||
|
&& prevRegionsRecoveryInterval == newRegionsRecoveryInterval) {
|
||||||
|
// no need to re-schedule the chore with updated config
|
||||||
|
// as there is no change in desired configs
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info("Config Reload for RegionsRecovery Chore. prevMaxStoreFileRefCount: {}," +
|
||||||
|
" newMaxStoreFileRefCount: {}, prevRegionsRecoveryInterval: {}, " +
|
||||||
|
"newRegionsRecoveryInterval: {}", prevMaxStoreFileRefCount, newMaxStoreFileRefCount,
|
||||||
|
prevRegionsRecoveryInterval, newRegionsRecoveryInterval);
|
||||||
|
|
||||||
|
RegionsRecoveryChore regionsRecoveryChore = new RegionsRecoveryChore(this.hMaster,
|
||||||
|
conf, this.hMaster);
|
||||||
|
ChoreService choreService = this.hMaster.getChoreService();
|
||||||
|
|
||||||
|
// Regions Reopen based on very high storeFileRefCount is considered enabled
|
||||||
|
// only if hbase.regions.recovery.store.file.ref.count has value > 0
|
||||||
|
|
||||||
|
synchronized (this) {
|
||||||
|
if (newMaxStoreFileRefCount > 0) {
|
||||||
|
// reschedule the chore
|
||||||
|
// provide mayInterruptIfRunning - false to take care of completion
|
||||||
|
// of in progress task if any
|
||||||
|
choreService.cancelChore(regionsRecoveryChore, false);
|
||||||
|
choreService.scheduleChore(regionsRecoveryChore);
|
||||||
|
} else {
|
||||||
|
choreService.cancelChore(regionsRecoveryChore, false);
|
||||||
|
}
|
||||||
|
this.prevMaxStoreFileRefCount = newMaxStoreFileRefCount;
|
||||||
|
this.prevRegionsRecoveryInterval = newRegionsRecoveryInterval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getMaxStoreFileRefCount(Configuration configuration) {
|
||||||
|
return configuration.getInt(
|
||||||
|
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
|
||||||
|
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getRegionsRecoveryChoreInterval(Configuration configuration) {
|
||||||
|
return configuration.getInt(
|
||||||
|
HConstants.REGIONS_RECOVERY_INTERVAL,
|
||||||
|
HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,147 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.master;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
|
import org.apache.hadoop.hbase.StartMiniClusterOption;
|
||||||
|
import org.apache.hadoop.hbase.Stoppable;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for Regions Recovery Config Manager
|
||||||
|
*/
|
||||||
|
@Category({MasterTests.class, MediumTests.class})
|
||||||
|
public class TestRegionsRecoveryConfigManager {
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestRegionsRecoveryConfigManager.class);
|
||||||
|
|
||||||
|
private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility();
|
||||||
|
|
||||||
|
private MiniHBaseCluster cluster;
|
||||||
|
|
||||||
|
private HMaster hMaster;
|
||||||
|
|
||||||
|
private RegionsRecoveryChore regionsRecoveryChore;
|
||||||
|
|
||||||
|
private RegionsRecoveryConfigManager regionsRecoveryConfigManager;
|
||||||
|
|
||||||
|
private Configuration conf;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws Exception {
|
||||||
|
conf = HBASE_TESTING_UTILITY.getConfiguration();
|
||||||
|
conf.unset("hbase.regions.recovery.store.file.ref.count");
|
||||||
|
conf.unset("hbase.master.regions.recovery.check.interval");
|
||||||
|
StartMiniClusterOption option = StartMiniClusterOption.builder()
|
||||||
|
.masterClass(TestHMaster.class)
|
||||||
|
.numRegionServers(1)
|
||||||
|
.numDataNodes(1).build();
|
||||||
|
HBASE_TESTING_UTILITY.startMiniCluster(option);
|
||||||
|
cluster = HBASE_TESTING_UTILITY.getMiniHBaseCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
HBASE_TESTING_UTILITY.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChoreSchedule() throws Exception {
|
||||||
|
|
||||||
|
this.hMaster = cluster.getMaster();
|
||||||
|
|
||||||
|
Stoppable stoppable = new StoppableImplementation();
|
||||||
|
this.regionsRecoveryChore = new RegionsRecoveryChore(stoppable, conf, hMaster);
|
||||||
|
|
||||||
|
this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this.hMaster);
|
||||||
|
// not yet scheduled
|
||||||
|
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
|
||||||
|
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||||
|
// not yet scheduled
|
||||||
|
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
|
||||||
|
conf.setInt("hbase.master.regions.recovery.check.interval", 10);
|
||||||
|
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||||
|
// not yet scheduled - missing config: hbase.regions.recovery.store.file.ref.count
|
||||||
|
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
|
||||||
|
conf.setInt("hbase.regions.recovery.store.file.ref.count", 10);
|
||||||
|
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||||
|
// chore scheduled
|
||||||
|
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
|
||||||
|
conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
|
||||||
|
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||||
|
// chore re-scheduled
|
||||||
|
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
|
||||||
|
conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
|
||||||
|
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||||
|
// chore scheduling untouched
|
||||||
|
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
|
||||||
|
conf.unset("hbase.regions.recovery.store.file.ref.count");
|
||||||
|
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||||
|
// chore un-scheduled
|
||||||
|
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make it public so that JVMClusterUtil can access it.
|
||||||
|
public static class TestHMaster extends HMaster {
|
||||||
|
public TestHMaster(Configuration conf) throws IOException, KeeperException {
|
||||||
|
super(conf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple helper class that just keeps track of whether or not its stopped.
|
||||||
|
*/
|
||||||
|
private static class StoppableImplementation implements Stoppable {
|
||||||
|
|
||||||
|
private boolean stop = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop(String why) {
|
||||||
|
this.stop = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isStopped() {
|
||||||
|
return this.stop;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1150,6 +1150,8 @@ Here are those configurations:
|
||||||
| hbase.master.balancer.stochastic.moveCost
|
| hbase.master.balancer.stochastic.moveCost
|
||||||
| hbase.master.balancer.stochastic.maxMovePercent
|
| hbase.master.balancer.stochastic.maxMovePercent
|
||||||
| hbase.master.balancer.stochastic.tableSkewCost
|
| hbase.master.balancer.stochastic.tableSkewCost
|
||||||
|
| hbase.master.regions.recovery.check.interval
|
||||||
|
| hbase.regions.recovery.store.file.ref.count
|
||||||
|===
|
|===
|
||||||
|
|
||||||
ifdef::backend-docbook[]
|
ifdef::backend-docbook[]
|
||||||
|
|
Loading…
Reference in New Issue