HBASE-23212 : Dynamically reload configs for Region Recovery chore (#773)
* HBASE-23212 : Dynamically reload configs for Region Recovery chore * remove redundant volatile
This commit is contained in:
parent
2f8860b1da
commit
10cc64a7d6
|
@ -1481,6 +1481,11 @@ public final class HConstants {
|
|||
// default -1 indicates there is no threshold on high storeRefCount
|
||||
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
|
||||
|
||||
public static final String REGIONS_RECOVERY_INTERVAL =
|
||||
"hbase.master.regions.recovery.check.interval";
|
||||
|
||||
public static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min
|
||||
|
||||
/**
|
||||
* Configurations for master executor services.
|
||||
*/
|
||||
|
|
|
@ -423,6 +423,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
private MasterProcedureManagerHost mpmHost;
|
||||
|
||||
private RegionsRecoveryChore regionsRecoveryChore = null;
|
||||
|
||||
private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
|
||||
// it is assigned after 'initialized' guard set to true, so should be volatile
|
||||
private volatile MasterQuotaManager quotaManager;
|
||||
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
|
||||
|
@ -1146,6 +1148,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
configurationManager.registerObserver(this.cleanerPool);
|
||||
configurationManager.registerObserver(this.hfileCleaner);
|
||||
configurationManager.registerObserver(this.logCleaner);
|
||||
configurationManager.registerObserver(this.regionsRecoveryConfigManager);
|
||||
// Set master as 'initialized'.
|
||||
setInitialized(true);
|
||||
|
||||
|
@ -1480,6 +1483,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
|
||||
}
|
||||
|
||||
this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
|
||||
|
||||
replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
|
||||
replicationPeerManager);
|
||||
getChoreService().scheduleChore(replicationBarrierCleaner);
|
||||
|
|
|
@ -52,11 +52,6 @@ public class RegionsRecoveryChore extends ScheduledChore {
|
|||
|
||||
private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
|
||||
|
||||
private static final String REGIONS_RECOVERY_INTERVAL =
|
||||
"hbase.master.regions.recovery.check.interval";
|
||||
|
||||
private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ?
|
||||
|
||||
private static final String ERROR_REOPEN_REIONS_MSG =
|
||||
"Error reopening regions with high storeRefCount. ";
|
||||
|
||||
|
@ -76,8 +71,8 @@ public class RegionsRecoveryChore extends ScheduledChore {
|
|||
RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
|
||||
final HMaster hMaster) {
|
||||
|
||||
super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
|
||||
DEFAULT_REGIONS_RECOVERY_INTERVAL));
|
||||
super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
|
||||
HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
|
||||
this.hMaster = hMaster;
|
||||
this.storeFileRefCountThreshold = configuration.getInt(
|
||||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
|
||||
|
@ -171,4 +166,20 @@ public class RegionsRecoveryChore extends ScheduledChore {
|
|||
|
||||
}
|
||||
|
||||
// hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore
|
||||
// is scheduled at a time - RegionsRecoveryConfigManager
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
return o != null && getClass() == o.getClass();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.ChoreService;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Config manager for RegionsRecovery Chore - Dynamically reload config and update chore
|
||||
* accordingly
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class RegionsRecoveryConfigManager implements ConfigurationObserver {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryConfigManager.class);
|
||||
|
||||
private final HMaster hMaster;
|
||||
private int prevMaxStoreFileRefCount;
|
||||
private int prevRegionsRecoveryInterval;
|
||||
|
||||
RegionsRecoveryConfigManager(final HMaster hMaster) {
|
||||
this.hMaster = hMaster;
|
||||
Configuration conf = hMaster.getConfiguration();
|
||||
this.prevMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
|
||||
this.prevRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onConfigurationChange(Configuration conf) {
|
||||
final int newMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
|
||||
final int newRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
|
||||
|
||||
if (prevMaxStoreFileRefCount == newMaxStoreFileRefCount
|
||||
&& prevRegionsRecoveryInterval == newRegionsRecoveryInterval) {
|
||||
// no need to re-schedule the chore with updated config
|
||||
// as there is no change in desired configs
|
||||
return;
|
||||
}
|
||||
|
||||
LOG.info("Config Reload for RegionsRecovery Chore. prevMaxStoreFileRefCount: {}," +
|
||||
" newMaxStoreFileRefCount: {}, prevRegionsRecoveryInterval: {}, " +
|
||||
"newRegionsRecoveryInterval: {}", prevMaxStoreFileRefCount, newMaxStoreFileRefCount,
|
||||
prevRegionsRecoveryInterval, newRegionsRecoveryInterval);
|
||||
|
||||
RegionsRecoveryChore regionsRecoveryChore = new RegionsRecoveryChore(this.hMaster,
|
||||
conf, this.hMaster);
|
||||
ChoreService choreService = this.hMaster.getChoreService();
|
||||
|
||||
// Regions Reopen based on very high storeFileRefCount is considered enabled
|
||||
// only if hbase.regions.recovery.store.file.ref.count has value > 0
|
||||
|
||||
synchronized (this) {
|
||||
if (newMaxStoreFileRefCount > 0) {
|
||||
// reschedule the chore
|
||||
// provide mayInterruptIfRunning - false to take care of completion
|
||||
// of in progress task if any
|
||||
choreService.cancelChore(regionsRecoveryChore, false);
|
||||
choreService.scheduleChore(regionsRecoveryChore);
|
||||
} else {
|
||||
choreService.cancelChore(regionsRecoveryChore, false);
|
||||
}
|
||||
this.prevMaxStoreFileRefCount = newMaxStoreFileRefCount;
|
||||
this.prevRegionsRecoveryInterval = newRegionsRecoveryInterval;
|
||||
}
|
||||
}
|
||||
|
||||
private int getMaxStoreFileRefCount(Configuration configuration) {
|
||||
return configuration.getInt(
|
||||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
|
||||
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
|
||||
}
|
||||
|
||||
private int getRegionsRecoveryChoreInterval(Configuration configuration) {
|
||||
return configuration.getInt(
|
||||
HConstants.REGIONS_RECOVERY_INTERVAL,
|
||||
HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||
import org.apache.hadoop.hbase.StartMiniClusterOption;
|
||||
import org.apache.hadoop.hbase.Stoppable;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
/**
|
||||
* Test for Regions Recovery Config Manager
|
||||
*/
|
||||
@Category({MasterTests.class, MediumTests.class})
|
||||
public class TestRegionsRecoveryConfigManager {
|
||||
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestRegionsRecoveryConfigManager.class);
|
||||
|
||||
private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility();
|
||||
|
||||
private MiniHBaseCluster cluster;
|
||||
|
||||
private HMaster hMaster;
|
||||
|
||||
private RegionsRecoveryChore regionsRecoveryChore;
|
||||
|
||||
private RegionsRecoveryConfigManager regionsRecoveryConfigManager;
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
@Before
|
||||
public void setup() throws Exception {
|
||||
conf = HBASE_TESTING_UTILITY.getConfiguration();
|
||||
conf.unset("hbase.regions.recovery.store.file.ref.count");
|
||||
conf.unset("hbase.master.regions.recovery.check.interval");
|
||||
StartMiniClusterOption option = StartMiniClusterOption.builder()
|
||||
.masterClass(TestHMaster.class)
|
||||
.numRegionServers(1)
|
||||
.numDataNodes(1).build();
|
||||
HBASE_TESTING_UTILITY.startMiniCluster(option);
|
||||
cluster = HBASE_TESTING_UTILITY.getMiniHBaseCluster();
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
HBASE_TESTING_UTILITY.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChoreSchedule() throws Exception {
|
||||
|
||||
this.hMaster = cluster.getMaster();
|
||||
|
||||
Stoppable stoppable = new StoppableImplementation();
|
||||
this.regionsRecoveryChore = new RegionsRecoveryChore(stoppable, conf, hMaster);
|
||||
|
||||
this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this.hMaster);
|
||||
// not yet scheduled
|
||||
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
|
||||
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||
// not yet scheduled
|
||||
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
|
||||
conf.setInt("hbase.master.regions.recovery.check.interval", 10);
|
||||
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||
// not yet scheduled - missing config: hbase.regions.recovery.store.file.ref.count
|
||||
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
|
||||
conf.setInt("hbase.regions.recovery.store.file.ref.count", 10);
|
||||
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||
// chore scheduled
|
||||
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
|
||||
conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
|
||||
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||
// chore re-scheduled
|
||||
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
|
||||
conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
|
||||
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||
// chore scheduling untouched
|
||||
Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
|
||||
conf.unset("hbase.regions.recovery.store.file.ref.count");
|
||||
this.regionsRecoveryConfigManager.onConfigurationChange(conf);
|
||||
// chore un-scheduled
|
||||
Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
|
||||
}
|
||||
|
||||
// Make it public so that JVMClusterUtil can access it.
|
||||
public static class TestHMaster extends HMaster {
|
||||
public TestHMaster(Configuration conf) throws IOException, KeeperException {
|
||||
super(conf);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple helper class that just keeps track of whether or not its stopped.
|
||||
*/
|
||||
private static class StoppableImplementation implements Stoppable {
|
||||
|
||||
private boolean stop = false;
|
||||
|
||||
@Override
|
||||
public void stop(String why) {
|
||||
this.stop = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isStopped() {
|
||||
return this.stop;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1150,6 +1150,8 @@ Here are those configurations:
|
|||
| hbase.master.balancer.stochastic.moveCost
|
||||
| hbase.master.balancer.stochastic.maxMovePercent
|
||||
| hbase.master.balancer.stochastic.tableSkewCost
|
||||
| hbase.master.regions.recovery.check.interval
|
||||
| hbase.regions.recovery.store.file.ref.count
|
||||
|===
|
||||
|
||||
ifdef::backend-docbook[]
|
||||
|
|
Loading…
Reference in New Issue