HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
Signed-off-by: Lijin Bin <binlijin@apache.org>
This commit is contained in:
parent
3b0c276aa3
commit
8e52339cb8
|
@ -3869,4 +3869,11 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
return cachedClusterId.getFromCacheOrFetch();
|
return cachedClusterId.getFromCacheOrFetch();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void runReplicationBarrierCleaner() {
|
||||||
|
ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
|
||||||
|
if (rbc != null) {
|
||||||
|
rbc.chore();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -122,7 +122,6 @@ public class HbckChore extends ScheduledChore {
|
||||||
LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
|
LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
running = true;
|
|
||||||
regionInfoMap.clear();
|
regionInfoMap.clear();
|
||||||
disabledTableRegions.clear();
|
disabledTableRegions.clear();
|
||||||
splitParentRegions.clear();
|
splitParentRegions.clear();
|
||||||
|
@ -130,14 +129,19 @@ public class HbckChore extends ScheduledChore {
|
||||||
orphanRegionsOnFS.clear();
|
orphanRegionsOnFS.clear();
|
||||||
inconsistentRegions.clear();
|
inconsistentRegions.clear();
|
||||||
checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
|
checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
|
||||||
loadRegionsFromInMemoryState();
|
running = true;
|
||||||
loadRegionsFromRSReport();
|
|
||||||
try {
|
try {
|
||||||
loadRegionsFromFS();
|
loadRegionsFromInMemoryState();
|
||||||
} catch (IOException e) {
|
loadRegionsFromRSReport();
|
||||||
LOG.warn("Failed to load the regions from filesystem", e);
|
try {
|
||||||
|
loadRegionsFromFS();
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn("Failed to load the regions from filesystem", e);
|
||||||
|
}
|
||||||
|
saveCheckResultToSnapshot();
|
||||||
|
} catch (Throwable t) {
|
||||||
|
LOG.warn("Unexpected", t);
|
||||||
}
|
}
|
||||||
saveCheckResultToSnapshot();
|
|
||||||
running = false;
|
running = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -262,6 +266,10 @@ public class HbckChore extends ScheduledChore {
|
||||||
List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
|
List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
|
||||||
for (Path regionDir : regionDirs) {
|
for (Path regionDir : regionDirs) {
|
||||||
String encodedRegionName = regionDir.getName();
|
String encodedRegionName = regionDir.getName();
|
||||||
|
if (encodedRegionName == null) {
|
||||||
|
LOG.warn("Failed get of encoded name from {}", regionDir);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
|
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
|
||||||
if (hri == null) {
|
if (hri == null) {
|
||||||
orphanRegionsOnFS.put(encodedRegionName, regionDir);
|
orphanRegionsOnFS.put(encodedRegionName, regionDir);
|
||||||
|
|
|
@ -537,4 +537,8 @@ public interface MasterServices extends Server {
|
||||||
*/
|
*/
|
||||||
List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans);
|
List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the ReplicationBarrierCleaner.
|
||||||
|
*/
|
||||||
|
void runReplicationBarrierCleaner();
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,6 +77,9 @@ class MetaFixer {
|
||||||
}
|
}
|
||||||
fixHoles(report);
|
fixHoles(report);
|
||||||
fixOverlaps(report);
|
fixOverlaps(report);
|
||||||
|
// Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
|
||||||
|
// can help clean up a damaged hbase:meta.
|
||||||
|
this.masterServices.runReplicationBarrierCleaner();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -48,7 +48,6 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class ReplicationBarrierCleaner extends ScheduledChore {
|
public class ReplicationBarrierCleaner extends ScheduledChore {
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(ReplicationBarrierCleaner.class);
|
private static final Logger LOG = LoggerFactory.getLogger(ReplicationBarrierCleaner.class);
|
||||||
|
|
||||||
private static final String REPLICATION_BARRIER_CLEANER_INTERVAL =
|
private static final String REPLICATION_BARRIER_CLEANER_INTERVAL =
|
||||||
|
@ -71,7 +70,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void chore() {
|
// Public so it can be run from MasterRpcServices. Synchronized so only one
|
||||||
|
// running instance at a time.
|
||||||
|
public synchronized void chore() {
|
||||||
long totalRows = 0;
|
long totalRows = 0;
|
||||||
long cleanedRows = 0;
|
long cleanedRows = 0;
|
||||||
long deletedRows = 0;
|
long deletedRows = 0;
|
||||||
|
@ -168,11 +169,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
|
||||||
LOG.warn("Failed to clean up replication barrier", e);
|
LOG.warn("Failed to clean up replication barrier", e);
|
||||||
}
|
}
|
||||||
if (totalRows > 0) {
|
if (totalRows > 0) {
|
||||||
LOG.info(
|
LOG.info("TotalRows={}, cleanedRows={}, deletedRows={}, deletedBarriers={}, " +
|
||||||
"Cleanup replication barriers: totalRows {}, " +
|
"deletedLastPushedSeqIds={}", totalRows, cleanedRows, deletedRows,
|
||||||
"cleanedRows {}, deletedRows {}, deletedBarriers {}, deletedLastPushedSeqIds {}",
|
deletedBarriers, deletedLastPushedSeqIds);
|
||||||
totalRows, cleanedRows, deletedRows, deletedBarriers, deletedLastPushedSeqIds);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -490,4 +490,7 @@ public class MockNoopMasterServices implements MasterServices {
|
||||||
public AsyncClusterConnection getAsyncClusterConnection() {
|
public AsyncClusterConnection getAsyncClusterConnection() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void runReplicationBarrierCleaner() {}
|
||||||
}
|
}
|
|
@ -108,7 +108,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
|
||||||
.filter(p -> (p instanceof ServerCrashProcedure) &&
|
.filter(p -> (p instanceof ServerCrashProcedure) &&
|
||||||
((ServerCrashProcedure) p).getServerName().equals(SERVER_FOR_TEST)).findAny();
|
((ServerCrashProcedure) p).getServerName().equals(SERVER_FOR_TEST)).findAny();
|
||||||
assertTrue("Should have one SCP for " + SERVER_FOR_TEST, procedure.isPresent());
|
assertTrue("Should have one SCP for " + SERVER_FOR_TEST, procedure.isPresent());
|
||||||
assertFalse("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
|
assertTrue("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
|
||||||
UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST) ==
|
UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST) ==
|
||||||
Procedure.NO_PROC_ID);
|
Procedure.NO_PROC_ID);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue