HBASE-22807 HBCK Report showed wrong orphans regions on FileSystem (#461)

Signed-off-by: Sakthi <sakthi@apache.org>
This commit is contained in:
Guanghao Zhang 2019-08-07 19:17:26 -05:00 committed by GitHub
parent 1b168cd6de
commit 547cec4078
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 45 additions and 22 deletions

View File

@ -19,13 +19,13 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.ScheduledChore;
@ -40,8 +40,6 @@ import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
/** /**
* Used to do the hbck checking job at master side. * Used to do the hbck checking job at master side.
*/ */
@ -69,7 +67,7 @@ public class HbckChore extends ScheduledChore {
/** /**
* The regions that have a directory on the FileSystem, but no region info in meta. * The regions that have a directory on the FileSystem, but no region info in meta.
*/ */
private final List<String> orphanRegionsOnFS = new LinkedList<>(); private final Set<String> orphanRegionsOnFS = new HashSet<>();
/** /**
* The inconsistent regions. There are three cases: * The inconsistent regions. There are three cases:
* case 1. Master thought this region opened, but no regionserver reported it. * case 1. Master thought this region opened, but no regionserver reported it.
@ -83,7 +81,7 @@ public class HbckChore extends ScheduledChore {
* The "snapshot" is used to save the last round's HBCK checking report. * The "snapshot" is used to save the last round's HBCK checking report.
*/ */
private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>(); private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
private final List<String> orphanRegionsOnFSSnapshot = new LinkedList<>(); private final Set<String> orphanRegionsOnFSSnapshot = new HashSet<>();
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot = private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
new HashMap<>(); new HashMap<>();
@ -153,9 +151,11 @@ public class HbckChore extends ScheduledChore {
regionState.getStamp()); regionState.getStamp());
regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry)); regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
} }
LOG.info("Loaded {} regions from in-memory state of AssignmentManager", regionStates.size());
} }
private void loadRegionsFromRSReport() { private void loadRegionsFromRSReport() {
int numRegions = 0;
Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports(); Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) { for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
ServerName serverName = entry.getKey(); ServerName serverName = entry.getKey();
@ -168,7 +168,10 @@ public class HbckChore extends ScheduledChore {
} }
hri.addServer(hri.getMetaEntry(), serverName); hri.addServer(hri.getMetaEntry(), serverName);
} }
numRegions += entry.getValue().size();
} }
LOG.info("Loaded {} regions from {} regionservers' reports and found {} orphan regions",
numRegions, rsReports.size(), orphanRegionsOnFS.size());
for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) { for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
String encodedRegionName = entry.getKey(); String encodedRegionName = entry.getKey();
@ -191,27 +194,24 @@ public class HbckChore extends ScheduledChore {
Path rootDir = master.getMasterFileSystem().getRootDir(); Path rootDir = master.getMasterFileSystem().getRootDir();
FileSystem fs = master.getMasterFileSystem().getFileSystem(); FileSystem fs = master.getMasterFileSystem().getFileSystem();
// list all tables from HDFS int numRegions = 0;
List<FileStatus> tableDirs = Lists.newArrayList(); List<Path> tableDirs = FSUtils.getTableDirs(fs, rootDir);
List<Path> paths = FSUtils.getTableDirs(fs, rootDir); for (Path tableDir : tableDirs) {
for (Path path : paths) { List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
tableDirs.add(fs.getFileStatus(path)); for (Path regionDir : regionDirs) {
} String encodedRegionName = regionDir.getName();
for (FileStatus tableDir : tableDirs) {
FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
for (FileStatus regionDir : regionDirs) {
String encodedRegionName = regionDir.getPath().getName();
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName); HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) { if (hri == null) {
orphanRegionsOnFS.add(encodedRegionName); orphanRegionsOnFS.add(encodedRegionName);
continue; continue;
} }
HbckRegionInfo.HdfsEntry hdfsEntry = HbckRegionInfo.HdfsEntry hdfsEntry = new HbckRegionInfo.HdfsEntry(regionDir);
new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime());
hri.setHdfsEntry(hdfsEntry); hri.setHdfsEntry(hdfsEntry);
} }
numRegions += regionDirs.size();
} }
LOG.info("Loaded {} tables {} regions from filesyetem and found {} orphan regions",
tableDirs.size(), numRegions, orphanRegionsOnFS.size());
} }
/** /**
@ -237,7 +237,7 @@ public class HbckChore extends ScheduledChore {
/** /**
* @return the regions that have a directory on the FileSystem, but no region info in meta. * @return the regions that have a directory on the FileSystem, but no region info in meta.
*/ */
public List<String> getOrphanRegionsOnFS() { public Set<String> getOrphanRegionsOnFS() {
// Need synchronized here, as this "snapshot" may be changed after checking. // Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock(); rwLock.readLock().lock();
try { try {

View File

@ -330,9 +330,8 @@ public class HbckRegionInfo implements KeyRange {
HdfsEntry() { HdfsEntry() {
} }
public HdfsEntry(Path regionDir, long regionDirModTime) { public HdfsEntry(Path regionDir) {
this.regionDir = regionDir; this.regionDir = regionDir;
this.regionDirModTime = regionDirModTime;
} }
} }

View File

@ -23,6 +23,7 @@
import="java.util.Date" import="java.util.Date"
import="java.util.List" import="java.util.List"
import="java.util.Map" import="java.util.Map"
import="java.util.Set"
import="java.util.stream.Collectors" import="java.util.stream.Collectors"
import="java.time.ZonedDateTime" import="java.time.ZonedDateTime"
import="java.time.format.DateTimeFormatter" import="java.time.format.DateTimeFormatter"
@ -41,7 +42,7 @@
HbckChore hbckChore = master.getHbckChore(); HbckChore hbckChore = master.getHbckChore();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null; Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
Map<String, ServerName> orphanRegionsOnRS = null; Map<String, ServerName> orphanRegionsOnRS = null;
List<String> orphanRegionsOnFS = null; Set<String> orphanRegionsOnFS = null;
long startTimestamp = 0; long startTimestamp = 0;
long endTimestamp = 0; long endTimestamp = 0;
if (hbckChore != null) { if (hbckChore != null) {

View File

@ -26,14 +26,18 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.master.HbckChore; import org.apache.hadoop.hbase.master.HbckChore;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.junit.Before; import org.junit.Before;
import org.junit.ClassRule; import org.junit.ClassRule;
@ -141,4 +145,23 @@ public class TestHbckChore extends TestAssignmentManagerBase {
inconsistentRegions = hbckChore.getInconsistentRegions(); inconsistentRegions = hbckChore.getInconsistentRegions();
assertFalse(inconsistentRegions.containsKey(regionName)); assertFalse(inconsistentRegions.containsKey(regionName));
} }
/**
 * Checks the orphan-regions-on-FileSystem report of {@link HbckChore}: a region directory that
 * is created directly on the FileSystem for a region this test never assigns must show up in
 * the report after the next chore run, and must disappear again once the directory is deleted.
 */
@Test
public void testOrphanRegionsOnFS() throws Exception {
  TableName tableName = TableName.valueOf("testOrphanRegionsOnFS");
  RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build();
  Configuration conf = util.getConfiguration();

  // Baseline: before the directory exists the report must be empty.
  hbckChore.choreForTesting();
  assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());

  // Create only the on-disk region directory; the test does nothing else with this region.
  HRegion.createRegionDir(conf, regionInfo, FSUtils.getRootDir(conf));
  try {
    hbckChore.choreForTesting();
    assertEquals(1, hbckChore.getOrphanRegionsOnFS().size());
    assertTrue(hbckChore.getOrphanRegionsOnFS().contains(regionInfo.getEncodedName()));
  } finally {
    // Remove the directory even when an assertion above fails, so a failing run does not
    // leave a stray region directory behind on the test FileSystem.
    FSUtils.deleteRegionDir(conf, new HRegionInfo(regionInfo));
  }

  // With the directory gone, the next chore run must no longer report the region.
  hbckChore.choreForTesting();
  assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
}
} }