HBASE-24273 HBCK's "Orphan Regions on FileSystem" reports regions with referenced HFiles (#1613)

Signed-off-by: stack <stack@apache.org>
huaxiangsun 2020-05-05 11:10:02 -07:00 committed by GitHub
parent 3340c0024e
commit a240ba8214
3 changed files with 48 additions and 10 deletions
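
Background: when two regions are merged, the parents' directories stay on the filesystem, and their HFiles are still referenced by the new merged region, until the CatalogJanitor garbage-collects them. HbckChore's filesystem pass treated any region directory missing from the master's in-memory state as an orphan, so freshly merged parents were falsely reported. The patch first collects the merge parents recorded in hbase:meta and exempts them; a simplified sketch of the corrected predicate, using the names that appear in the diff below:

    // A region directory is an orphan only if the master does not know it
    // AND hbase:meta does not record it as the parent of a merge.
    boolean orphan = hri == null && !mergedParentRegions.contains(encodedRegionName);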

HbckChore.java

@@ -28,6 +28,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.RegionInfo;
@@ -134,7 +135,7 @@ public class HbckChore extends ScheduledChore {
     loadRegionsFromInMemoryState();
     loadRegionsFromRSReport();
     try {
-      loadRegionsFromFS();
+      loadRegionsFromFS(scanForMergedParentRegions());
     } catch (IOException e) {
       LOG.warn("Failed to load the regions from filesystem", e);
     }
@@ -187,6 +188,31 @@ public class HbckChore extends ScheduledChore {
     }
   }
 
+  /**
+   * Scan hbase:meta to get set of merged parent regions, this is a very heavy scan.
+   *
+   * @return Return generated {@link HashSet}
+   */
+  private HashSet<String> scanForMergedParentRegions() throws IOException {
+    HashSet<String> mergedParentRegions = new HashSet<>();
+    // Null tablename means scan all of meta.
+    MetaTableAccessor.scanMetaForTableRegions(this.master.getConnection(),
+      r -> {
+        List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(r.rawCells());
+        if (mergeParents != null) {
+          for (RegionInfo mergeRegion : mergeParents) {
+            if (mergeRegion != null) {
+              // This region is already being merged
+              mergedParentRegions.add(mergeRegion.getEncodedName());
+            }
+          }
+        }
+        return true;
+      },
+      null);
+    return mergedParentRegions;
+  }
+
   private void loadRegionsFromInMemoryState() {
     List<RegionState> regionStates =
       master.getAssignmentManager().getRegionStates().getRegionStates();
@@ -256,7 +282,7 @@ public class HbckChore extends ScheduledChore {
     }
   }
 
-  private void loadRegionsFromFS() throws IOException {
+  private void loadRegionsFromFS(final HashSet<String> mergedParentRegions) throws IOException {
     Path rootDir = master.getMasterFileSystem().getRootDir();
     FileSystem fs = master.getMasterFileSystem().getFileSystem();
 
@@ -271,12 +297,12 @@ public class HbckChore extends ScheduledChore {
           continue;
         }
         HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
-        if (hri == null) {
+        // If it is not in in-memory database and not a merged region,
+        // report it as an orphan region.
+        if (hri == null && !mergedParentRegions.contains(encodedRegionName)) {
           orphanRegionsOnFS.put(encodedRegionName, regionDir);
           continue;
         }
-        HbckRegionInfo.HdfsEntry hdfsEntry = new HbckRegionInfo.HdfsEntry(regionDir);
-        hri.setHdfsEntry(hdfsEntry);
       }
       numRegions += regionDirs.size();
     }
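
The scan above uses MetaTableAccessor's visitor callback: it is invoked once per region row of hbase:meta, returning true keeps the scan going, and a null table name visits all of meta (which is why the javadoc calls this a very heavy scan). A minimal self-contained sketch of the same pattern, assuming only an already-open cluster Connection; the class and method names here are illustrative, not from the commit:

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.List;
    import org.apache.hadoop.hbase.MetaTableAccessor;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.RegionInfo;

    public final class MergedParentScan {
      /** Collect the encoded names of all merge parents recorded in hbase:meta. */
      static HashSet<String> mergedParents(Connection conn) throws IOException {
        HashSet<String> parents = new HashSet<>();
        MetaTableAccessor.scanMetaForTableRegions(conn, r -> {
          // Merge qualifiers on a region row name the regions that were merged away.
          List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(r.rawCells());
          if (mergeParents != null) {
            for (RegionInfo parent : mergeParents) {
              if (parent != null) {
                parents.add(parent.getEncodedName());
              }
            }
          }
          return true; // true = keep visiting the remaining rows
        }, null);      // null table name = scan every region row in meta
        return parents;
      }
    }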

HConnectionTestingUtility.java

@@ -17,8 +17,8 @@
  */
 package org.apache.hadoop.hbase.client;
 
+import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.mockito.Mockito;
 
 /**
@@ -39,12 +39,18 @@ public class HConnectionTestingUtility {
    * probably not what you want.
    * @param conf configuration
    * @return ConnectionImplementation object for <code>conf</code>
-   * @throws ZooKeeperConnectionException
    */
   public static Connection getMockedConnection(final Configuration conf)
-      throws ZooKeeperConnectionException {
+      throws IOException {
     Connection connection = Mockito.mock(Connection.class);
     Mockito.when(connection.getConfiguration()).thenReturn(conf);
+    // Some test cases need Mock of getTable and getScanner
+    Table t = Mockito.mock(Table.class);
+    Mockito.when(connection.getTable(Mockito.any())).thenReturn(t);
+    ResultScanner rs = Mockito.mock(ResultScanner.class);
+    Mockito.when(t.getScanner((Scan)Mockito.any())).thenReturn(rs);
     return connection;
   }
 }
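
The extra stubbing matters because code under test that scans meta goes through Connection.getTable(...) and Table.getScanner(...); on a bare mock both calls return null and fail with a NullPointerException before any assertion runs. A hypothetical illustration of what the broadened mock supports (the variable names here are not from the commit):

    Configuration conf = HBaseConfiguration.create();
    Connection conn = HConnectionTestingUtility.getMockedConnection(conf);
    Table table = conn.getTable(TableName.META_TABLE_NAME); // the shared Table mock
    ResultScanner scanner = table.getScanner(new Scan());   // the ResultScanner mock
    scanner.close(); // a no-op on the mock; no RPC is ever issued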

TestMetaFixer.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
@@ -147,10 +148,12 @@ public class TestMetaFixer {
   @Test
   public void testOverlap() throws Exception {
     TableName tn = TableName.valueOf(this.name.getMethodName());
-    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
+    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
+    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
     List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
     assertTrue(ris.size() > 5);
-    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
+    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
+    HbckChore hbckChore = services.getHbckChore();
     services.getCatalogJanitor().scan();
     CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
     assertTrue(report.isEmpty());
@@ -174,6 +177,9 @@ public class TestMetaFixer {
         throw new RuntimeException(e);
       }
     });
+    hbckChore.chore();
+    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
   }
 
   /**
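
A reading of the new assertions: loading the table gives the regions real HFiles, and the MetaFixer/CatalogJanitor pass merges away the overlaps, leaving the merge parents' directories on the filesystem until GCRegionProcedure cleans them up. Before this patch, hbckChore.chore() would have counted those directories in getOrphanRegionsOnFS(); with merge parents now read from hbase:meta and exempted, the expected count is zero.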