From f68b61a027bfc2df6a59f5a1d4dec060dbb06b18 Mon Sep 17 00:00:00 2001 From: "Tak Lon (Stephen) Wu" Date: Sun, 20 Nov 2022 14:44:39 -0800 Subject: [PATCH] HBASE-27495 Improve HFileLinkCleaner to validate back reference links ahead the next traverse (#4887) Signed-off-by: Peter Somogyi Signed-off-by: Wellington Ramos Chevreuil --- .../master/cleaner/HFileLinkCleaner.java | 19 ++- .../master/cleaner/TestHFileLinkCleaner.java | 132 ++++++++++++------ 2 files changed, 107 insertions(+), 44 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java index b550f39e2d7..3b137ebc1a9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java @@ -90,10 +90,23 @@ public class HFileLinkCleaner extends BaseHFileCleanerDelegate { } // HFile is deletable only if has no links - Path backRefDir = null; + Path backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName()); try { - backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName()); - return CommonFSUtils.listStatus(fs, backRefDir) == null; + FileStatus[] fileStatuses = CommonFSUtils.listStatus(fs, backRefDir); + // for empty reference directory, retain the logic to be deletable + if (fileStatuses == null) { + return true; + } + // reuse the found back reference files, check if the forward reference exists. + // with this optimization, the chore could save one round compute time if we're visiting + // the archive HFile earlier than the HFile Link + for (FileStatus fileStatus : fileStatuses) { + if (!isFileDeletable(fileStatus)) { + return false; + } + } + // all the found back reference files are clear, we can delete it. + return true; } catch (IOException e) { if (LOG.isDebugEnabled()) { LOG.debug("Couldn't get the references, not deleting file, just in case. filePath=" diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java index d216e6f0004..318927b2f77 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java @@ -39,7 +39,9 @@ import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.MockServer; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.junit.After; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Rule; @@ -57,9 +59,28 @@ public class TestHFileLinkCleaner { public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHFileLinkCleaner.class); + private Configuration conf; + private Path rootDir; + private FileSystem fs; + private TableName tableName; + private TableName tableLinkName; + private String hfileName; + private String familyName; + private RegionInfo hri; + private RegionInfo hriLink; + private Path archiveDir; + private Path archiveStoreDir; + private Path familyPath; + private Path hfilePath; + private Path familyLinkPath; + private String hfileLinkName; + private Path linkBackRefDir; + private Path linkBackRef; + private FileStatus[] backRefs; + private HFileCleaner cleaner; private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); - private static DirScanPool POOL; + private static final long TTL = 1000; @Rule public TestName name = new TestName(); @@ -74,49 +95,71 @@ public class TestHFileLinkCleaner { POOL.shutdownNow(); } - @Test - public void testHFileLinkCleaning() throws Exception { - Configuration conf = TEST_UTIL.getConfiguration(); + @Before + public void configureDirectoriesAndLinks() throws IOException { + conf = TEST_UTIL.getConfiguration(); CommonFSUtils.setRootDir(conf, TEST_UTIL.getDataTestDir()); conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, HFileLinkCleaner.class.getName()); - Path rootDir = CommonFSUtils.getRootDir(conf); - FileSystem fs = FileSystem.get(conf); + rootDir = CommonFSUtils.getRootDir(conf); + fs = FileSystem.get(conf); - final TableName tableName = TableName.valueOf(name.getMethodName()); - final TableName tableLinkName = TableName.valueOf(name.getMethodName() + "-link"); - final String hfileName = "1234567890"; - final String familyName = "cf"; + tableName = TableName.valueOf(name.getMethodName()); + tableLinkName = TableName.valueOf(name.getMethodName() + "-link"); + hfileName = "1234567890"; + familyName = "cf"; - RegionInfo hri = RegionInfoBuilder.newBuilder(tableName).build(); - RegionInfo hriLink = RegionInfoBuilder.newBuilder(tableLinkName).build(); + hri = RegionInfoBuilder.newBuilder(tableName).build(); + hriLink = RegionInfoBuilder.newBuilder(tableLinkName).build(); - Path archiveDir = HFileArchiveUtil.getArchivePath(conf); - Path archiveStoreDir = + archiveDir = HFileArchiveUtil.getArchivePath(conf); + archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, tableName, hri.getEncodedName(), familyName); // Create hfile /hbase/table-link/region/cf/getEncodedName.HFILE(conf); - Path familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName); + familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName); fs.mkdirs(familyPath); - Path hfilePath = new Path(familyPath, hfileName); + hfilePath = new Path(familyPath, hfileName); fs.createNewFile(hfilePath); - // Create link to hfile - Path familyLinkPath = - getFamilyDirPath(rootDir, tableLinkName, hriLink.getEncodedName(), familyName); - fs.mkdirs(familyLinkPath); - HFileLink.create(conf, fs, familyLinkPath, hri, hfileName); - Path linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName); - assertTrue(fs.exists(linkBackRefDir)); - FileStatus[] backRefs = fs.listStatus(linkBackRefDir); - assertEquals(1, backRefs.length); - Path linkBackRef = backRefs[0].getPath(); + createLink(true); // Initialize cleaner - final long ttl = 1000; - conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, ttl); + conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, TTL); Server server = new DummyServer(); - HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir, POOL); + cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir, POOL); + } + private void createLink(boolean createBackReference) throws IOException { + // Create link to hfile + familyLinkPath = getFamilyDirPath(rootDir, tableLinkName, hriLink.getEncodedName(), familyName); + fs.mkdirs(familyLinkPath); + hfileLinkName = HFileLink.create(conf, fs, familyLinkPath, hri, hfileName, createBackReference); + linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName); + assertTrue(fs.exists(linkBackRefDir)); + backRefs = fs.listStatus(linkBackRefDir); + assertEquals(1, backRefs.length); + linkBackRef = backRefs[0].getPath(); + } + + @After + public void cleanup() throws IOException, InterruptedException { + // HFile can be removed + Thread.sleep(TTL * 2); + cleaner.chore(); + assertFalse("HFile should be deleted", fs.exists(hfilePath)); + // Remove everything + for (int i = 0; i < 4; ++i) { + Thread.sleep(TTL * 2); + cleaner.chore(); + } + assertFalse("HFile should be deleted", + fs.exists(CommonFSUtils.getTableDir(archiveDir, tableName))); + assertFalse("Link should be deleted", + fs.exists(CommonFSUtils.getTableDir(archiveDir, tableLinkName))); + } + + @Test + public void testHFileLinkCleaning() throws Exception { // Link backref cannot be removed cleaner.chore(); assertTrue(fs.exists(linkBackRef)); @@ -127,21 +170,28 @@ public class TestHFileLinkCleaner { CommonFSUtils.getTableDir(archiveDir, tableLinkName)); cleaner.chore(); assertFalse("Link should be deleted", fs.exists(linkBackRef)); + } - // HFile can be removed - Thread.sleep(ttl * 2); + @Test + public void testHFileLinkByRemovingReference() throws Exception { + // Link backref cannot be removed cleaner.chore(); - assertFalse("HFile should be deleted", fs.exists(hfilePath)); + assertTrue(fs.exists(linkBackRef)); + assertTrue(fs.exists(hfilePath)); - // Remove everything - for (int i = 0; i < 4; ++i) { - Thread.sleep(ttl * 2); - cleaner.chore(); - } - assertFalse("HFile should be deleted", - fs.exists(CommonFSUtils.getTableDir(archiveDir, tableName))); - assertFalse("Link should be deleted", - fs.exists(CommonFSUtils.getTableDir(archiveDir, tableLinkName))); + // simulate after removing the reference in data directory, the Link backref can be removed + fs.delete(new Path(familyLinkPath, hfileLinkName), false); + cleaner.chore(); + assertFalse("Link should be deleted", fs.exists(linkBackRef)); + } + + @Test + public void testHFileLinkEmptyBackReferenceDirectory() throws Exception { + // simulate and remove the back reference + fs.delete(linkBackRef, false); + assertTrue("back reference directory still exists", fs.exists(linkBackRefDir)); + cleaner.chore(); + assertFalse("back reference directory should be deleted", fs.exists(linkBackRefDir)); } private static Path getFamilyDirPath(final Path rootDir, final TableName table,