HBASE-27495 Improve HFileLinkCleaner to validate back reference links ahead the next traverse (#4887)

Signed-off-by: Peter Somogyi <psomogyi@apache.org>
Signed-off-by: Wellington Ramos Chevreuil <wchevreuil@apache.org>
Author: Tak Lon (Stephen) Wu
Date: 2022-11-20 14:44:39 -08:00 (committed by GitHub)
parent 7b0ac6451d
commit f68b61a027
2 changed files with 107 additions and 44 deletions
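
For context, the test changes below wire HFileLinkCleaner into the HFileCleaner chore through the HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS setting. A minimal sketch of that wiring, mirroring the test setup (the class name RegisterHFileLinkCleanerSketch is illustrative only and not part of the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;

// Illustrative sketch: register HFileLinkCleaner as an HFileCleaner plugin so the
// archive cleaner chore consults it before deleting archived HFiles.
public class RegisterHFileLinkCleanerSketch {
  public static Configuration pluginConf() {
    Configuration conf = HBaseConfiguration.create();
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, HFileLinkCleaner.class.getName());
    return conf;
  }
}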

HFileLinkCleaner.java

@@ -90,10 +90,23 @@ public class HFileLinkCleaner extends BaseHFileCleanerDelegate {
     }
     // HFile is deletable only if has no links
-    Path backRefDir = null;
+    Path backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName());
     try {
-      backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName());
-      return CommonFSUtils.listStatus(fs, backRefDir) == null;
+      FileStatus[] fileStatuses = CommonFSUtils.listStatus(fs, backRefDir);
+      // for empty reference directory, retain the logic to be deletable
+      if (fileStatuses == null) {
+        return true;
+      }
+      // reuse the found back reference files, check if the forward reference exists.
+      // with this optimization, the chore could save one round compute time if we're visiting
+      // the archive HFile earlier than the HFile Link
+      for (FileStatus fileStatus : fileStatuses) {
+        if (!isFileDeletable(fileStatus)) {
+          return false;
+        }
+      }
+      // all the found back reference files are clear, we can delete it.
+      return true;
     } catch (IOException e) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Couldn't get the references, not deleting file, just in case. filePath="

TestHFileLinkCleaner.java

@@ -39,7 +39,9 @@ import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.MockServer;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.junit.After;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -57,9 +59,28 @@ public class TestHFileLinkCleaner {
   public static final HBaseClassTestRule CLASS_RULE =
     HBaseClassTestRule.forClass(TestHFileLinkCleaner.class);
 
+  private Configuration conf;
+  private Path rootDir;
+  private FileSystem fs;
+  private TableName tableName;
+  private TableName tableLinkName;
+  private String hfileName;
+  private String familyName;
+  private RegionInfo hri;
+  private RegionInfo hriLink;
+  private Path archiveDir;
+  private Path archiveStoreDir;
+  private Path familyPath;
+  private Path hfilePath;
+  private Path familyLinkPath;
+  private String hfileLinkName;
+  private Path linkBackRefDir;
+  private Path linkBackRef;
+  private FileStatus[] backRefs;
+  private HFileCleaner cleaner;
+
   private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
   private static DirScanPool POOL;
+  private static final long TTL = 1000;
 
   @Rule
   public TestName name = new TestName();
@@ -74,49 +95,71 @@ public class TestHFileLinkCleaner {
     POOL.shutdownNow();
   }
 
-  @Test
-  public void testHFileLinkCleaning() throws Exception {
-    Configuration conf = TEST_UTIL.getConfiguration();
+  @Before
+  public void configureDirectoriesAndLinks() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
     CommonFSUtils.setRootDir(conf, TEST_UTIL.getDataTestDir());
     conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, HFileLinkCleaner.class.getName());
-    Path rootDir = CommonFSUtils.getRootDir(conf);
-    FileSystem fs = FileSystem.get(conf);
-    final TableName tableName = TableName.valueOf(name.getMethodName());
-    final TableName tableLinkName = TableName.valueOf(name.getMethodName() + "-link");
-    final String hfileName = "1234567890";
-    final String familyName = "cf";
-    RegionInfo hri = RegionInfoBuilder.newBuilder(tableName).build();
-    RegionInfo hriLink = RegionInfoBuilder.newBuilder(tableLinkName).build();
-    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
-    Path archiveStoreDir =
+    rootDir = CommonFSUtils.getRootDir(conf);
+    fs = FileSystem.get(conf);
+    tableName = TableName.valueOf(name.getMethodName());
+    tableLinkName = TableName.valueOf(name.getMethodName() + "-link");
+    hfileName = "1234567890";
+    familyName = "cf";
+    hri = RegionInfoBuilder.newBuilder(tableName).build();
+    hriLink = RegionInfoBuilder.newBuilder(tableLinkName).build();
+    archiveDir = HFileArchiveUtil.getArchivePath(conf);
+    archiveStoreDir =
       HFileArchiveUtil.getStoreArchivePath(conf, tableName, hri.getEncodedName(), familyName);
     // Create hfile /hbase/table-link/region/cf/getEncodedName.HFILE(conf);
-    Path familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName);
+    familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName);
     fs.mkdirs(familyPath);
-    Path hfilePath = new Path(familyPath, hfileName);
+    hfilePath = new Path(familyPath, hfileName);
     fs.createNewFile(hfilePath);
-    // Create link to hfile
-    Path familyLinkPath =
-      getFamilyDirPath(rootDir, tableLinkName, hriLink.getEncodedName(), familyName);
-    fs.mkdirs(familyLinkPath);
-    HFileLink.create(conf, fs, familyLinkPath, hri, hfileName);
-    Path linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName);
-    assertTrue(fs.exists(linkBackRefDir));
-    FileStatus[] backRefs = fs.listStatus(linkBackRefDir);
-    assertEquals(1, backRefs.length);
-    Path linkBackRef = backRefs[0].getPath();
+    createLink(true);
+
     // Initialize cleaner
-    final long ttl = 1000;
-    conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, ttl);
+    conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, TTL);
     Server server = new DummyServer();
-    HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir, POOL);
+    cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir, POOL);
+  }
+
+  private void createLink(boolean createBackReference) throws IOException {
+    // Create link to hfile
+    familyLinkPath = getFamilyDirPath(rootDir, tableLinkName, hriLink.getEncodedName(), familyName);
+    fs.mkdirs(familyLinkPath);
+    hfileLinkName = HFileLink.create(conf, fs, familyLinkPath, hri, hfileName, createBackReference);
+    linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName);
+    assertTrue(fs.exists(linkBackRefDir));
+    backRefs = fs.listStatus(linkBackRefDir);
+    assertEquals(1, backRefs.length);
+    linkBackRef = backRefs[0].getPath();
+  }
+
+  @After
+  public void cleanup() throws IOException, InterruptedException {
+    // HFile can be removed
+    Thread.sleep(TTL * 2);
+    cleaner.chore();
+    assertFalse("HFile should be deleted", fs.exists(hfilePath));
+    // Remove everything
+    for (int i = 0; i < 4; ++i) {
+      Thread.sleep(TTL * 2);
+      cleaner.chore();
+    }
+    assertFalse("HFile should be deleted",
+      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableName)));
+    assertFalse("Link should be deleted",
+      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableLinkName)));
+  }
 
+  @Test
+  public void testHFileLinkCleaning() throws Exception {
     // Link backref cannot be removed
     cleaner.chore();
     assertTrue(fs.exists(linkBackRef));
@@ -127,21 +170,28 @@ public class TestHFileLinkCleaner {
       CommonFSUtils.getTableDir(archiveDir, tableLinkName));
     cleaner.chore();
     assertFalse("Link should be deleted", fs.exists(linkBackRef));
-
-    // HFile can be removed
-    Thread.sleep(ttl * 2);
-    cleaner.chore();
-    assertFalse("HFile should be deleted", fs.exists(hfilePath));
-
-    // Remove everything
-    for (int i = 0; i < 4; ++i) {
-      Thread.sleep(ttl * 2);
-      cleaner.chore();
-    }
-    assertFalse("HFile should be deleted",
-      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableName)));
-    assertFalse("Link should be deleted",
-      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableLinkName)));
   }
 
+  @Test
+  public void testHFileLinkByRemovingReference() throws Exception {
+    // Link backref cannot be removed
+    cleaner.chore();
+    assertTrue(fs.exists(linkBackRef));
+    assertTrue(fs.exists(hfilePath));
+
+    // simulate after removing the reference in data directory, the Link backref can be removed
+    fs.delete(new Path(familyLinkPath, hfileLinkName), false);
+    cleaner.chore();
+    assertFalse("Link should be deleted", fs.exists(linkBackRef));
+  }
+
+  @Test
+  public void testHFileLinkEmptyBackReferenceDirectory() throws Exception {
+    // simulate and remove the back reference
+    fs.delete(linkBackRef, false);
+    assertTrue("back reference directory still exists", fs.exists(linkBackRefDir));
+    cleaner.chore();
+    assertFalse("back reference directory should be deleted", fs.exists(linkBackRefDir));
+  }
+
   private static Path getFamilyDirPath(final Path rootDir, final TableName table,