HBASE-19478 Utilize multi-get to speed up WAL file checking in BackupLogCleaner (Toshihiro Suzuki)

This commit is contained in:
tedyu 2018-01-01 06:55:11 -08:00
parent 6c2aa4c9cc
commit cafd4e4ad7
3 changed files with 74 additions and 6 deletions

View File

@ -37,6 +37,7 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
@ -1090,6 +1091,47 @@ public final class BackupSystemTable implements Closeable {
}
}
/**
 * Determines which of the given WAL files are eligible for deletion. Lookups are issued as
 * multi-gets of at most 100 rows each instead of one get per file.
 * @param files WAL files to check
 * @return map from each checked file to {@code true} when its lookup returned a non-empty
 *         result (the file is deletable) and {@code false} otherwise; the map is a
 *         {@code HashMap}, so its iteration order is unrelated to the order of {@code files}
 * @throws IOException propagated from opening the table or from the lookups
 */
public Map<FileStatus, Boolean> areWALFilesDeletable(Iterable<FileStatus> files)
    throws IOException {
  final int batchLimit = 100;
  Map<FileStatus, Boolean> deletableByFile = new HashMap<>();
  try (Table table = connection.getTable(tableName)) {
    // pendingGets and pendingFiles are filled in lockstep, so the result at position k of a
    // multi-get belongs to the file at position k of pendingFiles.
    List<Get> pendingGets = new ArrayList<>();
    List<FileStatus> pendingFiles = new ArrayList<>();
    for (FileStatus candidate : files) {
      pendingGets.add(createGetForCheckWALFile(candidate.getPath().toString()));
      pendingFiles.add(candidate);
      if (pendingGets.size() < batchLimit) {
        continue;
      }
      // Batch is full: send it and start a new one.
      int position = 0;
      for (Result lookup : table.get(pendingGets)) {
        deletableByFile.put(pendingFiles.get(position), !lookup.isEmpty());
        position++;
      }
      pendingGets.clear();
      pendingFiles.clear();
    }
    // Send whatever is left over from the last, partially filled batch.
    if (!pendingGets.isEmpty()) {
      int position = 0;
      for (Result lookup : table.get(pendingGets)) {
        deletableByFile.put(pendingFiles.get(position), !lookup.isEmpty());
        position++;
      }
    }
  }
  return deletableByFile;
}
/**
* Checks if we have at least one backup session in backup system table. This API is used by
* BackupLogCleaner

View File

@ -96,14 +96,18 @@ public class BackupLogCleaner extends BaseLogCleanerDelegate {
return files;
}
for (FileStatus file : files) {
Map<FileStatus, Boolean> walFilesDeletableMap = table.areWALFilesDeletable(files);
for (Map.Entry<FileStatus, Boolean> entry: walFilesDeletableMap.entrySet()) {
FileStatus file = entry.getKey();
String wal = file.getPath().toString();
boolean logInSystemTable = table.isWALFileDeletable(wal);
boolean deletable = entry.getValue();
if (deletable) {
if (LOG.isDebugEnabled()) {
if (logInSystemTable) {
LOG.debug("Found log file in backup system table, deleting: " + wal);
}
list.add(file);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Didn't find this log in backup system table, keeping: " + wal);
}
}

View File

@ -32,10 +32,13 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
@ -335,6 +338,25 @@ public class TestBackupSystemTable {
assertTrue(table.isWALFileDeletable(files.get(2)));
assertFalse(table.isWALFileDeletable(newFile));
// test for areWALFilesDeletable
List<FileStatus> fileStatues = new ArrayList<>();
for (String file : files) {
FileStatus fileStatus = new FileStatus();
fileStatus.setPath(new Path(file));
fileStatues.add(fileStatus);
}
FileStatus newFileStatus = new FileStatus();
newFileStatus.setPath(new Path(newFile));
fileStatues.add(newFileStatus);
Map<FileStatus, Boolean> walFilesDeletableMap = table.areWALFilesDeletable(fileStatues);
assertTrue(walFilesDeletableMap.get(fileStatues.get(0)));
assertTrue(walFilesDeletableMap.get(fileStatues.get(1)));
assertTrue(walFilesDeletableMap.get(fileStatues.get(2)));
assertFalse(walFilesDeletableMap.get(newFileStatus));
cleanBackupTable();
}