HBASE-20404 Fixes to CleanerChore correctness and operability.

* Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs
* Make CleanerChore less chatty: move IOE we'll retry to INFO
* CleanerChore should treat IOE for FileStatus as a failure
* Add tests asserting assumptions in above

Signed-off-by: Reid Chan <reidddchan@outlook.com>
Signed-off-by: Mike Drob <mdrob@apache.org>
This commit is contained in:
Sean Busbey 2018-04-13 00:57:35 -05:00
parent d951675df5
commit 1c8d9d788f
3 changed files with 83 additions and 8 deletions

View File

@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
@ -446,6 +447,10 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
T act() throws IOException;
}
/**
* Attempts to clean up a directory, its subdirectories, and files.
* Returns true if everything was deleted; false on partial / total failures.
*/
private class CleanerTask extends RecursiveTask<Boolean> {
private final Path dir;
private final boolean root;
@ -465,11 +470,13 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
List<FileStatus> subDirs;
List<FileStatus> files;
try {
// if dir doesn't exist, we'll get null back for both of these
// which will fall through to succeeding.
subDirs = getFilteredStatus(status -> status.isDirectory());
files = getFilteredStatus(status -> status.isFile());
} catch (IOException ioe) {
LOG.warn(dir + " doesn't exist, just skip it. ", ioe);
return true;
LOG.warn("failed to get FileStatus for contents of '{}'", dir, ioe);
return false;
}
boolean nullSubDirs = subDirs == null;
@ -507,8 +514,8 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
* Pay attention that FSUtils #listStatusWithStatusFilter would return null,
* even though status is empty but not null.
* @param function a filter function
* @return filtered FileStatus
* @throws IOException if there's no such a directory
* @return filtered FileStatus or null if dir doesn't exist
* @throws IOException if there's an error other than dir not existing
*/
private List<FileStatus> getFilteredStatus(Predicate<FileStatus> function) throws IOException {
return FSUtils.listStatusWithStatusFilter(fs, dir, status -> function.test(status));
@ -525,8 +532,18 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
try {
LOG.trace("Start deleting {} under {}", type, dir);
deleted = deletion.act();
} catch (PathIsNotEmptyDirectoryException exception) {
// N.B. HDFS throws this exception when we try to delete a non-empty directory, but
// LocalFileSystem throws a bare IOException. So some test code will get the verbose
// message below.
LOG.debug("Couldn't delete '{}' yet because it isn't empty. Probably transient. " +
"exception details at TRACE.", dir);
LOG.trace("Couldn't delete '{}' yet because it isn't empty w/exception.", dir, exception);
deleted = false;
} catch (IOException ioe) {
LOG.warn("Could not delete {} under {}; {}", type, dir, ioe);
LOG.info("Could not delete {} under {}. might be transient; we'll retry. if it keeps " +
"happening, use following exception when asking on mailing list.",
type, dir, ioe);
deleted = false;
}
LOG.trace("Finish deleting {} under {}, deleted=", type, dir, deleted);

View File

@ -23,10 +23,12 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
@ -89,9 +91,55 @@ public class TestCleanerChore {
// run the chore
chore.chore();
// verify all the files got deleted
assertTrue("File didn't get deleted", fs.exists(file));
assertTrue("Empty directory didn't get deleted", fs.exists(parent));
// verify all the files were preserved
assertTrue("File shouldn't have been deleted", fs.exists(file));
assertTrue("directory shouldn't have been deleted", fs.exists(parent));
}
@Test
public void retriesIOExceptionInStatus() throws Exception {
  // Verifies that an IOException while listing directory contents is treated as a
  // cleaner failure (reported false) and that a later run succeeds once the
  // filesystem recovers, rather than the dir being silently skipped.
  Stoppable stop = new StoppableImplementation();
  Configuration conf = UTIL.getConfiguration();
  Path testDir = UTIL.getDataTestDir();
  FileSystem fs = UTIL.getTestFileSystem();
  String confKey = "hbase.test.cleaner.delegates";
  Path child = new Path(testDir, "child");
  Path file = new Path(child, "file");
  fs.mkdirs(child);
  fs.create(file).close();
  assertTrue("test file didn't get created.", fs.exists(file));
  // Toggle for injecting the listing failure; flipped off to simulate recovery.
  final AtomicBoolean fails = new AtomicBoolean(true);
  FilterFileSystem filtered = new FilterFileSystem(fs) {
    @Override
    public FileStatus[] listStatus(Path f) throws IOException {
      if (fails.get()) {
        throw new IOException("whomp whomp.");
      }
      return fs.listStatus(f);
    }
  };
  AllValidPaths chore =
      new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey);
  // trouble talking to the filesystem
  // runCleaner() returns a primitive; no need to box into Boolean.
  boolean result = chore.runCleaner();
  // verify that it couldn't clean the files.
  assertTrue("test rig failed to inject failure.", fs.exists(file));
  assertTrue("test rig failed to inject failure.", fs.exists(child));
  // and verify that it accurately reported the failure.
  assertFalse("chore should report that it failed.", result);
  // filesystem is back
  fails.set(false);
  result = chore.runCleaner();
  // verify everything is gone.
  assertFalse("file should have been destroyed.", fs.exists(file));
  assertFalse("directory should have been destroyed.", fs.exists(child));
  // and verify that it accurately reported success.
  assertTrue("chore should claim it succeeded.", result);
}
@Test

View File

@ -289,6 +289,16 @@ public class TestFSUtils {
}
}
@Test
public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception {
  // Pins the contract that listStatusWithStatusFilter yields null (rather than
  // throwing) when asked about a directory that does not exist.
  MiniDFSCluster dfsCluster = htu.startMiniDFSCluster(1);
  try {
    Path missingDir = new Path("definitely/doesn't/exist");
    assertNull(FSUtils.listStatusWithStatusFilter(dfsCluster.getFileSystem(), missingDir, null));
  } finally {
    dfsCluster.shutdown();
  }
}
@Test
public void testRenameAndSetModifyTime() throws Exception {