From 1c8d9d788f3e39bf9406d11e590fd64781cd73de Mon Sep 17 00:00:00 2001
From: Sean Busbey
Date: Fri, 13 Apr 2018 00:57:35 -0500
Subject: [PATCH] HBASE-20404 Fixes to CleanerChore correctness and operability.

* Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs
* Make CleanerChore less chatty: move IOE we'll retry to INFO
* CleanerChore should treat IOE for FileStatus as a failure
* Add tests asserting assumptions in above

Signed-off-by: Reid Chan
Signed-off-by: Mike Drob
---
 .../hbase/master/cleaner/CleanerChore.java         | 27 ++++++++--
 .../master/cleaner/TestCleanerChore.java           | 54 +++++++++++++++++--
 .../apache/hadoop/hbase/util/TestFSUtils.java      | 10 ++++
 3 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 396fbafec90..dc13645e6cd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
@@ -446,6 +447,10 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
     T act() throws IOException;
   }
 
+  /**
+   * Attempts to clean up a directory, its subdirectories, and files.
+   * Return value is true if everything was deleted; false on partial / total failures.
+   */
   private class CleanerTask extends RecursiveTask<Boolean> {
     private final Path dir;
     private final boolean root;
@@ -465,11 +470,13 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       List<FileStatus> subDirs;
       List<FileStatus> files;
       try {
+        // if dir doesn't exist, we'll get null back for both of these
+        // which will fall through to succeeding.
         subDirs = getFilteredStatus(status -> status.isDirectory());
         files = getFilteredStatus(status -> status.isFile());
       } catch (IOException ioe) {
-        LOG.warn(dir + " doesn't exist, just skip it. ", ioe);
-        return true;
+        LOG.warn("failed to get FileStatus for contents of '{}'", dir, ioe);
+        return false;
       }
 
       boolean nullSubDirs = subDirs == null;
@@ -507,8 +514,8 @@
      * Pay attention that FSUtils #listStatusWithStatusFilter would return null,
      * even though status is empty but not null.
      * @param function a filter function
-     * @return filtered FileStatus
-     * @throws IOException if there's no such a directory
+     * @return filtered FileStatus or null if dir doesn't exist
+     * @throws IOException if there's an error other than dir not existing
      */
     private List<FileStatus> getFilteredStatus(Predicate<FileStatus> function) throws IOException {
       return FSUtils.listStatusWithStatusFilter(fs, dir, status -> function.test(status));
     }
@@ -525,8 +532,18 @@
       try {
         LOG.trace("Start deleting {} under {}", type, dir);
         deleted = deletion.act();
+      } catch (PathIsNotEmptyDirectoryException exception) {
+        // N.B. HDFS throws this exception when we try to delete a non-empty directory, but
+        // LocalFileSystem throws a bare IOException. So some test code will get the verbose
+        // message below.
+        LOG.debug("Couldn't delete '{}' yet because it isn't empty. Probably transient. " +
+            "exception details at TRACE.", dir);
+        LOG.trace("Couldn't delete '{}' yet because it isn't empty w/exception.", dir, exception);
+        deleted = false;
       } catch (IOException ioe) {
-        LOG.warn("Could not delete {} under {}; {}", type, dir, ioe);
+        LOG.info("Could not delete {} under {}. Might be transient; we'll retry. If it keeps " +
+            "happening, use the following exception when asking on the mailing list.",
+            type, dir, ioe);
         deleted = false;
       }
       LOG.trace("Finish deleting {} under {}, deleted=", type, dir, deleted);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
index 5736663acf0..c977f98e942 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -89,9 +91,55 @@ public class TestCleanerChore {
     // run the chore
     chore.chore();
 
-    // verify all the files got deleted
-    assertTrue("File didn't get deleted", fs.exists(file));
-    assertTrue("Empty directory didn't get deleted", fs.exists(parent));
+    // verify all the files were preserved
+    assertTrue("File shouldn't have been deleted", fs.exists(file));
+    assertTrue("directory shouldn't have been deleted", fs.exists(parent));
+  }
+
+  @Test
+  public void retriesIOExceptionInStatus() throws Exception {
+    Stoppable stop = new StoppableImplementation();
+    Configuration conf = UTIL.getConfiguration();
+    Path testDir = UTIL.getDataTestDir();
+    FileSystem fs = UTIL.getTestFileSystem();
+    String confKey = "hbase.test.cleaner.delegates";
+
+    Path child = new Path(testDir, "child");
+    Path file = new Path(child, "file");
+    fs.mkdirs(child);
+    fs.create(file).close();
+    assertTrue("test file didn't get created.", fs.exists(file));
+    final AtomicBoolean fails = new AtomicBoolean(true);
+
+    FilterFileSystem filtered = new FilterFileSystem(fs) {
+      public FileStatus[] listStatus(Path f) throws IOException {
+        if (fails.get()) {
+          throw new IOException("whomp whomp.");
+        }
+        return fs.listStatus(f);
+      }
+    };
+
+    AllValidPaths chore = new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey);
+
+    // trouble talking to the filesystem
+    Boolean result = chore.runCleaner();
+
+    // verify that it couldn't clean the files.
+    assertTrue("test rig failed to inject failure.", fs.exists(file));
+    assertTrue("test rig failed to inject failure.", fs.exists(child));
+    // and verify that it accurately reported the failure.
+    assertFalse("chore should report that it failed.", result);
+
+    // filesystem is back
+    fails.set(false);
+    result = chore.runCleaner();
+
+    // verify everything is gone.
+    assertFalse("file should have been destroyed.", fs.exists(file));
+    assertFalse("directory should have been destroyed.", fs.exists(child));
+    // and verify that it accurately reported success.
+    assertTrue("chore should claim it succeeded.", result);
   }
 
   @Test
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
index edd35c7f824..2718120084b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
@@ -289,6 +289,16 @@ public class TestFSUtils {
     }
   }
 
+  @Test
+  public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception {
+    MiniDFSCluster cluster = htu.startMiniDFSCluster(1);
+    try {
+      assertNull(FSUtils.listStatusWithStatusFilter(cluster.getFileSystem(), new Path("definitely/doesn't/exist"), null));
+    } finally {
+      cluster.shutdown();
+    }
+
+  }
   @Test
   public void testRenameAndSetModifyTime() throws Exception {