From ce4c24339923cdc497794347b14238d722aa742a Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Fri, 13 Apr 2018 00:57:35 -0500 Subject: [PATCH] HBASE-20404 Fixes to CleanerChore correctness and operability. * Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs * Make CleanerChore less chatty: move IOE we'll retry to INFO * CleanerChore should treat IOE for FileStatus as a failure * Add tests asserting assumptions in above Signed-off-by: Reid Chan Signed-off-by: Mike Drob Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java --- .../hbase/master/cleaner/CleanerChore.java | 24 +++++++-- .../master/cleaner/TestCleanerChore.java | 54 +++++++++++++++++-- .../apache/hadoop/hbase/util/TestFSUtils.java | 12 +++++ 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java index 50d8da32cc1..ea18193fee8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java @@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -396,6 +397,10 @@ public abstract class CleanerChore extends Schedu T act() throws IOException; } + /** + * Attempts to clean up a directory, its subdirectories, and files. 
+ * Return value is true if everything was deleted. false on partial / total failures. + */ private class CleanerTask extends RecursiveTask { private final Path dir; private final boolean root; @@ -415,6 +420,8 @@ public abstract class CleanerChore extends Schedu List subDirs; final List files; try { + // if dir doesn't exist, we'll get null back for both of these + // which will fall through to succeeding. subDirs = FSUtils.listStatusWithStatusFilter(fs, dir, new FileStatusFilter() { @Override public boolean accept(FileStatus f) { @@ -428,8 +435,8 @@ public abstract class CleanerChore extends Schedu } }); } catch (IOException ioe) { - LOG.warn(dir + " doesn't exist, just skip it. ", ioe); - return true; + LOG.warn("failed to get FileStatus for contents of '" + dir + "'", ioe); + return false; } boolean nullSubDirs = subDirs == null; @@ -487,8 +494,19 @@ public abstract class CleanerChore extends Schedu try { LOG.trace("Start deleting " + type + " under " + dir); deleted = deletion.act(); + } catch (PathIsNotEmptyDirectoryException exception) { + // N.B. HDFS throws this exception when we try to delete a non-empty directory, but + // LocalFileSystem throws a bare IOException. So some test code will get the verbose + // message below. + LOG.debug("Couldn't delete '" + dir + "' yet because it isn't empty. Probably transient. " + + "exception details at TRACE."); + LOG.trace("Couldn't delete '" + dir + "' yet because it isn't empty w/exception.", + exception); + deleted = false; } catch (IOException ioe) { - LOG.warn("Could not delete " + type + " under " + dir, ioe); + LOG.info("Could not delete " + type + " under " + dir + ". might be transient; we'll " + + "retry. 
if it keeps happening, use following exception when asking on mailing list.", + ioe); deleted = false; } LOG.trace("Finish deleting " + type + " under " + dir + " deleted=" + deleted); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java index 8bbf61a7a6e..a444f79c672 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,6 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FilterFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.testclassification.SmallTests; @@ -84,9 +86,55 @@ public class TestCleanerChore { // run the chore chore.chore(); - // verify all the files got deleted - assertTrue("File didn't get deleted", fs.exists(file)); - assertTrue("Empty directory didn't get deleted", fs.exists(parent)); + // verify all the files were preserved + assertTrue("File shouldn't have been deleted", fs.exists(file)); + assertTrue("directory shouldn't have been deleted", fs.exists(parent)); + } + + @Test + public void retriesIOExceptionInStatus() throws Exception { + Stoppable stop = new StoppableImplementation(); + Configuration conf = UTIL.getConfiguration(); + Path testDir = UTIL.getDataTestDir(); + FileSystem fs = UTIL.getTestFileSystem(); + String confKey = "hbase.test.cleaner.delegates"; + + Path child = new 
Path(testDir, "child"); + Path file = new Path(child, "file"); + fs.mkdirs(child); + fs.create(file).close(); + assertTrue("test file didn't get created.", fs.exists(file)); + final AtomicBoolean fails = new AtomicBoolean(true); + + FilterFileSystem filtered = new FilterFileSystem(fs) { + public FileStatus[] listStatus(Path f) throws IOException { + if (fails.get()) { + throw new IOException("whomp whomp."); + } + return fs.listStatus(f); + } + }; + + AllValidPaths chore = new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey); + + // trouble talking to the filesystem + Boolean result = chore.runCleaner(); + + // verify that it couldn't clean the files. + assertTrue("test rig failed to inject failure.", fs.exists(file)); + assertTrue("test rig failed to inject failure.", fs.exists(child)); + // and verify that it accurately reported the failure. + assertFalse("chore should report that it failed.", result); + + // filesystem is back + fails.set(false); + result = chore.runCleaner(); + + // verify everything is gone. + assertFalse("file should have been destroyed.", fs.exists(file)); + assertFalse("directory should have been destroyed.", fs.exists(child)); + // and verify that it accurately reported success. 
+ assertTrue("chore should claim it succeeded.", result); } @Test diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java index ef5ad933c87..5fa96333662 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java @@ -300,6 +300,18 @@ public class TestFSUtils { } } + @Test + public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception { + HBaseTestingUtility htu = new HBaseTestingUtility(); + MiniDFSCluster cluster = htu.startMiniDFSCluster(1); + try { + assertNull(FSUtils.listStatusWithStatusFilter(cluster.getFileSystem(), new Path("definitely/doesn't/exist"), null)); + } finally { + cluster.shutdown(); + } + + } + @Test public void testRenameAndSetModifyTime() throws Exception { HBaseTestingUtility htu = new HBaseTestingUtility();