HBASE-20404 Fixes to CleanerChore correctness and operability.
* Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs
* Make CleanerChore less chatty: move IOE we'll retry to INFO
* CleanerChore should treat IOE for FileStatus as a failure
* Add tests asserting assumptions in above

Signed-off-by: Reid Chan <reidddchan@outlook.com>
Signed-off-by: Mike Drob <mdrob@apache.org>

Conflicts:
    hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
    hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
    hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
commit ce4c243399
parent f37916253e
hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -396,6 +397,10 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
     T act() throws IOException;
   }

+  /**
+   * Attempts to clean up a directory, its subdirectories, and files.
+   * Return value is true if everything was deleted. false on partial / total failures.
+   */
   private class CleanerTask extends RecursiveTask<Boolean> {
     private final Path dir;
     private final boolean root;
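The new javadoc pins down CleanerTask's contract: it is a RecursiveTask<Boolean> run on a ForkJoinPool, and a parent directory only reports true if its own deletions and all child tasks succeeded. A minimal standalone sketch of that fork/join shape (not part of this commit; names are hypothetical):

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;

class AllTrueTask extends RecursiveTask<Boolean> {
  private final int depth;

  AllTrueTask(int depth) { this.depth = depth; }

  @Override
  protected Boolean compute() {
    if (depth == 0) {
      return true;                 // leaf: pretend the local delete succeeded
    }
    AllTrueTask child = new AllTrueTask(depth - 1);
    child.fork();                  // schedule "subdirectory" cleanup on the pool
    boolean mine = true;           // pretend this dir's own files were deleted
    return child.join() && mine;   // partial failure anywhere -> false overall
  }
}

// usage: new ForkJoinPool().invoke(new AllTrueTask(3)) evaluates to true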
@@ -415,6 +420,8 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       List<FileStatus> subDirs;
       final List<FileStatus> files;
       try {
+        // if dir doesn't exist, we'll get null back for both of these
+        // which will fall through to succeeding.
         subDirs = FSUtils.listStatusWithStatusFilter(fs, dir, new FileStatusFilter() {
           @Override
           public boolean accept(FileStatus f) {
@@ -428,8 +435,8 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
           }
         });
       } catch (IOException ioe) {
-        LOG.warn(dir + " doesn't exist, just skip it. ", ioe);
-        return true;
+        LOG.warn("failed to get FileStatus for contents of '" + dir + "'", ioe);
+        return false;
       }

       boolean nullSubDirs = subDirs == null;
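This is the core correctness fix: the old code assumed any IOException from the listing meant the directory was already gone and returned true, so the chore could claim success for a cleanup it never performed. A missing directory is now the null-result case noted above, while a real listing failure reports false. A minimal standalone sketch of the corrected contract, using java.nio rather than the Hadoop API (illustration only, not HBase code):

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;

public class ListOrFail {
  /** Returns true only when every entry under dir was verifiably removed. */
  static boolean cleanDir(Path dir) {
    try (DirectoryStream<Path> entries = Files.newDirectoryStream(dir)) {
      boolean allDeleted = true;
      for (Path entry : entries) {
        allDeleted &= Files.deleteIfExists(entry); // flat sketch; the real chore recurses
      }
      return allDeleted;
    } catch (NoSuchFileException e) {
      return true;  // dir already gone: vacuously clean, like the null-FileStatus case
    } catch (IOException e) {
      return false; // cannot even list: must not claim success (old code returned true here)
    }
  }
}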
@@ -487,8 +494,19 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       try {
         LOG.trace("Start deleting " + type + " under " + dir);
         deleted = deletion.act();
+      } catch (PathIsNotEmptyDirectoryException exception) {
+        // N.B. HDFS throws this exception when we try to delete a non-empty directory, but
+        // LocalFileSystem throws a bare IOException. So some test code will get the verbose
+        // message below.
+        LOG.debug("Couldn't delete '" + dir + "' yet because it isn't empty. Probably transient. " +
+            "exception details at TRACE.");
+        LOG.trace("Couldn't delete '" + dir + "' yet because it isn't empty w/exception.",
+            exception);
+        deleted = false;
       } catch (IOException ioe) {
-        LOG.warn("Could not delete " + type + " under " + dir, ioe);
+        LOG.info("Could not delete " + type + " under " + dir + ". might be transient; we'll " +
+            "retry. if it keeps happening, use following exception when asking on mailing list.",
+            ioe);
         deleted = false;
       }
       LOG.trace("Finish deleting " + type + " under " + dir + " deleted=" + deleted);
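The N.B. comment encodes the filesystem quirk that motivates the quieter logging: a non-recursive delete of a non-empty directory fails on any FileSystem, but only HDFS signals it with the typed PathIsNotEmptyDirectoryException that the new catch block can match; LocalFileSystem raises a bare IOException and so falls through to the INFO-level branch. A hedged sketch of that split (illustration only, not HBase code):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;

public class DeleteQuirk {
  /** Returns true if dir was deleted; false when HDFS reports it is still non-empty. */
  static boolean tryDeleteEmptyDir(FileSystem fs, Path dir) throws IOException {
    try {
      return fs.delete(dir, false); // non-recursive: refuses to remove a non-empty dir
    } catch (PathIsNotEmptyDirectoryException e) {
      // HDFS only: a typed signal, safe to treat as "transient, retry next chore run"
      return false;
    }
    // LocalFileSystem signals the same condition with a bare IOException, which
    // propagates to the caller's generic handler (the INFO-level branch above).
  }
}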
hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java

@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;

 import java.io.IOException;
 import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -30,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -84,9 +86,55 @@ public class TestCleanerChore {
     // run the chore
     chore.chore();

-    // verify all the files got deleted
-    assertTrue("File didn't get deleted", fs.exists(file));
-    assertTrue("Empty directory didn't get deleted", fs.exists(parent));
+    // verify all the files were preserved
+    assertTrue("File shouldn't have been deleted", fs.exists(file));
+    assertTrue("directory shouldn't have been deleted", fs.exists(parent));
+  }
+
+  @Test
+  public void retriesIOExceptionInStatus() throws Exception {
+    Stoppable stop = new StoppableImplementation();
+    Configuration conf = UTIL.getConfiguration();
+    Path testDir = UTIL.getDataTestDir();
+    FileSystem fs = UTIL.getTestFileSystem();
+    String confKey = "hbase.test.cleaner.delegates";
+
+    Path child = new Path(testDir, "child");
+    Path file = new Path(child, "file");
+    fs.mkdirs(child);
+    fs.create(file).close();
+    assertTrue("test file didn't get created.", fs.exists(file));
+    final AtomicBoolean fails = new AtomicBoolean(true);
+
+    FilterFileSystem filtered = new FilterFileSystem(fs) {
+      public FileStatus[] listStatus(Path f) throws IOException {
+        if (fails.get()) {
+          throw new IOException("whomp whomp.");
+        }
+        return fs.listStatus(f);
+      }
+    };
+
+    AllValidPaths chore = new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey);
+
+    // trouble talking to the filesystem
+    Boolean result = chore.runCleaner();
+
+    // verify that it couldn't clean the files.
+    assertTrue("test rig failed to inject failure.", fs.exists(file));
+    assertTrue("test rig failed to inject failure.", fs.exists(child));
+    // and verify that it accurately reported the failure.
+    assertFalse("chore should report that it failed.", result);
+
+    // filesystem is back
+    fails.set(false);
+    result = chore.runCleaner();
+
+    // verify everything is gone.
+    assertFalse("file should have been destroyed.", fs.exists(file));
+    assertFalse("directory should have been destroyed.", fs.exists(child));
+    // and verify that it accurately reported success.
+    assertTrue("chore should claim it succeeded.", result);
   }

   @Test
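The new test's fault injection is a reusable pattern: wrap the real FileSystem in a FilterFileSystem and override a single method to fail while a flag is set, then flip the flag to simulate recovery. A standalone sketch of the same idea (hypothetical names, not part of this commit):

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.Path;

class FlakyListingFileSystem extends FilterFileSystem {
  private final AtomicBoolean failListings;

  FlakyListingFileSystem(FileSystem delegate, AtomicBoolean failListings) {
    super(delegate);
    this.failListings = failListings;
  }

  @Override
  public FileStatus[] listStatus(Path f) throws IOException {
    if (failListings.get()) {
      throw new IOException("injected listing failure for " + f);
    }
    return super.listStatus(f); // flag off: delegate to the real filesystem
  }
}

Handing the wrapped filesystem to the chore, as the test does with AllValidPaths, makes the failure window controllable from the test body through the shared AtomicBoolean.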
hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java

@@ -300,6 +300,18 @@ public class TestFSUtils {
     }
   }

+  @Test
+  public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception {
+    HBaseTestingUtility htu = new HBaseTestingUtility();
+    MiniDFSCluster cluster = htu.startMiniDFSCluster(1);
+    try {
+      assertNull(FSUtils.listStatusWithStatusFilter(cluster.getFileSystem(), new Path("definitely/doesn't/exist"), null));
+    } finally {
+      cluster.shutdown();
+    }
+
+  }
+
   @Test
   public void testRenameAndSetModifyTime() throws Exception {
     HBaseTestingUtility htu = new HBaseTestingUtility();
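This test asserts the assumption the CleanerChore change leans on: FSUtils.listStatusWithStatusFilter returns null for a missing path instead of throwing, which is exactly the "fall through to succeeding" case the new comment in CleanerTask describes. A hedged caller-side sketch mirroring how CleanerTask consumes that contract (illustration only):

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.FSUtils;

public class NullMeansMissing {
  /** Sketch: count cleanable children, where a null listing means "already gone". */
  static int cleanableChildren(FileSystem fs, Path dir) throws IOException {
    List<FileStatus> children = FSUtils.listStatusWithStatusFilter(fs, dir, null);
    if (children == null) {
      return 0; // dir doesn't exist (or nothing matched the filter): vacuously clean
    }
    return children.size(); // a real IOException still propagates as a failure
  }
}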