From 30284d020d36c502dad5bdbae61ec48e9dfe9f8c Mon Sep 17 00:00:00 2001 From: Aaron Fabbri Date: Tue, 29 May 2018 13:38:15 -0700 Subject: [PATCH] HADOOP-14946 S3Guard testPruneCommandCLI can fail. Contributed by Gabor Bota. --- .../s3guard/AbstractS3GuardToolTestBase.java | 52 ++++++++++++++++--- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java index 4381749d71e..2b4381041df 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java @@ -31,6 +31,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.util.StopWatch; import org.junit.Assume; import org.junit.Test; @@ -61,6 +62,8 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase { protected static final String S3A_THIS_BUCKET_DOES_NOT_EXIST = "s3a://this-bucket-does-not-exist-00000000000"; + private static final int PRUNE_MAX_AGE_SECS = 2; + private MetadataStore ms; protected static void expectResult(int expected, @@ -186,24 +189,57 @@ protected void createFile(Path path, boolean onS3, boolean onMetadataStore) } } + /** + * Attempt to test prune() with sleep() without having flaky tests + * when things run slowly. Test is basically: + * 1. Set max path age to X seconds + * 2. Create some files (which writes entries to MetadataStore) + * 3. Sleep X+2 seconds (all files from above are now "stale") + * 4. Create some other files (these are "fresh"). + * 5. Run prune on MetadataStore. + * 6. Assert that only files that were created before the sleep() were pruned. 
+ * + * Problem is: #6 can fail if X seconds elapse between steps 4 and 5, since + * the newer files also become stale and get pruned. This is easy to + * reproduce by running all integration tests in parallel with a ton of + * threads, or anything else that slows down execution a lot. + * + * Solution: Keep track of time elapsed between #4 and #5, and if it + * exceeds X, just print a warn() message instead of failing. + * + * @param cmdConf configuration for command + * @param parent path + * @param args command args + * @throws Exception on any failure + */ private void testPruneCommand(Configuration cmdConf, Path parent, String...args) throws Exception { Path keepParent = path("prune-cli-keep"); + StopWatch timer = new StopWatch(); try { - getFileSystem().mkdirs(parent); - getFileSystem().mkdirs(keepParent); - S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf); cmd.setMetadataStore(ms); + getFileSystem().mkdirs(parent); + getFileSystem().mkdirs(keepParent); createFile(new Path(parent, "stale"), true, true); createFile(new Path(keepParent, "stale-to-keep"), true, true); - Thread.sleep(TimeUnit.SECONDS.toMillis(2)); + + Thread.sleep(TimeUnit.SECONDS.toMillis(PRUNE_MAX_AGE_SECS + 2)); + + timer.start(); createFile(new Path(parent, "fresh"), true, true); assertMetastoreListingCount(parent, "Children count before pruning", 2); exec(cmd, args); - assertMetastoreListingCount(parent, "Pruned children count", 1); + long msecElapsed = timer.now(TimeUnit.MILLISECONDS); + if (msecElapsed >= PRUNE_MAX_AGE_SECS * 1000) { + LOG.warn("Skipping an assertion: Test running too slowly ({} msec)", + msecElapsed); + } else { + assertMetastoreListingCount(parent, "Pruned children count remaining", + 1); + } assertMetastoreListingCount(keepParent, "This child should have been kept (prefix restriction).", 1); } finally { @@ -224,13 +260,14 @@ private void assertMetastoreListingCount(Path parent, public void testPruneCommandCLI() throws Exception { Path testPath = path("testPruneCommandCLI"); 
testPruneCommand(getFileSystem().getConf(), testPath, - "prune", "-seconds", "1", testPath.toString()); + "prune", "-seconds", String.valueOf(PRUNE_MAX_AGE_SECS), + testPath.toString()); } @Test public void testPruneCommandConf() throws Exception { getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE, - TimeUnit.SECONDS.toMillis(1)); + TimeUnit.SECONDS.toMillis(PRUNE_MAX_AGE_SECS)); Path testPath = path("testPruneCommandConf"); testPruneCommand(getConfiguration(), testPath, "prune", testPath.toString()); @@ -286,7 +323,6 @@ protected String getLandsatCSVFile() { * Execute a command, returning the buffer if the command actually completes. * If an exception is raised the output is logged instead. * @param cmd command - * @param buf buffer to use for tool output (not SLF4J output) * @param args argument list * @throws Exception on any failure */