HBASE-16775 Fix flaky TestExportSnapshot#testExportRetry.
Reason for flakyness: Current test is probability based fault injection and triggers failure 3% of the time. Earlier when test used LocalJobRunner which didn't honor "mapreduce.map.maxattempts", it'd pass 97% time (when no fault is injected) and fail 3% time (when fault was injected). Point being, even when the test was complete wrong, we couldn't catch it because it was probability based. This change will inject fault in a deterministic manner. On design side, it encapsulates all testing hooks in ExportSnapshot.java into single inner class. Change-Id: Icba866e1d56a5281748df89f4dd374bc45bad249
This commit is contained in:
parent
cf3215d343
commit
da5fb27eab
|
@ -29,7 +29,6 @@ import java.util.Collections;
|
|||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.Option;
|
||||
|
@ -110,9 +109,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
|
||||
protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";
|
||||
|
||||
static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
|
||||
static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";
|
||||
|
||||
static class Testing {
|
||||
static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
|
||||
static final String CONF_TEST_FAILURE_COUNT = "test.snapshot.export.failure.count";
|
||||
int failuresCountToInject = 0;
|
||||
int injectedFailureCount = 0;
|
||||
}
|
||||
|
||||
// Command line options and defaults.
|
||||
static final class Options {
|
||||
|
@ -149,12 +151,10 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
|
||||
private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
|
||||
NullWritable, NullWritable> {
|
||||
private static final Log LOG = LogFactory.getLog(ExportMapper.class);
|
||||
final static int REPORT_SIZE = 1 * 1024 * 1024;
|
||||
final static int BUFFER_SIZE = 64 * 1024;
|
||||
|
||||
private boolean testFailures;
|
||||
private Random random;
|
||||
|
||||
private boolean verifyChecksum;
|
||||
private String filesGroup;
|
||||
private String filesUser;
|
||||
|
@ -169,9 +169,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
private Path inputArchive;
|
||||
private Path inputRoot;
|
||||
|
||||
private static Testing testing = new Testing();
|
||||
|
||||
@Override
|
||||
public void setup(Context context) throws IOException {
|
||||
Configuration conf = context.getConfiguration();
|
||||
|
||||
Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
|
||||
Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
|
||||
|
||||
|
@ -186,8 +189,6 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
|
||||
outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
|
||||
|
||||
testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
|
||||
|
||||
try {
|
||||
srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
|
||||
inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
|
||||
|
@ -210,6 +211,12 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
for (Counter c : Counter.values()) {
|
||||
context.getCounter(c).increment(0);
|
||||
}
|
||||
if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
|
||||
testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
|
||||
// Get number of times we have already injected failure based on attempt number of this
|
||||
// task.
|
||||
testing.injectedFailureCount = context.getTaskAttemptID().getId();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -251,35 +258,23 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
return new Path(outputArchive, path);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by TestExportSnapshot to simulate a failure
|
||||
/**
|
||||
* Used by TestExportSnapshot to test for retries when failures happen.
|
||||
* Failure is injected in {@link #copyFile(Context, SnapshotFileInfo, Path)}.
|
||||
*/
|
||||
private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
|
||||
throws IOException {
|
||||
if (testFailures) {
|
||||
if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
|
||||
if (random == null) {
|
||||
random = new Random();
|
||||
}
|
||||
|
||||
// FLAKY-TEST-WARN: lower is better, we can get some runs without the
|
||||
// retry, but at least we reduce the number of test failures due to
|
||||
// this test exception from the same map task.
|
||||
if (random.nextFloat() < 0.03) {
|
||||
throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
|
||||
+ " time=" + System.currentTimeMillis());
|
||||
}
|
||||
} else {
|
||||
context.getCounter(Counter.COPY_FAILED).increment(1);
|
||||
throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
|
||||
}
|
||||
}
|
||||
if (!context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) return;
|
||||
if (testing.injectedFailureCount >= testing.failuresCountToInject) return;
|
||||
testing.injectedFailureCount++;
|
||||
context.getCounter(Counter.COPY_FAILED).increment(1);
|
||||
LOG.debug("Injecting failure. Count: " + testing.injectedFailureCount);
|
||||
throw new IOException(String.format("TEST FAILURE (%d of max %d): Unable to copy input=%s",
|
||||
testing.injectedFailureCount, testing.failuresCountToInject, inputInfo));
|
||||
}
|
||||
|
||||
private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
|
||||
final Path outputPath) throws IOException {
|
||||
injectTestFailure(context, inputInfo);
|
||||
|
||||
// Get the file information
|
||||
FileStatus inputStat = getSourceFileStatus(context, inputInfo);
|
||||
|
||||
|
@ -318,6 +313,7 @@ public class ExportSnapshot extends AbstractHBaseTool implements Tool {
|
|||
}
|
||||
} finally {
|
||||
in.close();
|
||||
injectTestFailure(context, inputInfo);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.snapshot;
|
|||
import static org.apache.hadoop.util.ToolRunner.run;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
|
@ -44,12 +45,10 @@ import org.apache.hadoop.hbase.client.Admin;
|
|||
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
|
||||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.SnapshotDescription;
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
|
||||
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
|
@ -57,6 +56,7 @@ import org.junit.BeforeClass;
|
|||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.rules.TestName;
|
||||
import org.junit.rules.TestRule;
|
||||
|
||||
/**
|
||||
|
@ -72,6 +72,9 @@ public class TestExportSnapshot {
|
|||
|
||||
protected final static byte[] FAMILY = Bytes.toBytes("cf");
|
||||
|
||||
@Rule
|
||||
public final TestName testName = new TestName();
|
||||
|
||||
protected TableName tableName;
|
||||
private byte[] emptySnapshotName;
|
||||
private byte[] snapshotName;
|
||||
|
@ -85,17 +88,27 @@ public class TestExportSnapshot {
|
|||
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
|
||||
conf.setBoolean("hbase.master.enabletable.roundrobin", true);
|
||||
conf.setInt("mapreduce.map.maxattempts", 10);
|
||||
// If a single node has enough failures (default 3), resource manager will blacklist it.
|
||||
// With only 2 nodes and tests injecting faults, we don't want that.
|
||||
conf.setInt("mapreduce.job.maxtaskfailures.per.tracker", 100);
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
setUpBaseConf(TEST_UTIL.getConfiguration());
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
// Setup separate test-data directory for MR cluster and set corresponding configurations.
|
||||
// Otherwise, different test classes running MR cluster can step on each other.
|
||||
TEST_UTIL.getDataTestDir();
|
||||
TEST_UTIL.startMiniZKCluster();
|
||||
TEST_UTIL.startMiniMapReduceCluster();
|
||||
TEST_UTIL.startMiniHBaseCluster(1, 3);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownAfterClass() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
TEST_UTIL.shutdownMiniHBaseCluster();
|
||||
TEST_UTIL.shutdownMiniMapReduceCluster();
|
||||
TEST_UTIL.shutdownMiniZKCluster();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -105,10 +118,9 @@ public class TestExportSnapshot {
|
|||
public void setUp() throws Exception {
|
||||
this.admin = TEST_UTIL.getAdmin();
|
||||
|
||||
long tid = System.currentTimeMillis();
|
||||
tableName = TableName.valueOf("testtb-" + tid);
|
||||
snapshotName = Bytes.toBytes("snaptb0-" + tid);
|
||||
emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + tid);
|
||||
tableName = TableName.valueOf("testtb-" + testName.getMethodName());
|
||||
snapshotName = Bytes.toBytes("snaptb0-" + testName.getMethodName());
|
||||
emptySnapshotName = Bytes.toBytes("emptySnaptb0-" + testName.getMethodName());
|
||||
|
||||
// create Table
|
||||
createTable();
|
||||
|
@ -191,16 +203,16 @@ public class TestExportSnapshot {
|
|||
Path copyDir, boolean overwrite) throws Exception {
|
||||
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
|
||||
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir,
|
||||
overwrite, getBypassRegionPredicate());
|
||||
overwrite, getBypassRegionPredicate(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test ExportSnapshot
|
||||
* Creates destination directory, runs ExportSnapshot() tool, and runs some verifications.
|
||||
*/
|
||||
protected static void testExportFileSystemState(final Configuration conf, final TableName tableName,
|
||||
final byte[] snapshotName, final byte[] targetName, final int filesExpected,
|
||||
final Path sourceDir, Path copyDir, final boolean overwrite,
|
||||
final RegionPredicate bypassregionPredicate) throws Exception {
|
||||
final RegionPredicate bypassregionPredicate, boolean success) throws Exception {
|
||||
URI hdfsUri = FileSystem.get(conf).getUri();
|
||||
FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
|
||||
copyDir = copyDir.makeQualified(fs);
|
||||
|
@ -218,7 +230,12 @@ public class TestExportSnapshot {
|
|||
|
||||
// Export Snapshot
|
||||
int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
|
||||
assertEquals(0, res);
|
||||
assertEquals(success ? 0 : 1, res);
|
||||
if (!success) {
|
||||
final Path targetDir = new Path(HConstants.SNAPSHOT_DIR_NAME, Bytes.toString(targetName));
|
||||
assertFalse(fs.exists(new Path(copyDir, targetDir)));
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify File-System state
|
||||
FileStatus[] rootFiles = fs.listStatus(copyDir);
|
||||
|
@ -242,42 +259,35 @@ public class TestExportSnapshot {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check that ExportSnapshot will return a failure if something fails.
|
||||
*/
|
||||
@Test
|
||||
public void testExportFailure() throws Exception {
|
||||
assertEquals(1, runExportAndInjectFailures(snapshotName, false));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that ExportSnapshot will succede if something fails but the retry succede.
|
||||
* Check that ExportSnapshot will succeed if something fails but the retry succeed.
|
||||
*/
|
||||
@Test
|
||||
public void testExportRetry() throws Exception {
|
||||
assertEquals(0, runExportAndInjectFailures(snapshotName, true));
|
||||
}
|
||||
|
||||
/*
|
||||
* Execute the ExportSnapshot job injecting failures
|
||||
*/
|
||||
private int runExportAndInjectFailures(final byte[] snapshotName, boolean retry)
|
||||
throws Exception {
|
||||
Path copyDir = getLocalDestinationDir();
|
||||
URI hdfsUri = FileSystem.get(TEST_UTIL.getConfiguration()).getUri();
|
||||
FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
|
||||
copyDir = copyDir.makeQualified(fs);
|
||||
|
||||
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
|
||||
conf.setBoolean(ExportSnapshot.CONF_TEST_FAILURE, true);
|
||||
conf.setBoolean(ExportSnapshot.CONF_TEST_RETRY, retry);
|
||||
if (!retry) {
|
||||
conf.setInt("mapreduce.map.maxattempts", 3);
|
||||
}
|
||||
// Export Snapshot
|
||||
Path sourceDir = TEST_UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
|
||||
String[] args = new String[] { "--snapshot", Bytes.toString(snapshotName),
|
||||
"--copy-from", sourceDir.toString(), "--copy-to", copyDir.toString() };
|
||||
return ToolRunner.run(conf, new ExportSnapshot(), args);
|
||||
conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
|
||||
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
|
||||
conf.setInt("mapreduce.map.maxattempts", 3);
|
||||
testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
|
||||
TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that ExportSnapshot will fail if we inject failure more times than MR will retry.
|
||||
*/
|
||||
@Test
|
||||
public void testExportFailure() throws Exception {
|
||||
Path copyDir = getLocalDestinationDir();
|
||||
FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration());
|
||||
copyDir = copyDir.makeQualified(fs);
|
||||
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
|
||||
conf.setBoolean(ExportSnapshot.Testing.CONF_TEST_FAILURE, true);
|
||||
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
|
||||
conf.setInt("mapreduce.map.maxattempts", 3);
|
||||
testExportFileSystemState(conf, tableName, snapshotName, snapshotName, tableNumFiles,
|
||||
TEST_UTIL.getDefaultRootDirPath(), copyDir, true, getBypassRegionPredicate(), false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -104,7 +104,7 @@ public class TestExportSnapshotNoCluster {
|
|||
TableName tableName = builder.getTableDescriptor().getTableName();
|
||||
TestExportSnapshot.testExportFileSystemState(TEST_UTIL.getConfiguration(),
|
||||
tableName, snapshotName, snapshotName, snapshotFilesCount,
|
||||
testDir, getDestinationDir(), false, null);
|
||||
testDir, getDestinationDir(), false, null, true);
|
||||
}
|
||||
|
||||
private Path getDestinationDir() {
|
||||
|
|
|
@ -45,7 +45,12 @@ public class TestMobExportSnapshot extends TestExportSnapshot {
|
|||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
setUpBaseConf(TEST_UTIL.getConfiguration());
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
// Setup separate test-data directory for MR cluster and set corresponding configurations.
|
||||
// Otherwise, different test classes running MR cluster can step on each other.
|
||||
TEST_UTIL.getDataTestDir();
|
||||
TEST_UTIL.startMiniZKCluster();
|
||||
TEST_UTIL.startMiniMapReduceCluster();
|
||||
TEST_UTIL.startMiniHBaseCluster(1, 3);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -37,6 +37,9 @@ public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
|
|||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
setUpBaseConf(TEST_UTIL.getConfiguration());
|
||||
// Setup separate test-data directory for MR cluster and set corresponding configurations.
|
||||
// Otherwise, different test classes running MR cluster can step on each other.
|
||||
TEST_UTIL.getDataTestDir();
|
||||
|
||||
// set the always on security provider
|
||||
UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
|
||||
|
@ -45,7 +48,9 @@ public class TestMobSecureExportSnapshot extends TestMobExportSnapshot {
|
|||
// setup configuration
|
||||
SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
|
||||
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
TEST_UTIL.startMiniZKCluster();
|
||||
TEST_UTIL.startMiniMapReduceCluster();
|
||||
TEST_UTIL.startMiniHBaseCluster(1, 3);
|
||||
|
||||
// Wait for the ACL table to become available
|
||||
TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
|
||||
|
|
|
@ -42,6 +42,9 @@ public class TestSecureExportSnapshot extends TestExportSnapshot {
|
|||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
setUpBaseConf(TEST_UTIL.getConfiguration());
|
||||
// Setup separate test-data directory for MR cluster and set corresponding configurations.
|
||||
// Otherwise, different test classes running MR cluster can step on each other.
|
||||
TEST_UTIL.getDataTestDir();
|
||||
|
||||
// set the always on security provider
|
||||
UserProvider.setUserProviderForTesting(TEST_UTIL.getConfiguration(),
|
||||
|
@ -50,7 +53,9 @@ public class TestSecureExportSnapshot extends TestExportSnapshot {
|
|||
// setup configuration
|
||||
SecureTestUtil.enableSecurity(TEST_UTIL.getConfiguration());
|
||||
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
TEST_UTIL.startMiniZKCluster();
|
||||
TEST_UTIL.startMiniMapReduceCluster();
|
||||
TEST_UTIL.startMiniHBaseCluster(1, 3);
|
||||
|
||||
// Wait for the ACL table to become available
|
||||
TEST_UTIL.waitTableEnabled(AccessControlLists.ACL_TABLE_NAME);
|
||||
|
|
Loading…
Reference in New Issue