HBASE-7643 HFileArchiver.resolveAndArchive() race condition may lead to snapshot data loss (Matteo Bertozzi)
git-svn-id: https://svn.apache.org/repos/asf/hbase/branches/hbase-7290@1445861 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d0906be22c
commit
69523a5ef8
|
@ -55,6 +55,9 @@ public class HFileArchiver {
|
|||
private static final Log LOG = LogFactory.getLog(HFileArchiver.class);
|
||||
private static final String SEPARATOR = ".";
|
||||
|
||||
/** Number of retries in case of fs operation failure */
|
||||
private static final int DEFAULT_RETRIES_NUMBER = 6;
|
||||
|
||||
private HFileArchiver() {
|
||||
// hidden ctor since this is just a util
|
||||
}
|
||||
|
@ -133,6 +136,7 @@ public class HFileArchiver {
|
|||
try {
|
||||
success = resolveAndArchive(fs, regionArchiveDir, toArchive);
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to archive: " + toArchive, e);
|
||||
success = false;
|
||||
}
|
||||
|
||||
|
@ -143,7 +147,7 @@ public class HFileArchiver {
|
|||
}
|
||||
|
||||
throw new IOException("Received error when attempting to archive files (" + toArchive
|
||||
+ "), cannot delete region directory.");
|
||||
+ "), cannot delete region directory. ");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -273,14 +277,12 @@ public class HFileArchiver {
|
|||
long start = EnvironmentEdgeManager.currentTimeMillis();
|
||||
List<File> failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start);
|
||||
|
||||
// clean out the failures by just deleting them
|
||||
// notify that some files were not archived.
|
||||
// We can't delete the files otherwise snapshots or other backup system
|
||||
// that relies on the archiver end up with data loss.
|
||||
if (failures.size() > 0) {
|
||||
try {
|
||||
LOG.error("Failed to complete archive, deleting extra store files.");
|
||||
deleteFilesWithoutArchiving(failures);
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed to delete store file(s) when archiving failed", e);
|
||||
}
|
||||
LOG.warn("Failed to complete archive of: " + failures +
|
||||
". Those files are still in the original location, and they may slow down reads.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -383,19 +385,48 @@ public class HFileArchiver {
|
|||
if (!fs.delete(archiveFile, false)) {
|
||||
throw new IOException("Couldn't delete existing archive file (" + archiveFile
|
||||
+ ") or rename it to the backup file (" + backedupArchiveFile
|
||||
+ ")to make room for similarly named file.");
|
||||
+ ") to make room for similarly named file.");
|
||||
}
|
||||
}
|
||||
LOG.debug("Backed up archive file from: " + archiveFile);
|
||||
}
|
||||
|
||||
LOG.debug("No existing file in archive for:" + archiveFile + ", free to archive original file.");
|
||||
LOG.debug("No existing file in archive for:" + archiveFile +
|
||||
", free to archive original file.");
|
||||
|
||||
// at this point, we should have a free spot for the archive file
|
||||
if (currentFile.moveAndClose(archiveFile)) {
|
||||
boolean success = false;
|
||||
for (int i = 0; !success && i < DEFAULT_RETRIES_NUMBER; ++i) {
|
||||
if (i > 0) {
|
||||
// Ensure that the archive directory exists.
|
||||
// The previous "move to archive" operation has failed probably because
|
||||
// the cleaner has removed our archive directory (HBASE-7643).
|
||||
// (we're in a retry loop, so don't worry too much about the exception)
|
||||
try {
|
||||
if (!fs.exists(archiveDir)) {
|
||||
if (fs.mkdirs(archiveDir)) {
|
||||
LOG.debug("Created archive directory:" + archiveDir);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed to create the archive directory: " + archiveDir, e);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
success = currentFile.moveAndClose(archiveFile);
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed to archive file: " + currentFile + " on try #" + i, e);
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
LOG.error("Failed to archive file:" + currentFile);
|
||||
return false;
|
||||
} else if (LOG.isDebugEnabled()) {
|
||||
}
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Finished archiving file from: " + currentFile + ", to: " + archiveFile);
|
||||
}
|
||||
return true;
|
||||
|
@ -572,7 +603,7 @@ public class HFileArchiver {
|
|||
public boolean moveAndClose(Path dest) throws IOException {
|
||||
this.close();
|
||||
Path p = this.getPath();
|
||||
return !fs.rename(p, dest);
|
||||
return fs.rename(p, dest);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -36,13 +36,18 @@ import org.apache.hadoop.fs.PathFilter;
|
|||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.MediumTests;
|
||||
import org.apache.hadoop.hbase.Stoppable;
|
||||
import org.apache.hadoop.hbase.backup.HFileArchiver;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.HFileArchiveTestingUtil;
|
||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||
import org.apache.hadoop.hbase.util.StoppableImplementation;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
|
@ -314,6 +319,69 @@ public class TestHFileArchiving {
|
|||
archivedFiles.containsAll(storeFiles));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HFileArchiver.resolveAndArchive() race condition HBASE-7643
|
||||
*/
|
||||
@Test
|
||||
public void testCleaningRace() throws Exception {
|
||||
final long TEST_TIME = 20 * 1000;
|
||||
|
||||
Configuration conf = UTIL.getMiniHBaseCluster().getMaster().getConfiguration();
|
||||
Path rootDir = UTIL.getDataTestDir("testCleaningRace");
|
||||
FileSystem fs = UTIL.getTestFileSystem();
|
||||
|
||||
Path archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
|
||||
Path regionDir = new Path("table", "abcdef");
|
||||
Path familyDir = new Path(regionDir, "cf");
|
||||
|
||||
Path sourceRegionDir = new Path(rootDir, regionDir);
|
||||
fs.mkdirs(sourceRegionDir);
|
||||
|
||||
Stoppable stoppable = new StoppableImplementation();
|
||||
|
||||
// The cleaner should be looping without long pauses to reproduce the race condition.
|
||||
HFileCleaner cleaner = new HFileCleaner(1, stoppable, conf, fs, archiveDir);
|
||||
try {
|
||||
cleaner.start();
|
||||
|
||||
// Keep creating/archiving new files while the cleaner is running in the other thread
|
||||
long startTime = System.currentTimeMillis();
|
||||
for (long fid = 0; (System.currentTimeMillis() - startTime) < TEST_TIME; ++fid) {
|
||||
Path file = new Path(familyDir, String.valueOf(fid));
|
||||
Path sourceFile = new Path(rootDir, file);
|
||||
Path archiveFile = new Path(archiveDir, file);
|
||||
|
||||
fs.createNewFile(sourceFile);
|
||||
|
||||
try {
|
||||
// Try to archive the file
|
||||
HFileArchiver.archiveRegion(fs, rootDir,
|
||||
sourceRegionDir.getParent(), sourceRegionDir);
|
||||
|
||||
// The archiver succeded, the file is no longer in the original location
|
||||
// but it's in the archive location.
|
||||
LOG.debug("hfile=" + fid + " should be in the archive");
|
||||
assertTrue(fs.exists(archiveFile));
|
||||
assertFalse(fs.exists(sourceFile));
|
||||
} catch (IOException e) {
|
||||
// The archiver is unable to archive the file. Probably HBASE-7643 race condition.
|
||||
// in this case, the file should not be archived, and we should have the file
|
||||
// in the original location.
|
||||
LOG.debug("hfile=" + fid + " should be in the source location");
|
||||
assertFalse(fs.exists(archiveFile));
|
||||
assertTrue(fs.exists(sourceFile));
|
||||
|
||||
// Avoid to have this file in the next run
|
||||
fs.delete(sourceFile, false);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
stoppable.stop("test end");
|
||||
cleaner.join();
|
||||
fs.delete(rootDir, true);
|
||||
}
|
||||
}
|
||||
|
||||
private void clearArchiveDirectory() throws IOException {
|
||||
UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue